# Pandas Walkthrough

In [1]:
# We need to import Pandas & Numpy
import pandas as pd # pd is the alias
import numpy as np #np is the alias

### Basic structures -> Series & DataFrame

In [2]:
# Build a labelled Series of five standard-normal samples.
# A locally seeded generator is used so that a fresh "Restart & Run All"
# reproduces the same values instead of drawing new ones on every run.
rng = np.random.RandomState(42)
my_serie = pd.Series(rng.randn(5), index=['a', 'b', 'c', 'd', 'e'])
my_serie

a    1.033952
b   -0.104856
c   -0.796489
d   -1.044549
e    0.755986
dtype: float64

In [3]:
# Build a small demo DataFrame from a dict that maps column label -> values.
# Every list has the same length; row i of the frame is made of the i-th
# entry of each list.
my_dataframe = pd.DataFrame({
    'Name': ['Andres', 'Melissa', 'Freddy'],
    'Lastname': ['Gutierrez', 'Cardenas', 'Vega'],
    'Age': [23, 22, 35],
})
# A bare trailing expression is rendered by the notebook as a rich table, so
# the cell does not need the IPython-only `display` helper.
my_dataframe

Unnamed: 0,Name,Lastname,Age
0,Andres,Gutierrez,23
1,Melissa,Cardenas,22
2,Freddy,Vega,35


In [4]:
# Bracket access: select a single column by its label; the result is a Series
# that keeps the frame's row index.
my_dataframe['Name']

0     Andres
1    Melissa
2     Freddy
Name: Name, dtype: object

In [5]:
# Attribute access returns the same column as my_dataframe['Name'], but it is
# fragile: per the pandas indexing docs it only works when the label is a valid
# Python identifier that does not collide with an existing DataFrame attribute
# or method. Prefer the bracket form.
my_dataframe.Name # danger!

0     Andres
1    Melissa
2     Freddy
Name: Name, dtype: object

#### Simple query

In [6]:
# Boolean-mask filter: compare the 'Name' column element-wise against
# 'Andres' and keep only the rows where the comparison is True.
my_dataframe.loc[my_dataframe['Name'].eq('Andres')]

Unnamed: 0,Name,Lastname,Age
0,Andres,Gutierrez,23


In [7]:
# Hand-written boolean mask: one flag per row, True means "keep this row".
mask = [
    True,   # row 0
    False,  # row 1
    False,  # row 2
]

In [8]:
# Row selection with a plain list of booleans: rows flagged True are kept.
my_dataframe.loc[mask]

Unnamed: 0,Name,Lastname,Age
0,Andres,Gutierrez,23


### Read external file

In [9]:
# Load the 'Pokemon' sheet of the workbook into a DataFrame, then preview
# its first five rows.
pokemon = pd.read_excel(io='pokemon.xlsx', sheet_name='Pokemon')
pokemon.head(n=5)

Unnamed: 0,#,Name,Type,Total,HP,Attack,Defense,Special Attack,Special Defense,Speed
0,1,Bulbasaur,GRASS,318,45,49,49,65,65,45
1,1,Bulbasaur,POISON,318,45,49,49,65,65,45
2,2,Ivysaur,GRASS,405,60,62,63,80,80,60
3,2,Ivysaur,POISON,405,60,62,63,80,80,60
4,3,Venusaur,GRASS,525,80,82,83,100,100,80


In [10]:
# Load the 'Moves' sheet of the same workbook, then preview its first five rows.
pokemon_moves = pd.read_excel(io='pokemon.xlsx', sheet_name='Moves')
pokemon_moves.head(n=5)

Unnamed: 0,Name,Type,Cat.,Power,Acc.,PP,TM,Effect,Prob. (%)
0,Absorb,GRASS,Special,20.0,100.0,25.0,,User recovers half the HP inflicted on opponent.,
1,Acid,POISON,Special,40.0,100.0,30.0,,May lower opponent's Special Defense.,10.0
2,Acid Armor,POISON,Status,,,40.0,,Sharply raises user's Defense.,
3,Acid Spray,POISON,Special,40.0,100.0,20.0,,Sharply lowers opponent's Special Defense.,100.0
4,Acrobatics,FLYING,Physical,55.0,100.0,15.0,TM62,Stronger when the user does not have a held item.,


In [11]:
# Load the 'Evolution' sheet of the same workbook, then preview its first
# five rows.
pokemon_evolution = pd.read_excel(io='pokemon.xlsx', sheet_name='Evolution')
pokemon_evolution.head(n=5)

Unnamed: 0,Evolving from,Evolving to,Level,Condition,Evolution Type
0,Bulbasaur,Ivysaur,16.0,,Level
1,Ivysaur,Venusaur,32.0,,Level
2,Charmander,Charmeleon,16.0,,Level
3,Charmeleon,Charizard,36.0,,Level
4,Squirtle,Wartortle,16.0,,Level


## Applying merge

In [12]:
# Join every Pokemon row with its evolution row by matching
# pokemon['Name'] against pokemon_evolution['Evolving from'].
# The join is an inner join (the pandas default, spelled out here), so rows
# without a match on the other side are not part of the result.
merged_pokemon = pd.merge(
    pokemon,
    pokemon_evolution,
    how='inner',
    left_on='Name',
    right_on='Evolving from',
)
merged_pokemon.head(n=5)

Unnamed: 0,#,Name,Type,Total,HP,Attack,Defense,Special Attack,Special Defense,Speed,Evolving from,Evolving to,Level,Condition,Evolution Type
0,1,Bulbasaur,GRASS,318,45,49,49,65,65,45,Bulbasaur,Ivysaur,16.0,,Level
1,1,Bulbasaur,POISON,318,45,49,49,65,65,45,Bulbasaur,Ivysaur,16.0,,Level
2,2,Ivysaur,GRASS,405,60,62,63,80,80,60,Ivysaur,Venusaur,32.0,,Level
3,2,Ivysaur,POISON,405,60,62,63,80,80,60,Ivysaur,Venusaur,32.0,,Level
4,4,Charmander,FIRE,309,39,52,43,60,50,65,Charmander,Charmeleon,16.0,,Level


## Get all types by Pokemon

In [13]:
# All 'Type' values of the rows whose 'Name' is 'Bulbasaur'.
# Rows and column are selected in a single .loc call instead of chaining two
# [] lookups, which is the form pandas recommends over chained indexing.
merged_pokemon.loc[merged_pokemon['Name'] == 'Bulbasaur', 'Type']

0     GRASS
1    POISON
Name: Type, dtype: object

In [14]:
# Average of every numeric column per Pokemon name.
# numeric_only=True is required because the frame also holds text columns
# (e.g. 'Type', 'Evolving to'); on pandas >= 2.0 a plain .mean() raises a
# TypeError when it meets them instead of silently dropping them.
merged_pokemon.groupby(by=['Name']).mean(numeric_only=True)

Unnamed: 0_level_0,Total,HP,Attack,Defense,Special Attack,Special Defense,Speed,Level
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Abra,310,25,20,15,105,55,90,16.0
Aipom,360,55,70,55,40,55,85,
Amaura,362,77,59,50,67,63,46,39.0
Anorith,355,45,95,50,40,50,75,40.0
Archen,401,55,112,45,74,45,70,37.0
Aron,330,50,70,100,40,40,30,32.0
Axew,320,46,87,60,30,40,57,38.0
Azurill,190,50,20,40,20,40,20,
Bagon,300,45,75,60,40,30,50,30.0
Baltoy,300,40,40,55,40,70,55,36.0


# BANK ANALYSIS

In [20]:
# Load the bank statement spreadsheet (first sheet) and show its first and
# last three rows as two separate tables.
my_bank = pd.read_excel(io='transactions.xls')
display(my_bank.head(n=3), my_bank.tail(n=3))

Unnamed: 0,Date,Code,Description,Unnamed: 3,Debits,Credits,Balance
0,20/01/2019,CP,GROCERY STORE 1,,1.39,0.0,13.61
1,20/01/2019,CP,GROCERY STORE 1,,3.16,0.0,10.45
2,24/01/2019,CP,LOS PALETEROS,,9.32,0.0,1.13


Unnamed: 0,Date,Code,Description,Unnamed: 3,Debits,Credits,Balance
338,14/10/2019,CP,UBER *TRIP 800-5,,1.71,0.0,354.2
339,14/10/2019,TF,TEF DE:MILTON CARDENAS NAJERA,,0.0,70.0,424.2
340,16/10/2019,TF,TEF DE: 941148827,,0.0,400.0,824.2


In [24]:
# Remove the 'Unnamed: 3' column, which shows no values in the previewed rows.
# Tip: my_bank.columns.tolist() lists the available labels.
# errors='ignore' keeps the cell idempotent: re-running it after the column is
# already gone is a no-op instead of a KeyError. A new frame is assigned
# rather than dropping with inplace=True.
my_bank = my_bank.drop(columns=['Unnamed: 3'], errors='ignore')

In [25]:
# Preview the first five rows of the frame.
my_bank.head(n=5)

Unnamed: 0,Date,Code,Description,Debits,Credits,Balance
0,20/01/2019,CP,GROCERY STORE 1,1.39,0.0,13.61
1,20/01/2019,CP,GROCERY STORE 1,3.16,0.0,10.45
2,24/01/2019,CP,LOS PALETEROS,9.32,0.0,1.13
3,24/01/2019,MC,TFT DE Gutierrez_Arcia_Andr,0.0,50.0,51.13
4,31/01/2019,3O,INTERESES,0.0,0.08,51.21


In [27]:
# Dimensions of the frame as a (number of rows, number of columns) tuple.
my_bank.shape

(341, 6)

In [35]:
# Count how many times each distinct 'Description' occurs, most frequent first.
# (Chain .sort_index(ascending=True) to order by description text instead.)
my_bank['Description'].value_counts(sort=True, ascending=False)

UBER   *TRIP             800-5          53
UBER TRIP HELP.UBER.COM      .          50
RETIRO ATM 353714          HER          14
AM PM LA AURORA HEREDIA                 13
PRF DEBITO NORMAL 474581221635          11
GROCERY STORE 1                         11
INTERESES                                9
SODA BUENISIMO                           8
EXTREME TECH                             7
WALMART TIBAS          DIRECTO           7
TEF DE: 941148827                        6
UBR* PENDING.UBER.COM    800-5           5
UBER   *EATS             800-5           5
TEF A : 924924525                        4
MAS X MENOS TIBAS      DIRECTO           4
CLINICA ASEMBIS HEREDIA                  3
PLATZI BASIC (MONTHLY)   HTTPS           3
MAS X MENOS HEREDIA    DIRECTO           3
FISCHEL TIBAS S05                        3
UBER EATS HELP.UBER.COM      .           3
CINEPOLIS LINCOLN                        3
RESTAURANTE PIMIENTA VERDHERED           2
Payroll Pasan Apr302019 GFT CR           2
WALMART  HE