# Pandas desde cero
https://www.youtube.com/watch?v=zVgmPAn09Uk&list=PL6GZzVvsfrGExyZ68t8MkrX3AUaHlaU-t
https://github.com/OrvizarHub/PANDAS_Curso/blob/master/CursoPandas/Leccion01/01PandasCursodesdeCero.ipynb

In [1]:
import numpy as np
import pandas as pd

## Series

### Tipos de datos

In [7]:
# Mixing integers with a string leaves no common numeric type,
# so the resulting Series uses the generic `object` dtype.
s = pd.Series(data=[1, 2, 3, 'a'])
s

0    1
1    2
2    3
3    a
dtype: object

In [8]:
# All elements are integers, so pandas infers an integer dtype.
s = pd.Series(data=[1, 2, 3, 4])
s

0    1
1    2
2    3
3    4
dtype: int64

In [9]:
# A single float (3.5) among integers upcasts the whole Series to float64.
s = pd.Series(data=[1, 2, 3, 3.5])
s

0    1.0
1    2.0
2    3.0
3    3.5
dtype: float64

### Índices y valores

In [11]:
# `.values` exposes the data of the Series as a NumPy array (index not included).
s.values

array([1. , 2. , 3. , 3.5])

In [13]:
# `.index` holds the row labels; with no explicit index it is a RangeIndex starting at 0.
s.index

RangeIndex(start=0, stop=4, step=1)

In [15]:
# Display the Series once more as a reference point before its index is relabelled.
s

0    1.0
1    2.0
2    3.0
3    3.5
dtype: float64

In [21]:
# Relabel the rows: assigning to `.index` replaces the labels in place,
# the values themselves are untouched.
s.index = [3, 4, 5, 6]
s

3    1.0
4    2.0
5    3.0
6    3.5
dtype: float64

### Selecciones

In [23]:
# Boolean-mask selection: keep only the entries whose value is greater than 2.
s[s > 2]

5    3.0
6    3.5
dtype: float64

### Proyecciones extendidas

In [24]:
# Arithmetic is applied element-wise; the index labels are preserved.
s * 2

3    2.0
4    4.0
5    6.0
6    7.0
dtype: float64

In [31]:
# NumPy functions are applied element-wise to a Series and the result keeps
# the Series index. (`np` is imported in the imports cell at the top of the
# notebook, so the notebook's dependencies are declared in one place.)
np.exp(s)

3     2.718282
4     7.389056
5    20.085537
6    33.115452
dtype: float64

### Búsquedas en los índices

In [32]:
# `in` on a Series tests membership among the index labels, not the values:
# 6 is one of the labels assigned above, so this is True.
6 in s

True

In [33]:
# 3.5 is one of the values but not an index label, so the membership test is False.
3.5 in s

False

### Series a partir de diccionarios

In [38]:
# Building a Series from a dict: the keys become the index labels and the
# dict values become the data, in insertion order.
data = {
    'Brazil': 212003366,
    'Colombia': 50750920,
    'Argentina': 45034295,
}
p = pd.Series(data)
p

Brazil       212003366
Colombia      50750920
Argentina     45034295
dtype: int64

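Nótese cómo, al pasar etiquetas de índice que no existen en el diccionario, la serie las incluye igualmente pero con valor nulo (NaN); sólo las etiquetas presentes en el diccionario reciben su valor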

In [39]:
# Build a second Series from the same dict, this time with an explicit index.
# The labels are given as a list rather than a set: a set has no defined
# order, so the row order of the result would not be reproducible between runs.
# Labels that are not keys of `data` ('Perú', 'Ecuador') get NaN, which also
# turns the dtype into float64.
paises = ['Perú', 'Colombia', 'Ecuador']
p2 = pd.Series(data, index=paises)
p2

Perú               NaN
Colombia    50750920.0
Ecuador            NaN
dtype: float64

¿Cuáles serían esos valores nulos de esta última serie?

In [43]:
# Boolean mask that is True exactly where the Series holds a missing value (NaN).
p2.isnull()

Perú         True
Colombia    False
Ecuador      True
dtype: bool

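Si sumamos ambas series, la operación se alinea por etiqueta de índice: sólo se obtiene un valor para las etiquetas que existen en las dos series con valor no nulo; en el resto el resultado es NaN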

In [45]:
# Addition aligns both Series on their index labels. The result covers the
# union of the labels and is NaN wherever either operand has no value.
p + p2

Argentina            NaN
Brazil               NaN
Colombia     101501840.0
Ecuador              NaN
Perú                 NaN
dtype: float64

### Nombrar índices y columnas

In [47]:
# Display the Series as it is before giving names to it and to its index.
p

Brazil       212003366
Colombia      50750920
Argentina     45034295
dtype: int64

In [50]:
# `name` labels the Series itself (shown as "Name:" in the output footer),
# while `index.name` labels the index (shown as a header above the labels).
p.name = 'habitantes'
p.index.name = 'paises'
p

paises
Brazil       212003366
Colombia      50750920
Argentina     45034295
Name: habitantes, dtype: int64

# DataFrames
Una Series es un vector

Un DataFrame es una matriz

Un DataFrame tiene un índice de filas y un índice de columnas; puede considerarse un diccionario de Series que comparten el mismo índice de filas

In [53]:
# Build a DataFrame from a dict of equal-length lists: each key becomes a
# column name and each list supplies that column's values, row by row.
datos = {
    'Paises': [
        'Ecuador', 'Colombia', 'Chile', 'Peru', 'Paraguay',
        'Bolivia', 'Brazil', 'Argentina', 'Uruguay',
    ],
    'Codigo': ['ECU', 'COL', 'CHL', 'PER', 'PRY', 'BOL', 'BRA', 'ARG', 'URY'],
    'GINI 2017': [44.7, 49.7, 46.6, 43.3, 48.8, 44, 53.3, 41.2, 39.5],
}
df = pd.DataFrame(datos)
df

Unnamed: 0,Paises,Codigo,GINI 2017
0,Ecuador,ECU,44.7
1,Colombia,COL,49.7
2,Chile,CHL,46.6
3,Peru,PER,43.3
4,Paraguay,PRY,48.8
5,Bolivia,BOL,44.0
6,Brazil,BRA,53.3
7,Argentina,ARG,41.2
8,Uruguay,URY,39.5


### Obtener columnas con sintaxis de diccionario o como atributos

In [58]:
# Dictionary-style access: select a column by its label; the result is a Series.
df['Codigo']

0    ECU
1    COL
2    CHL
3    PER
4    PRY
5    BOL
6    BRA
7    ARG
8    URY
Name: Codigo, dtype: object

In [63]:
# Attribute-style access to the same column. This spelling cannot be used for
# a label that is not a valid Python identifier, such as 'GINI 2017'.
df.Codigo

0    ECU
1    COL
2    CHL
3    PER
4    PRY
5    BOL
6    BRA
7    ARG
8    URY
Name: Codigo, dtype: object

### Asignación de uno de los campos como índice del df

In [80]:
# Use the country codes as row labels. The 'Codigo' column itself is kept,
# so the codes appear both as the index and as a regular column.
df.index = df['Codigo']
df

Unnamed: 0_level_0,Paises,Codigo,GINI 2017
Codigo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ECU,Ecuador,ECU,44.7
COL,Colombia,COL,49.7
CHL,Chile,CHL,46.6
PER,Peru,PER,43.3
PRY,Paraguay,PRY,48.8
BOL,Bolivia,BOL,44.0
BRA,Brazil,BRA,53.3
ARG,Argentina,ARG,41.2
URY,Uruguay,URY,39.5


### loc[] e iloc[]

In [81]:
# .loc selects by index label: here, the row whose label is 'BRA'.
df.loc['BRA']

Paises       Brazil
Codigo          BRA
GINI 2017      53.3
Name: BRA, dtype: object

In [82]:
# .iloc selects by integer position (0-based): position 5 is the sixth row.
df.iloc[5]

Paises       Bolivia
Codigo           BOL
GINI 2017         44
Name: BOL, dtype: object

In [83]:
# Slicing the DataFrame with a step of 2 returns every other row, starting with the first.
df[::2]

Unnamed: 0_level_0,Paises,Codigo,GINI 2017
Codigo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ECU,Ecuador,ECU,44.7
CHL,Chile,CHL,46.6
PRY,Paraguay,PRY,48.8
BRA,Brazil,BRA,53.3
URY,Uruguay,URY,39.5


### drop() Eliminar columnas

In [97]:
# Keep only the row(s) for Ecuador, then remove the 'GINI 2017' column from
# that selection. The column is named through the `columns=` keyword: passing
# the axis positionally (`drop(label, 1)`) is rejected by pandas >= 2.0.
# drop() returns a new DataFrame; `df` is not modified.
df[df.Paises == 'Ecuador'].drop(columns='GINI 2017')

Unnamed: 0_level_0,Paises,Codigo
Codigo,Unnamed: 1_level_1,Unnamed: 2_level_1
ECU,Ecuador,ECU


In [122]:
# Return a copy of the frame without the 'GINI 2017' column; `df` itself is
# left unchanged. `columns=` replaces the positional axis argument
# (`drop(label, 1)`), which pandas >= 2.0 no longer accepts.
df.drop(columns='GINI 2017')

Unnamed: 0_level_0,Paises,Codigo
Codigo,Unnamed: 1_level_1,Unnamed: 2_level_1
ECU,Ecuador,ECU
COL,Colombia,COL
CHL,Chile,CHL
PER,Peru,PER
PRY,Paraguay,PRY
BOL,Bolivia,BOL
BRA,Brazil,BRA
ARG,Argentina,ARG
URY,Uruguay,URY


### drop() Eliminar filas

In [125]:
# Add a boolean helper column that flags the rows to be removed
# (True only for the row whose country is 'Ecuador').
df['eliminar'] = (df['Paises'] == 'Ecuador')
df

Unnamed: 0_level_0,Paises,Codigo,GINI 2017,eliminar
Codigo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ECU,Ecuador,ECU,44.7,True
COL,Colombia,COL,49.7,False
CHL,Chile,CHL,46.6,False
PER,Peru,PER,43.3,False
PRY,Paraguay,PRY,48.8,False
BOL,Bolivia,BOL,44.0,False
BRA,Brazil,BRA,53.3,False
ARG,Argentina,ARG,41.2,False
URY,Uruguay,URY,39.5,False


In [126]:
# Select the flagged rows, take their index labels, and drop the rows with
# those labels. drop() returns a new DataFrame; `df` itself is not modified.
df.drop(df.loc[df['eliminar']].index)

Unnamed: 0_level_0,Paises,Codigo,GINI 2017,eliminar
Codigo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
COL,Colombia,COL,49.7,False
CHL,Chile,CHL,46.6,False
PER,Peru,PER,43.3,False
PRY,Paraguay,PRY,48.8,False
BOL,Bolivia,BOL,44.0,False
BRA,Brazil,BRA,53.3,False
ARG,Argentina,ARG,41.2,False
URY,Uruguay,URY,39.5,False


In [127]:
# Remove the helper column again and rebind `df` to the result.
# `columns=` is used instead of the positional axis argument
# (`drop(label, 1)`), which pandas >= 2.0 no longer accepts.
df = df.drop(columns='eliminar')
df

Unnamed: 0_level_0,Paises,Codigo,GINI 2017
Codigo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ECU,Ecuador,ECU,44.7
COL,Colombia,COL,49.7
CHL,Chile,CHL,46.6
PER,Peru,PER,43.3
PRY,Paraguay,PRY,48.8
BOL,Bolivia,BOL,44.0
BRA,Brazil,BRA,53.3
ARG,Argentina,ARG,41.2
URY,Uruguay,URY,39.5


In [131]:
# Drop every row whose country name begins with 'C': `.str[0]` takes the first
# character of each name, the comparison builds a boolean mask, and the index
# labels of the matching rows are passed to drop(). `df` is not modified.
df.drop(df.loc[df['Paises'].str[0] == 'C'].index)

Unnamed: 0_level_0,Paises,Codigo,GINI 2017
Codigo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ECU,Ecuador,ECU,44.7
PER,Peru,PER,43.3
PRY,Paraguay,PRY,48.8
BOL,Bolivia,BOL,44.0
BRA,Brazil,BRA,53.3
ARG,Argentina,ARG,41.2
URY,Uruguay,URY,39.5
