In [1]:
import pandas as pd

**Series**: Columna unidimensional de valores con un índice de etiquetas

**DataFrame**: Tabla multidimensional formada por una colección de series

# Series

In [2]:
# Monthly sales as a labelled Series: the dict keys become the index labels.
ventas = pd.Series({'ene': 15, 'feb': 20, 'mar': 21})
ventas

ene    15
feb    20
mar    21
dtype: int64

In [3]:
# Positional access: ask for position 1 explicitly with .iloc.
# A bare ventas[1] on a string-labelled index relies on the deprecated
# "integer key means position" fallback of Series.__getitem__.
ventas.iloc[1]

20

In [4]:
# Label-based access: look the value up by its index label.
ventas['feb']

20

In [5]:
# dtype of the stored values (int64 for these integer sales figures).
ventas.dtype

dtype('int64')

In [6]:
# Index object holding the row labels.
ventas.index

Index(['ene', 'feb', 'mar'], dtype='object')

In [7]:
# Underlying values as a NumPy array, without the index labels.
ventas.values

array([15, 20, 21])

In [8]:
# Give the Series itself a name, then read it back.
ventas.name = 'Ventas 2020'
ventas.name

'Ventas 2020'

In [9]:
# Display the Series: the repr footer now includes its name.
ventas

ene    15
feb    20
mar    21
Name: Ventas 2020, dtype: int64

In [10]:
# Name the index itself; the label is printed above the index labels.
ventas.index.name = 'Meses'
ventas

Meses
ene    15
feb    20
mar    21
Name: Ventas 2020, dtype: int64

In [11]:
# How do I name the values column? (A Series labels its values through ventas.name, set above.)

In [12]:
# List of axes; a Series has a single one (its index).
ventas.axes

[Index(['ene', 'feb', 'mar'], dtype='object', name='Meses')]

In [13]:
ventas.shape  # (rows,)

(3,)

# DataFrames 

In [14]:
# Column-oriented data: each key is a column name, each list holds that column's values.
datos = dict(
    manzanas=[3, 2, 0, 1],
    naranjas=[0, 3, 7, 2],
)
datos

{'manzanas': [3, 2, 0, 1], 'naranjas': [0, 3, 7, 2]}

Pasar el diccionario al constructor `pd.DataFrame()`:

In [15]:
# Build a DataFrame from the dict: keys become columns, rows get a default 0..n-1 index.
compras = pd.DataFrame(datos)
compras

Unnamed: 0,manzanas,naranjas
0,3,0
1,2,3
2,0,7
3,1,2


In [16]:
# Use the 'manzanas' column as the row index. The result is only displayed;
# compras is not reassigned.
compras.set_index('manzanas')

Unnamed: 0_level_0,naranjas
manzanas,Unnamed: 1_level_1
3,0
2,3
0,7
1,2


In [17]:
# Rebuild the frame from the same dict, this time with explicit row labels.
compras = pd.DataFrame(datos, index=['Juno', 'Robert', 'Lily', 'david'])
compras

Unnamed: 0,manzanas,naranjas
Juno,3,0
Robert,2,3
Lily,0,7
david,1,2


In [18]:
# Row labels of the frame.
compras.index

Index(['Juno', 'Robert', 'Lily', 'david'], dtype='object')

In [19]:
# Column labels of the frame.
compras.columns

Index(['manzanas', 'naranjas'], dtype='object')

In [20]:
# Both axes in one list: row labels first, then column labels.
compras.axes

[Index(['Juno', 'Robert', 'Lily', 'david'], dtype='object'),
 Index(['manzanas', 'naranjas'], dtype='object')]

In [21]:
# Name the row index; the name is shown as a header above the row labels.
compras.index.name = 'Clientes'
compras

Unnamed: 0_level_0,manzanas,naranjas
Clientes,Unnamed: 1_level_1,Unnamed: 2_level_1
Juno,3,0
Robert,2,3
Lily,0,7
david,1,2


In [22]:
# Name the column index as well; it is shown in the header row.
compras.columns.name ='Frutas'
compras

Frutas,manzanas,naranjas
Clientes,Unnamed: 1_level_1,Unnamed: 2_level_1
Juno,3,0
Robert,2,3
Lily,0,7
david,1,2


In [23]:
# Cell values as a 2-D NumPy array (row and column labels are dropped).
compras.values

array([[3, 0],
       [2, 3],
       [0, 7],
       [1, 2]])

In [24]:
compras.shape  # 4 rows, 2 columns

(4, 2)

# Creación de Series

In [25]:
# Series from a plain list: pandas supplies the default 0..n-1 index.
s = pd.Series(data=[7, 5, 3])
s

0    7
1    5
2    3
dtype: int64

In [26]:
# Same values, now with explicit month labels as the index.
s = pd.Series(
    data=[7, 5, 3],
    index=["Ene", "Feb", "Mar"],
)
s

Ene    7
Feb    5
Mar    3
dtype: int64

In [27]:
# Series from a dict: keys become the index labels, values become the data.
d = dict(Ene=7, Feb=5, Mar=3)
s = pd.Series(data=d)
s

Ene    7
Feb    5
Mar    3
dtype: int64

In [28]:
# Dict plus an explicit index: values are picked by label in the order given.
# 'Abr' is not a key of d, so it comes out as NaN, and the result is float64
# even though dtype=int was requested.
d = {"Ene":7, "Feb":5, "Mar":3}
s = pd.Series(d, index=["Abr", "Mar", "Feb", "Ene"], dtype=int)
s

Abr    NaN
Mar    3.0
Feb    5.0
Ene    7.0
dtype: float64

# Creación de dataframes

In [29]:
# Column-oriented data for four elements; (column name, values) pairs keep the column order.
elementos = dict([
    ('número atómico', [1, 6, 47, 88]),
    ('masa atómica', [1.008, 12.011, 107.87, 226]),
    ('Familia', ['no metal', 'no metal', 'metal', 'metal']),
])
elementos

{'Familia': ['no metal', 'no metal', 'metal', 'metal'],
 'masa atómica': [1.008, 12.011, 107.87, 226],
 'número atómico': [1, 6, 47, 88]}

In [30]:
# DataFrame from the dict: one column per key, default integer row index.
tabla_periodica = pd.DataFrame(elementos)
tabla_periodica

Unnamed: 0,número atómico,masa atómica,Familia
0,1,1.008,no metal
1,6,12.011,no metal
2,47,107.87,metal
3,88,226.0,metal


In [31]:
# Same data with element symbols as row labels; `columns` sets the column order.
tabla_periodica = pd.DataFrame(elementos,
                               index=['H', 'C', 'Ag', 'Ra'],
                               columns=['Familia', 'número atómico', 'masa atómica']
)
tabla_periodica

Unnamed: 0,Familia,número atómico,masa atómica
H,no metal,1,1.008
C,no metal,6,12.011
Ag,metal,47,107.87
Ra,metal,88,226.0


In [32]:
# Preview the first rows; this frame has only four, so all of them are shown.
tabla_periodica.head()

Unnamed: 0,Familia,número atómico,masa atómica
H,no metal,1,1.008
C,no metal,6,12.011
Ag,metal,47,107.87
Ra,metal,88,226.0


In [33]:
# Summary statistics; only the numeric columns appear in the result.
tabla_periodica.describe()

Unnamed: 0,número atómico,masa atómica
count,4.0,4.0
mean,35.5,86.72225
std,40.616089,104.521485
min,1.0,1.008
25%,4.75,9.26025
50%,26.5,59.9405
75%,57.25,137.4025
max,88.0,226.0


In [34]:
# Structural summary: index range, per-column non-null counts and dtypes, memory usage.
tabla_periodica.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, H to Ra
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Familia         4 non-null      object 
 1   número atómico  4 non-null      int64  
 2   masa atómica    4 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 128.0+ bytes


# Selección de datos en Series

In [35]:
# 10, 20, 30, 40 with the default integer index.
s = pd.Series(range(10, 50, 10))
s

0    10
1    20
2    30
3    40
dtype: int64

In [36]:
# Same values keyed by letter labels (dict keys become the index).
s = pd.Series({'a': 10, 'b': 20, 'c': 30, 'd': 40})
s

a    10
b    20
c    30
d    40
dtype: int64

In [37]:
# .loc selects by index label.
s.loc['b']

20

In [38]:
# .iloc selects by integer position (0-based), here the same element as label 'b'.
s.iloc[1]

20

In [39]:
# Draw 2 elements at random; random_state fixes the seed so the pick is repeatable.
s.sample(2, random_state=3)

d    40
b    20
dtype: int64

# Selección de datos en DataFrames

In [40]:
# Monthly figures table: one row per month, mixed numeric and text columns.
# Note: this rebinds `ventas`, which earlier in the notebook held a Series.
ventas = pd.DataFrame(
    data={
        'Entradas': [41, 31, 56, 18],
        'Salidas': [17, 54, 6, 78],
        'Valoración': [66, 54, 49, 66],
        'Límite': ['No', 'Si', 'No', 'No'],
        'Cambio': [1.43, 1.16, -0.16, 0.77],
    },
    index=['Ene', 'Feb', 'Mar', 'Abr'],
)
ventas

Unnamed: 0,Entradas,Salidas,Valoración,Límite,Cambio
Ene,41,17,66,No,1.43
Feb,31,54,54,Si,1.16
Mar,56,6,49,No,-0.16
Abr,18,78,66,No,0.77


In [41]:
# Selecting a single column by name yields a Series.
print(type(ventas['Entradas']))

<class 'pandas.core.series.Series'>


In [42]:
# The selected column keeps the frame's row labels and carries the column name.
ventas['Entradas']

Ene    41
Feb    31
Mar    56
Abr    18
Name: Entradas, dtype: int64

In [43]:
# Chained lookup: select the column first, then the row label inside that Series.
ventas['Entradas']['Feb']

31

In [44]:
# Assigning a scalar to a column broadcasts it to every row (overwrites 'Salidas').
ventas['Salidas']=1
ventas

Unnamed: 0,Entradas,Salidas,Valoración,Límite,Cambio
Ene,41,1,66,No,1.43
Feb,31,1,54,Si,1.16
Mar,56,1,49,No,-0.16
Abr,18,1,66,No,0.77


In [45]:
# The new column is aligned on index labels: 'Feb' has no match and becomes NaN,
# while 'May' is not a row of the frame, so its value is discarded.
ventas['Perdidas'] = pd.Series([5,4,6,8], index=['Ene', 'Mar', 'Abr', 'May'])
ventas

Unnamed: 0,Entradas,Salidas,Valoración,Límite,Cambio,Perdidas
Ene,41,1,66,No,1.43,5.0
Feb,31,1,54,Si,1.16,
Mar,56,1,49,No,-0.16,4.0
Abr,18,1,66,No,0.77,6.0


In [46]:
# Derived column computed element-wise from two existing columns.
ventas['Ganancias'] = (ventas['Entradas']*2) - (ventas['Valoración']/10)
ventas

Unnamed: 0,Entradas,Salidas,Valoración,Límite,Cambio,Perdidas,Ganancias
Ene,41,1,66,No,1.43,5.0,75.4
Feb,31,1,54,Si,1.16,,56.6
Mar,56,1,49,No,-0.16,4.0,107.1
Abr,18,1,66,No,0.77,6.0,29.4


In [47]:
# Attribute-style access returns the 'Ganancias' column as a Series.
ventas.Ganancias

Ene     75.4
Feb     56.6
Mar    107.1
Abr     29.4
Name: Ganancias, dtype: float64

# Edición de Series

In [48]:
import numpy as np

In [49]:
# Integers 0..9 with the default index.
s = pd.Series(np.arange(10))
s

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [50]:
# Keep values where the condition holds; the others become NaN (hence the float64 result).
s.where(s%2 == 0)

0    0.0
1    NaN
2    2.0
3    NaN
4    4.0
5    NaN
6    6.0
7    NaN
8    8.0
9    NaN
dtype: float64

In [51]:
# Same filter, but the non-matching (odd) values are replaced with the string 'Impar'.
s.where(s % 2 == 0, 'Impar')

0        0
1    Impar
2        2
3    Impar
4        4
5    Impar
6        6
7    Impar
8        8
9    Impar
dtype: object

# Edición de DataFrame

In [52]:
# 4x3 frame filled with 0..11, rows labelled a-d and columns A-C.
df = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=list('abcd'),
    columns=list('ABC'),
)
df

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [53]:
# Overwrite a single cell by position: second row, third column.
df.iloc[1,2]=-1
df

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,-1
c,6,7,8
d,9,10,11


In [54]:
# Replace an existing column with a list that has one value per row.
df['A'] = [10, 20, 30, 40]
df

Unnamed: 0,A,B,C
a,10,1,2
b,20,4,-1
c,30,7,8
d,40,10,11


In [55]:
# Assigning to a column name that does not exist yet appends a new column.
df['D'] = [10,20,30,40]
df

Unnamed: 0,A,B,C,D
a,10,1,2,10
b,20,4,-1,20
c,30,7,8,30
d,40,10,11,40


In [56]:
# Assigning to a new row label through .loc appends a row (one value per column).
df.loc['e'] = [10, 20, 30, 40]
df

Unnamed: 0,A,B,C,D
a,10,1,2,10
b,20,4,-1,20
c,30,7,8,30
d,40,10,11,40
e,10,20,30,40


In [57]:
# Fresh 4x3 frame of 0..11 with the default row index and columns A-C.
df = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    columns=list('ABC'),
)
df

Unnamed: 0,A,B,C
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11


In [58]:
# Keep the even entries; odd ones become NaN.
df.where(df % 2 == 0)

Unnamed: 0,A,B,C
0,0.0,,2.0
1,,4.0,
2,6.0,,8.0
3,,10.0,


In [59]:
# Keep the even entries; odd ones are replaced by their negation.
df.where(df % 2 == 0, -df)

Unnamed: 0,A,B,C
0,0,-1,2
1,-3,4,-5
2,6,-7,8
3,-9,10,-11


In [60]:
# 4x4 frame filled with 0..15, rows labelled a-d and columns A-D.
df = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=list('abcd'),
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
a,0,1,2,3
b,4,5,6,7
c,8,9,10,11
d,12,13,14,15


In [61]:
# Drop rows 'a' and 'c' (axis=0 targets the row labels); df is not reassigned here.
df.drop(['a','c'], axis=0)

Unnamed: 0,A,B,C,D
b,4,5,6,7
d,12,13,14,15


In [62]:
# Drop columns 'B' and 'D' (axis=1 targets the column labels); df is not reassigned here.
df.drop(['B','D'], axis=1)

Unnamed: 0,A,C
a,0,2
b,4,6
c,8,10
d,12,14


# Unión de Series y DataFrames

In [63]:
# Two Series with disjoint letter labels, used as concat inputs below.
s = pd.Series(range(1, 6), index=list('abcde'))
r = pd.Series(range(10, 13), index=list('fgh'))
print(type(r))

<class 'pandas.core.series.Series'>


In [65]:
# Concatenate along the index (the default axis): r's rows follow s's rows,
# and the result is still a Series.
t = pd.concat([s,r])
print(type(t))
t

<class 'pandas.core.series.Series'>


a     1
b     2
c     3
d     4
e     5
f    10
g    11
h    12
dtype: int64

In [68]:
# Two Series with partly overlapping labels, placed side by side.
# axis=1 aligns them on the index; labels missing from one side become NaN.
a = pd.Series(range(1, 6), index=list('abcde'))
b = pd.Series(range(10, 13), index=['a', 'b', 'f'])
pd.concat([a, b], axis=1, sort=True)

Unnamed: 0,0,1
a,1.0,10.0
b,2.0,11.0
c,3.0,
d,4.0,
e,5.0,
f,,12.0


In [72]:
# Concatenating Series side by side (axis=1) yields a DataFrame.
type(pd.concat([a,b], axis =1, sort =True))

pandas.core.frame.DataFrame

In [74]:
# Row-wise concat does not merge repeated labels: 'a' and 'c' each appear twice.
s = pd.Series(range(1, 5), index=list('abcd'))
r = pd.Series(range(10, 13), index=list('ace'))
pd.concat([s, r])

a     1
b     2
c     3
d     4
a    10
c    11
e    12
dtype: int64

In [77]:
# Append b's rows after a's rows.
# Series.append() was removed in pandas 2.0; pd.concat([a, b]) gives the
# same result and works on every pandas version.
a = pd.Series([1,2,3,4,5], index=['a','b','c','d','e'])
b = pd.Series([10,11,12], index=['f','g','h'])
c = pd.concat([a, b])
c

a     1
b     2
c     3
d     4
e     5
f    10
g    11
h    12
dtype: int64

In [79]:
# Reverse order: a's rows follow b's rows.
# pd.concat replaces Series.append(), which was removed in pandas 2.0.
d = pd.concat([b, a])
d

f    10
g    11
h    12
a     1
b     2
c     3
d     4
e     5
dtype: int64

In [80]:
# Discard the original labels and renumber the result 0..n-1.
# pd.concat replaces Series.append(), which was removed in pandas 2.0.
c = pd.concat([a, b], ignore_index=True)
c

0     1
1     2
2     3
3     4
4     5
5    10
6    11
7    12
dtype: int64

# Concatenación y Unión de DataFrames

In [84]:
# 3x3 frame of 0..8 with rows a, b, d and columns A-C.
df1 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=list('abd'),
    columns=list('ABC'),
)
df1

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
d,6,7,8


In [87]:
# 4x3 frame of 0..11 with rows a, b, c, e and columns B-D.
df2 = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=list('abce'),
    columns=list('BCD'),
)
df2

Unnamed: 0,B,C,D
a,0,1,2
b,3,4,5
c,6,7,8
e,9,10,11


In [88]:
# Row-wise concat: the columns are the union of both frames; missing cells become NaN.
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D
a,0.0,1,2,
b,3.0,4,5,
d,6.0,7,8,
a,,0,1,2.0
b,,3,4,5.0
c,,6,7,8.0
e,,9,10,11.0


In [90]:
# Same union with the operands swapped: df2's rows and columns come first.
pd.concat([df2, df1])

Unnamed: 0,B,C,D,A
a,0,1,2.0,
b,3,4,5.0,
c,6,7,8.0,
e,9,10,11.0,
a,1,2,,0.0
b,4,5,,3.0
d,7,8,,6.0


In [92]:
# Column-wise concat: rows are aligned on the index and the shared columns
# B and C appear once per input frame.
pd.concat([df1, df2], axis=1)


Unnamed: 0,A,B,C,B.1,C.1,D
a,0.0,1.0,2.0,0.0,1.0,2.0
b,3.0,4.0,5.0,3.0,4.0,5.0
d,6.0,7.0,8.0,,,
c,,,,6.0,7.0,8.0
e,,,,9.0,10.0,11.0


In [95]:
# join='inner' keeps only the columns present in both frames (B and C).
pd.concat([df1, df2], join='inner')

Unnamed: 0,B,C
a,1,2
b,4,5
d,7,8
a,0,1
b,3,4
c,6,7
e,9,10


In [97]:
# Column-wise inner join keeps only the row labels present in both frames (a and b).
pd.concat([df1, df2], axis=1, join='inner')

Unnamed: 0,A,B,C,B.1,C.1,D
a,0,1,2,0,1,2
b,3,4,5,3,4,5


In [98]:
# With axis=1, ignore_index=True replaces the column labels with 0..5.
pd.concat([df1, df2], axis=1, join='inner',ignore_index=True)

Unnamed: 0,0,1,2,3,4,5
a,0,1,2,0,1,2
b,3,4,5,3,4,5


**.append()** (eliminado en pandas 2.0; el equivalente actual es `pd.concat()`)

In [100]:
# Re-create df1 (3x3, rows a, b, d, columns A-C) for the append example.
df1 = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=list('abd'),
    columns=list('ABC'),
)
df1

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
d,6,7,8


In [102]:
# Re-create df2 (4x3, rows a, b, c, e, columns B-D) for the append example.
df2 = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=list('abce'),
    columns=list('BCD'),
)
df2

Unnamed: 0,B,C,D
a,0,1,2
b,3,4,5
c,6,7,8
e,9,10,11


In [103]:
# Stack df2's rows under df1's rows (columns are the union; gaps become NaN).
# DataFrame.append() was removed in pandas 2.0; pd.concat is the replacement.
pd.concat([df1, df2])

Unnamed: 0,A,B,C,D
a,0.0,1,2,
b,3.0,4,5,
d,6.0,7,8,
a,,0,1,2.0
b,,3,4,5.0
c,,6,7,8.0
e,,9,10,11.0
