In [1]:
import pandas as pd

In [2]:
# Column-oriented input for a DataFrame: each keyword is a column name and
# each list holds one value per state (all lists have the same length).
data = dict(
    Estado=['Baja_California', 'Guanajuato', 'E_Mexico', 'CDMX', 'Yucatan'],
    Mujeres=[291, 568, 490, 107, 7],
    Hombres=[2279, 3408, 2725, 635, 47],
)

In [4]:
frame = pd.DataFrame(data)

In [5]:
frame

Unnamed: 0,Estado,Mujeres,Hombres
0,Baja_California,291,2279
1,Guanajuato,568,3408
2,E_Mexico,490,2725
3,CDMX,107,635
4,Yucatan,7,47


Las columnas se pueden modificar por asignación. 

In [6]:
# Assigning a scalar to a column name fills every row of that column with it.
frame['num'] = 23

Cuando se asignan listas o arrays a una columna, la longitud del valor debe coincidir con la del dataframe.

In [9]:
# One region label per row, listed in the same order as the rows of `frame`.
frame['Region'] = [
    'Noroeste',
    'Noreste',
    'Occidente',
    'Centro-Sur',
    'Sureste',
]

In [10]:
frame

Unnamed: 0,Estado,Mujeres,Hombres,num,Region
0,Baja_California,291,2279,23,Noroeste
1,Guanajuato,568,3408,23,Noreste
2,E_Mexico,490,2725,23,Occidente
3,CDMX,107,635,23,Centro-Sur
4,Yucatan,7,47,23,Sureste


In [11]:
# Row-by-row addition of two existing columns, stored as a new column.
frame['suma']=frame['Mujeres']+frame['num']

In [12]:
frame

Unnamed: 0,Estado,Mujeres,Hombres,num,Region,suma
0,Baja_California,291,2279,23,Noroeste,314
1,Guanajuato,568,3408,23,Noreste,591
2,E_Mexico,490,2725,23,Occidente,513
3,CDMX,107,635,23,Centro-Sur,130
4,Yucatan,7,47,23,Sureste,30


La palabra clave **del** borrará columnas, como con un diccionario. 

In [13]:
del frame['num']

In [14]:
frame

Unnamed: 0,Estado,Mujeres,Hombres,Region,suma
0,Baja_California,291,2279,Noroeste,314
1,Guanajuato,568,3408,Noreste,591
2,E_Mexico,490,2725,Occidente,513
3,CDMX,107,635,Centro-Sur,130
4,Yucatan,7,47,Sureste,30


Otra forma habitual de datos es un diccionario anidado de diccionarios:

In [15]:
# Nested dict: the outer keys (states) become the DataFrame columns and the
# inner keys (census years) become the row index.
poblacion_m = {
    'Hidalgo':   {2000: 1153598, 2010: 1379796, 2020: 1601462},
    'Michoacan': {2000: 2074589, 2010: 2248928, 2020: 2442505},
    'Chiapas':   {2000: 1979012, 2010: 2443773, 2020: 2837881},
    'CDMX':      {2000: 4494754, 2010: 4617297, 2020: 4805017},
}

In [16]:
poblacion_m

{'Hidalgo': {2000: 1153598, 2010: 1379796, 2020: 1601462},
 'Michoacan': {2000: 2074589, 2010: 2248928, 2020: 2442505},
 'Chiapas': {2000: 1979012, 2010: 2443773, 2020: 2837881},
 'CDMX': {2000: 4494754, 2010: 4617297, 2020: 4805017}}

In [17]:
frame_p = pd.DataFrame(poblacion_m)

In [18]:
frame_p

Unnamed: 0,Hidalgo,Michoacan,Chiapas,CDMX
2000,1153598,2074589,1979012,4494754
2010,1379796,2248928,2443773,4617297
2020,1601462,2442505,2837881,4805017


Es posible transponer el dataframe (intercambiar filas y columnas) con una sintaxis similar a la de un array NumPy:

In [19]:
frame_p.T

Unnamed: 0,2000,2010,2020
Hidalgo,1153598,1379796,1601462
Michoacan,2074589,2248928,2442505
Chiapas,1979012,2443773,2837881
CDMX,4494754,4617297,4805017


Las claves de los diccionarios internos se combinan para formar el índice del resultado. Esto no aplica si se especifica un índice explícito:

In [22]:
# frame_p is already a DataFrame, so the explicit index picks rows by label:
# 2010 is listed twice and therefore appears twice, while 2000 is left out.
pd.DataFrame(frame_p, index=[2010, 2010, 2020])

Unnamed: 0,Hidalgo,Michoacan,Chiapas,CDMX
2010,1379796,2248928,2443773,4617297
2010,1379796,2248928,2443773,4617297
2020,1601462,2442505,2837881,4805017


In [24]:
df = pd.read_csv('homicidios_2022.csv')

In [25]:
df

Unnamed: 0,Estado,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
0,Aguascalientes,AG,Noreste,13,73,1,87
1,Baja California,BC,Noroeste,291,2279,111,2681
2,Baja California Sur,BS,Noroeste,15,74,0,89
3,Campeche,CM,Sureste,13,95,2,110
4,Coahuila de Zaragoza,CO,Noreste,26,143,0,169
5,Colima,CL,Occidente,110,755,2,867
6,Chiapas,CS,Sureste,65,432,2,499
7,Chihuahua,CH,Noroeste,237,1776,3,2016
8,CDMX,CX,Centro-Sur,107,635,0,742
9,Durango,DG,Noroeste,25,100,0,125


In [7]:
df

Unnamed: 0,Estado,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
0,Aguascalientes,AG,Noreste,13,73,1,87
1,Baja California,BC,Noroeste,291,2279,111,2681
2,Baja California Sur,BS,Noroeste,15,74,0,89
3,Campeche,CM,Sureste,13,95,2,110
4,Coahuila de Zaragoza,CO,Noreste,26,143,0,169
5,Colima,CL,Occidente,110,755,2,867
6,Chiapas,CS,Sureste,65,432,2,499
7,Chihuahua,CH,Noroeste,237,1776,3,2016
8,CDMX,CX,Centro-Sur,107,635,0,742
9,Durango,DG,Noroeste,25,100,0,125


In [27]:
df.iloc[10:25]

Unnamed: 0,Estado,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
10,Guanajuato,GT,Noreste,568,3408,280,4256
11,Guerrero,GR,Centro-Sur,133,1226,19,1378
12,Hidalgo,HG,Occidente,57,314,9,380
13,Jalisco,JC,Noroeste,203,1639,3,1845
14,Mexico,EM,Occidente,490,2725,11,3226
15,Michoacan,MI,Occidente,241,2044,7,2292
16,Morelos,MO,Centro-Sur,109,1040,0,1149
17,Nayarit,,Noroeste,16,169,1,186
18,Nuevo Leon,NL,Noreste,150,1240,1,1391
19,Oaxaca,OA,Sureste,114,626,0,740


In [35]:
df1=df.iloc[9:15]

In [36]:
df1

Unnamed: 0,Estado,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
9,Durango,DG,Noroeste,25,100,0,125
10,Guanajuato,GT,Noreste,568,3408,280,4256
11,Guerrero,GR,Centro-Sur,133,1226,19,1378
12,Hidalgo,HG,Occidente,57,314,9,380
13,Jalisco,JC,Noroeste,203,1639,3,1845
14,Mexico,EM,Occidente,490,2725,11,3226


In [37]:
df1.set_index("Estado")

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Durango,DG,Noroeste,25,100,0,125
Guanajuato,GT,Noreste,568,3408,280,4256
Guerrero,GR,Centro-Sur,133,1226,19,1378
Hidalgo,HG,Occidente,57,314,9,380
Jalisco,JC,Noroeste,203,1639,3,1845
Mexico,EM,Occidente,490,2725,11,3226


In [38]:
# Build the indexed frame directly from df so this cell can be re-run safely:
# reassigning df1 from itself fails on a second run with KeyError, because
# "Estado" has already been moved from the columns into the index.
df1 = df.iloc[9:15].set_index("Estado")

In [39]:
df1

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Durango,DG,Noroeste,25,100,0,125
Guanajuato,GT,Noreste,568,3408,280,4256
Guerrero,GR,Centro-Sur,133,1226,19,1378
Hidalgo,HG,Occidente,57,314,9,380
Jalisco,JC,Noroeste,203,1639,3,1845
Mexico,EM,Occidente,490,2725,11,3226


La indexación con corchetes sobre un DataFrame tiene varios casos especiales: un slice corta filas y un array booleano selecciona las filas que cumplen una condición:

In [40]:
df1[1:4]

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Guanajuato,GT,Noreste,568,3408,280,4256
Guerrero,GR,Centro-Sur,133,1226,19,1378
Hidalgo,HG,Occidente,57,314,9,380


In [41]:
df1

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Durango,DG,Noroeste,25,100,0,125
Guanajuato,GT,Noreste,568,3408,280,4256
Guerrero,GR,Centro-Sur,133,1226,19,1378
Hidalgo,HG,Occidente,57,314,9,380
Jalisco,JC,Noroeste,203,1639,3,1845
Mexico,EM,Occidente,490,2725,11,3226


In [44]:
df1[df1['Mujeres']>100]

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Guanajuato,GT,Noreste,568,3408,280,4256
Guerrero,GR,Centro-Sur,133,1226,19,1378
Jalisco,JC,Noroeste,203,1639,3,1845
Mexico,EM,Occidente,490,2725,11,3226


In [42]:
df1['Total']>1000

Estado
 Durango       False
 Guanajuato     True
 Guerrero       True
 Hidalgo       False
 Jalisco        True
 Mexico         True
Name: Total, dtype: bool

In [45]:
df1

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Durango,DG,Noroeste,25,100,0,125
Guanajuato,GT,Noreste,568,3408,280,4256
Guerrero,GR,Centro-Sur,133,1226,19,1378
Hidalgo,HG,Occidente,57,314,9,380
Jalisco,JC,Noroeste,203,1639,3,1845
Mexico,EM,Occidente,490,2725,11,3226


In [46]:
df1['Total'].sum()

11210

In [47]:
df['Total'].sum()

32223

### Ordenar por valores

In [52]:
# Sort the rows by region name in descending (Z to A) order; returns a new
# DataFrame and leaves df1 itself untouched.
df1.sort_values('Regiones', ascending=False)

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Hidalgo,HG,Occidente,57,314,9,380
Mexico,EM,Occidente,490,2725,11,3226
Durango,DG,Noroeste,25,100,0,125
Jalisco,JC,Noroeste,203,1639,3,1845
Guanajuato,GT,Noreste,568,3408,280,4256
Guerrero,GR,Centro-Sur,133,1226,19,1378


¿Cuántos homicidios hubo en la región Noroeste en 2022?

In [58]:
df.set_index("Estado")

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aguascalientes,AG,Noreste,13,73,1,87
Baja California,BC,Noroeste,291,2279,111,2681
Baja California Sur,BS,Noroeste,15,74,0,89
Campeche,CM,Sureste,13,95,2,110
Coahuila de Zaragoza,CO,Noreste,26,143,0,169
Colima,CL,Occidente,110,755,2,867
Chiapas,CS,Sureste,65,432,2,499
Chihuahua,CH,Noroeste,237,1776,3,2016
CDMX,CX,Centro-Sur,107,635,0,742
Durango,DG,Noroeste,25,100,0,125


In [63]:
# Pick the Noroeste rows by label after sorting, instead of the hard-coded
# positions 13:21, which are only right for the current contents of the CSV
# and would silently select the wrong rows if a state were added or re-labelled.
noroeste = df.sort_values(by=['Regiones']).loc[lambda d: d['Regiones'] == 'Noroeste']

In [64]:
noroeste['Total'].sum()

9223

In [65]:
df[df['Regiones']=='Noroeste'].Total.sum()

9223

### Eliminar entradas de un eje

In [66]:
df1

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Durango,DG,Noroeste,25,100,0,125
Guanajuato,GT,Noreste,568,3408,280,4256
Guerrero,GR,Centro-Sur,133,1226,19,1378
Hidalgo,HG,Occidente,57,314,9,380
Jalisco,JC,Noroeste,203,1639,3,1845
Mexico,EM,Occidente,490,2725,11,3226


Con objetos DataFrame, los valores de índice se pueden borrar de cualquier eje. 

In [70]:
# The labels include a leading space because that is how they are stored in
# df1.index (e.g. ' Durango'). drop() returns a new DataFrame; df1 is unchanged.
df1.drop(index=[' Durango',' Jalisco'])

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Guanajuato,GT,Noreste,568,3408,280,4256
Guerrero,GR,Centro-Sur,133,1226,19,1378
Hidalgo,HG,Occidente,57,314,9,380
Mexico,EM,Occidente,490,2725,11,3226


In [69]:
df1.index

Index([' Durango', ' Guanajuato', ' Guerrero', ' Hidalgo', ' Jalisco',
       ' Mexico'],
      dtype='object', name='Estado')

Para eliminar etiquetas de las columnas, usamos sin embargo la palabra clave columns:

In [71]:
# Drop a column by name through the `columns` keyword (equivalent to passing
# the label with axis=1). Returns a new DataFrame; df1 keeps the column.
df1.drop(columns=['Abreviatura'])

Unnamed: 0_level_0,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Durango,Noroeste,25,100,0,125
Guanajuato,Noreste,568,3408,280,4256
Guerrero,Centro-Sur,133,1226,19,1378
Hidalgo,Occidente,57,314,9,380
Jalisco,Noroeste,203,1639,3,1845
Mexico,Occidente,490,2725,11,3226


El operador iloc indexa exclusivamente con enteros (posiciones):

In [72]:
df1.iloc[[3,5]]

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Hidalgo,HG,Occidente,57,314,9,380
Mexico,EM,Occidente,490,2725,11,3226


In [74]:
df1

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Durango,DG,Noroeste,25,100,0,125
Guanajuato,GT,Noreste,568,3408,280,4256
Guerrero,GR,Centro-Sur,133,1226,19,1378
Hidalgo,HG,Occidente,57,314,9,380
Jalisco,JC,Noroeste,203,1639,3,1845
Mexico,EM,Occidente,490,2725,11,3226


El operador loc indexa exclusivamente con etiquetas:

In [73]:
df1.loc[[' Hidalgo',' Mexico']]

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Hidalgo,HG,Occidente,57,314,9,380
Mexico,EM,Occidente,490,2725,11,3226


In [75]:
df1

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Durango,DG,Noroeste,25,100,0,125
Guanajuato,GT,Noreste,568,3408,280,4256
Guerrero,GR,Centro-Sur,133,1226,19,1378
Hidalgo,HG,Occidente,57,314,9,380
Jalisco,JC,Noroeste,203,1639,3,1845
Mexico,EM,Occidente,490,2725,11,3226


In [76]:
df1.loc[' Durango':' Hidalgo']

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Durango,DG,Noroeste,25,100,0,125
Guanajuato,GT,Noreste,568,3408,280,4256
Guerrero,GR,Centro-Sur,133,1226,19,1378
Hidalgo,HG,Occidente,57,314,9,380


Asignar valores utilizando estos métodos modifica la sección correspondiente del DataFrame:

In [77]:
# Label slices with .loc include BOTH endpoints, so the ' Hidalgo' row is
# overwritten as well. The scalar is written to every column of those rows
# (text columns included) and df1 is modified in place.
df1.loc[' Durango':' Hidalgo']=0

In [78]:
df1

Unnamed: 0_level_0,Abreviatura,Regiones,Mujeres,Hombres,No_esp,Total
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Durango,0,0,0,0,0,0
Guanajuato,0,0,0,0,0,0
Guerrero,0,0,0,0,0,0
Hidalgo,0,0,0,0,0,0
Jalisco,JC,Noroeste,203,1639,3,1845
Mexico,EM,Occidente,490,2725,11,3226


In [79]:
poblacion = pd.read_csv('https://raw.githubusercontent.com/jamc88/TSMCI-Analisis-de-datos-con-Python/main/Datos/poblacion.csv')

In [80]:
poblacion=poblacion.set_index("Estado")

In [81]:
poblacion

Unnamed: 0_level_0,Regiones,H_1990,M_1990,H_2000,M_2000,H_2010,M_2010,H_2020,M_2020
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Aguascalientes,Noreste,350218,369441,456533,487752,576638,608358,696683,728924
Baja California,Noroeste,832090,828765,1252581,1234786,1591610,1563460,1900589,1868431
Baja California Sur,Noroeste,161833,155931,216250,207791,325433,311593,405879,392568
Campeche,Sureste,268772,266413,344334,346355,407721,414720,456939,471424
Coahuila de Zaragoza,Noreste,979097,993243,1140195,1157875,1364197,1384194,1563669,1583102
Colima,Occidente,212543,215967,268192,274435,322790,327765,360622,370769
Chiapas,Sureste,1604773,1605723,1941880,1979012,2352807,2443773,2705947,2837881
Chihuahua,Noroeste,1213302,1228571,1519972,1532935,1692545,1713920,1853822,1888047
CDMX,Centro-Sur,3939911,4295833,4110485,4494754,4233783,4617297,4404927,4805017
Durango,Noroeste,664766,684612,709521,739140,803890,829044,904866,927784


In [85]:
# Total population per census year = men (H_<year>) + women (M_<year>).
# A single loop replaces four copy-pasted lines, so adding a census year
# only means extending the tuple.
for anio in ('1990', '2000', '2010', '2020'):
    poblacion[anio] = poblacion[f'H_{anio}'] + poblacion[f'M_{anio}']


In [86]:
poblacion

Unnamed: 0_level_0,Regiones,H_1990,M_1990,H_2000,M_2000,H_2010,M_2010,H_2020,M_2020,1990,2000,2010,2020
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Aguascalientes,Noreste,350218,369441,456533,487752,576638,608358,696683,728924,719659,944285,1184996,1425607
Baja California,Noroeste,832090,828765,1252581,1234786,1591610,1563460,1900589,1868431,1660855,2487367,3155070,3769020
Baja California Sur,Noroeste,161833,155931,216250,207791,325433,311593,405879,392568,317764,424041,637026,798447
Campeche,Sureste,268772,266413,344334,346355,407721,414720,456939,471424,535185,690689,822441,928363
Coahuila de Zaragoza,Noreste,979097,993243,1140195,1157875,1364197,1384194,1563669,1583102,1972340,2298070,2748391,3146771
Colima,Occidente,212543,215967,268192,274435,322790,327765,360622,370769,428510,542627,650555,731391
Chiapas,Sureste,1604773,1605723,1941880,1979012,2352807,2443773,2705947,2837881,3210496,3920892,4796580,5543828
Chihuahua,Noroeste,1213302,1228571,1519972,1532935,1692545,1713920,1853822,1888047,2441873,3052907,3406465,3741869
CDMX,Centro-Sur,3939911,4295833,4110485,4494754,4233783,4617297,4404927,4805017,8235744,8605239,8851080,9209944
Durango,Noroeste,664766,684612,709521,739140,803890,829044,904866,927784,1349378,1448661,1632934,1832650


In [None]:
# Keep only the per-year totals. Selecting the wanted columns (rather than
# dropping the unwanted ones) gives the same frame and makes the cell safe to
# re-run: a second drop() would raise KeyError because the H_*/M_* and
# Regiones columns would already be gone.
poblacion = poblacion[['1990', '2000', '2010', '2020']]

In [101]:
poblacion

Unnamed: 0_level_0,1990,2000,2010,2020
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aguascalientes,719659,944285,1184996,1425607
Baja California,1660855,2487367,3155070,3769020
Baja California Sur,317764,424041,637026,798447
Campeche,535185,690689,822441,928363
Coahuila de Zaragoza,1972340,2298070,2748391,3146771
Colima,428510,542627,650555,731391
Chiapas,3210496,3920892,4796580,5543828
Chihuahua,2441873,3052907,3406465,3741869
CDMX,8235744,8605239,8851080,9209944
Durango,1349378,1448661,1632934,1832650
