In [1]:
# Import libraries
import pandas as pd
import csv

# NOTE: the DataFrame styling (`.style`) used later in this notebook also requires Jinja2 and matplotlib to be installed


# Location of the CSV to load
# Dataset page: https://datos.gob.mx/busca/dataset/proyecciones-de-la-poblacion-de-mexico-y-de-las-entidades-federativas-2016-2050
file_to_load = "https://storage.googleapis.com/datamexico-data/conapo/ind_dem_proyecciones.csv"

# Load the CSV into a pandas DataFrame, decoding it as latin-1
ind_dem_proyecciones = pd.read_csv(
    filepath_or_buffer=file_to_load,
    encoding="latin-1",
)

In [2]:
# Peek at the first 5 rows to get a feel for the file's layout
print(ind_dem_proyecciones.head(n=5))

   RENGLON   AÑO             ENTIDAD  CVE_GEO   CRE_NAT CRE_SOC CRE_TOT  \
0        1  1950  República Mexicana        0  771494.0  -28552  742942   
1        2  1951  República Mexicana        0  797550.0  -33378  764172   
2        3  1952  República Mexicana        0  867222.0  -34371  832851   
3        4  1953  República Mexicana        0  882943.0  -35106  847837   
4        5  1954  República Mexicana        0  974839.0  -36906  937933   

        DEF  EDAD_MED EMI_EST  ... T_CRE_TOT  T_EMI_EST  T_INM_EST  \
0  488080.0     17.11       0  ...      2.75          0          0   
1  511172.0     17.00       0  ...      2.75          0          0   
2  487182.0     17.00       0  ...      2.91          0          0   
3  518724.0     17.00       0  ...      2.88          0          0   
4  462766.0     17.00       0  ...      3.09          0          0   

   T_MIG_NET_EST  T_MIG_NET_INT    TMIH    TMIM     TMI TEF_ADO   TGF  
0              0          -0.11  138.63  120.64  129.85 

In [3]:
# List the name of every column in the raw frame
column_index = ind_dem_proyecciones.columns
print(column_index)

Index(['RENGLON', 'AÑO', 'ENTIDAD', 'CVE_GEO', 'CRE_NAT', 'CRE_SOC', 'CRE_TOT',
       'DEF', 'EDAD_MED', 'EMI_EST', 'EMI_INT', 'EVH', 'EVM', 'EV',
       'HOM_MIT_AÑO', 'IND_ENV', 'INM_EST', 'INM_INT', 'MIG_NET_EST',
       'MIG_NET_INT', 'MUJ_MIT_AÑO', 'NAC', 'POB_MIT_AÑO', 'MUJ_12_14',
       'MUJ_15_17', 'MUJ_15_19', 'MUJ_15_29', 'MUJ_15_49', 'MUJ_18_24',
       'MUJ_20_24', 'MUJ_3_5', 'MUJ_30_64', 'MUJ_6_11', 'MUJ_65_MAS',
       'HOM_12_14', 'HOM_15_17', 'HOM_15_19', 'HOM_15_29', 'HOM_15_49',
       'HOM_18_24', 'HOM_20_24', 'HOM_3_5', 'HOM_30_64', 'HOM_6_11',
       'HOM_65_MAS', 'POB_12_14', 'POB_15_17', 'POB_15_19', 'POB_15_29',
       'POB_15_49', 'POB_18_24', 'POB_20_24', 'POB_3_5', 'POB_30_64',
       'POB_6_11', 'POB_65_MAS', 'RAZ_DEP_ADU', 'RAZ_DEP_INF', 'RAZ_DEP',
       'T_BRU_MOR', 'T_BRU_NAT', 'T_CRE_NAT', 'T_CRE_SOC', 'T_CRE_TOT',
       'T_EMI_EST', 'T_INM_EST', 'T_MIG_NET_EST', 'T_MIG_NET_INT', 'TMIH',
       'TMIM', 'TMI', 'TEF_ADO', 'TGF'],
      dtype='object')


In [4]:
# Narrow the raw frame down to the handful of columns used from here on
ind_dem_subset = ind_dem_proyecciones[
    [
        'AÑO',
        'ENTIDAD',
        'CRE_TOT',
        'EV',
        'POB_MIT_AÑO',
        'MUJ_15_29',
        'MUJ_30_64',
        'HOM_15_29',
        'HOM_30_64',
    ]
]
print(ind_dem_subset.head(n=5))

    AÑO             ENTIDAD CRE_TOT     EV  POB_MIT_AÑO  MUJ_15_29  MUJ_30_64  \
0  1950  República Mexicana  742942  47.34     27026573    3463334    3803433   
1  1951  República Mexicana  764172  46.90     27780135    3555366    3887150   
2  1952  República Mexicana  832851  48.98     28578641    3649012    3974952   
3  1953  República Mexicana  847837  48.36     29418995    3745691    4067546   
4  1954  República Mexicana  937933  52.00     30311877    3846467    4166394   

   HOM_15_29  HOM_30_64  
0    3508333    3661134  
1    3598248    3742914  
2    3689848    3829259  
3    3783388    3921535  
4    3881069    4020584  


In [5]:
# Drop the national aggregate rows so only state-level rows remain.
# .copy() makes the result an independent frame: columns can then be added to
# it without pandas raising SettingWithCopyWarning.
ind_dem_subset = ind_dem_subset.loc[ind_dem_subset["ENTIDAD"] != "República Mexicana"].copy()
print(ind_dem_subset.head())

      AÑO         ENTIDAD CRE_TOT     EV  POB_MIT_AÑO  MUJ_15_29  MUJ_30_64  \
101  1970  Aguascalientes      ND  61.12       356491      43264      43258   
102  1971  Aguascalientes      ND  63.05       371369      45466      44625   
103  1972  Aguascalientes      ND  63.60       386794      47825      46056   
104  1973  Aguascalientes      ND  63.99       402633      50326      47551   
105  1974  Aguascalientes      ND  64.90       418900      52968      49130   

     HOM_15_29  HOM_30_64  
101      42596      39575  
102      44710      40837  
103      46959      42162  
104      49340      43542  
105      51849      45002  


In [6]:
# Add per-sex totals and an overall total.
# NOTE: only the 15-29 and 30-64 age bands are summed, so MUJTOTAL, HOMTOTAL
# and TOTAL cover ages 15-64 rather than the whole population.
# .assign() returns a new frame instead of writing columns into the filtered
# frame, which avoids SettingWithCopyWarning; the callables are evaluated in
# order, so TOTAL can use the two columns created just before it.
ind_dem_subset = ind_dem_subset.assign(
    MUJTOTAL=lambda df: df['MUJ_15_29'] + df['MUJ_30_64'],
    HOMTOTAL=lambda df: df['HOM_15_29'] + df['HOM_30_64'],
    TOTAL=lambda df: df['MUJTOTAL'] + df['HOMTOTAL'],
)
print(ind_dem_subset.head())

      AÑO         ENTIDAD CRE_TOT     EV  POB_MIT_AÑO  MUJ_15_29  MUJ_30_64  \
101  1970  Aguascalientes      ND  61.12       356491      43264      43258   
102  1971  Aguascalientes      ND  63.05       371369      45466      44625   
103  1972  Aguascalientes      ND  63.60       386794      47825      46056   
104  1973  Aguascalientes      ND  63.99       402633      50326      47551   
105  1974  Aguascalientes      ND  64.90       418900      52968      49130   

     HOM_15_29  HOM_30_64  MUJTOTAL  HOMTOTAL   TOTAL  
101      42596      39575     86522     82171  168693  
102      44710      40837     90091     85547  175638  
103      46959      42162     93881     89121  183002  
104      49340      43542     97877     92882  190759  
105      51849      45002    102098     96851  198949  


## Show ranking of states by population aged 15–64 (the `TOTAL` column) for 2019

In [12]:
# Build the 2019 snapshot: one row per ENTIDAD with its TOTAL and that TOTAL's
# share of the summed TOTAL across the selected rows.
# Rows and columns are selected in a single .loc call and the share is added
# with .assign(), so no column is ever written into a slice of another frame
# (the original chained selection + assignment raised SettingWithCopyWarning).
pop_by_state_2019 = (
    ind_dem_subset
    .loc[ind_dem_subset['AÑO'] == 2019, ['ENTIDAD', 'TOTAL']]
    .assign(pct_of_total=lambda df: df['TOTAL'] / df['TOTAL'].sum())
)
print(pop_by_state_2019.head())

                 ENTIDAD    TOTAL  pct_of_total
150       Aguascalientes   938432      0.011163
231      Baja California  2451533      0.029162
312  Baja California Sur   537779      0.006397
393             Campeche   655234      0.007794
474             Coahuila  2087340      0.024830


In [15]:
# Display formats: thousands separators for TOTAL, two-decimal percent for the share
format_dict = {'TOTAL':'{0:,.0f}', 'pct_of_total': '{:.2%}'}

# Format the numbers and shade both numeric columns (requires Jinja2 and matplotlib)
styled_pop = (pop_by_state_2019
    .style
    .format(format_dict)
    .background_gradient(subset=['TOTAL', 'pct_of_total'], cmap='BuGn'))

# Hide the row index in the rendered table. Styler.hide_index() was deprecated
# in pandas 1.4 and removed in 2.0 in favour of Styler.hide(axis="index");
# fall back to the old method only when running on an older pandas.
# This is the last expression of the cell, so the styled table is displayed.
styled_pop.hide(axis="index") if hasattr(styled_pop, "hide") else styled_pop.hide_index()

ENTIDAD,TOTAL,pct_of_total
Aguascalientes,938432,1.12%
Baja California,2451533,2.92%
Baja California Sur,537779,0.64%
Campeche,655234,0.78%
Coahuila,2087340,2.48%
Colima,517977,0.62%
Chiapas,3497943,4.16%
Chihuahua,2511308,2.99%
Ciudad de México,6390446,7.60%
Durango,1197531,1.42%
