### Importación de tabla y generación de tablas por actividad económica (PIB)

In [None]:
import pandas as pd
import chardet

def reformat_column_labels(text, text_to_delete):
    """Return ``text`` with every occurrence of ``text_to_delete`` removed
    and the surrounding whitespace trimmed from both ends."""
    cleaned = text.replace(text_to_delete, '')
    return cleaned.strip()

def create_pib_pct_df(df, total_column_label='Total nacional'):
    """
    Express every column of a DataFrame as a percentage of its totals column.

    df: DataFrame that contains the totals column alongside the other columns
    total_column_label: name of the column with the total values

    Returns a new DataFrame of the same shape in which each value has been
    divided, row by row, by that row's total and multiplied by 100 (so the
    totals column itself becomes 100).
    """
    row_totals = df[total_column_label]
    shares = df.div(row_totals, axis='index')
    return shares * 100
    
    
    
# Detect the text encoding of the source CSV from its raw bytes so that
# read_csv below can be told how to decode it.
with open('PIB_estatal_actividades_econ.csv', 'rb') as f:
    result = chardet.detect(f.read())  # or readline if the file is large


# Load the table, skipping file rows 0, 1, 23, 24 and 25 (presumably title and
# footnote lines -- confirm against the CSV), and use 'Periodo' as the index.
pib = pd.read_csv('PIB_estatal_actividades_econ.csv', skiprows=[0,1, 23, 24, 25], encoding=result['encoding'])\
        .set_index('Periodo')

# Split the df in three, one for each type of activity.
# First shorten every column label: drop the long prefix shared by all columns
# and the ' r1 / p1 / ' marker.
text_to_delete = 'Cuentas nacionales > Producto interno bruto por entidad federativa, base 2013 > Por actividad económica y entidad federativa > Valores a precios constantes de 2013 >'
pib.columns = [col.replace(text_to_delete, '').replace(' r1 / p1 / ', '') for col in pib.columns.values]

# Create df with actividades primarias (levels and national pct)
# Each of the three sections below follows the same steps: keep the columns
# whose label mentions the sector, strip the sector prefix from the labels,
# write the levels to data/, then write the percentages computed by
# create_pib_pct_df (which, by default, divides by the 'Total nacional' column).
pib_prim_cols = [col for col in pib.columns if 'primarias' in col]
pib_prim = pib[pib_prim_cols]
# NOTE(review): this prefix ends in 'primaria' (no trailing 's'), unlike the
# 'secundarias' / 'terciarias' prefixes below -- confirm against the CSV headers.
text_to_delete = 'Actividades primarias > Total actividades primaria'
pib_prim.columns = [reformat_column_labels(col, text_to_delete) for col in pib_prim.columns.values]
pib_prim.to_csv('data/PIB_primario.csv')
pib_prim_pct = create_pib_pct_df(pib_prim)
pib_prim_pct.to_csv('data/PIB_primario_pct.csv')


# Create df with actividades secundarias (levels and national pct)
pib_sec_cols = [col for col in pib.columns if 'secundarias' in col]
pib_sec = pib[pib_sec_cols]
text_to_delete = 'Actividades secundarias > Total actividades secundarias'
pib_sec.columns = [reformat_column_labels(col, text_to_delete) for col in pib_sec.columns.values]
pib_sec.to_csv('data/PIB_secundario.csv')
pib_sec_pct = create_pib_pct_df(pib_sec)
pib_sec_pct.to_csv('data/PIB_secundario_pct.csv')

# Create df with actividades terciarias (levels and national pct)
pib_ter_cols = [col for col in pib.columns if 'terciarias' in col] 
pib_ter = pib[pib_ter_cols]
text_to_delete = 'Actividades terciarias > Total actividades terciarias'
pib_ter.columns = [reformat_column_labels(col, text_to_delete) for col in pib_ter.columns.values]
pib_ter.to_csv('data/PIB_terciario.csv')
pib_ter_pct = create_pib_pct_df(pib_ter)
pib_ter_pct.to_csv('data/PIB_terciario_pct.csv')

In [31]:
## Graficar PIB primario, secundario y terciario (como porcentajes del nacional)
import plotly.graph_objs as go
import json
import pandas as pd
import numpy as np

# User-selectable parameters: which sectors to add up and which year to plot.
user_sectors_array = ['prim', 'sec', 'ter']
user_year = 2000

# For each sector code, the CSV file names holding its levels and its
# percentage-of-national-total tables.
pib_dict = {
    'prim': {
        'levels': {'file': 'PIB_primario.csv'},
        'pct': {'file': 'PIB_primario_pct.csv'},
    },
    'sec': {
        'levels': {'file': 'PIB_secundario.csv'},
        'pct': {'file': 'PIB_secundario_pct.csv'},
    },
    'ter': {
        'levels': {'file': 'PIB_terciario.csv'},
        'pct': {'file': 'PIB_terciario_pct.csv'},
    },
}


# Load each sector's levels table from data/ into a lookup keyed by sector
# code; every entry is a dict holding the DataFrame under the 'df' key.
dfs = {}
for sector in ('prim', 'sec', 'ter'):
    file = pib_dict[sector]['levels']['file']
    df = pd.read_csv(f'data/{file}', index_col=0)
    dfs[sector] = {'df': df}

def create_pib_trace(df, year, total_column_label='Total nacional'):
    """Build a bar trace for one row of ``df``.

    df: DataFrame whose row labels include ``year``
    year: row label to plot
    total_column_label: column left out of the chart

    Returns a ``go.Bar`` whose x values are the remaining column labels and
    whose y values are that row's entries for those columns.
    """
    categories = []
    for label in df.columns.values:
        if label != total_column_label:
            categories.append(label)
    heights = df.loc[year, categories]
    return go.Bar(x=categories, y=heights)

def create_pib_pct_graph(sectors_array, year):
    """Build a bar chart of percentage contributions to GDP for the given
    sectors and year.

    sectors_array: sector codes to add together, e.g. ['prim', 'sec', 'ter']
                   (one or more may be chosen)
    year: row label of the year to plot

    The levels tables of the chosen sectors are read from the module-level
    ``dfs`` lookup, added together, converted to percentages with
    ``create_pib_pct_df`` and displayed before the figure is returned.
    """
    # Start from an all-zero frame shaped and labelled like the first sector's table.
    template = dfs[sectors_array[0]]['df']
    totals = pd.DataFrame(
        np.zeros(template.shape),
        index=template.index,
        columns=template.columns,
    )
    for sector_code in sectors_array:
        totals = totals.add(dfs[sector_code]['df'])

    # Turn the summed levels into percentages and show the table.
    totals_pct = create_pib_pct_df(totals)
    display(totals_pct)

    # One bar trace for the requested year, with bars ordered from largest to smallest.
    bars = create_pib_trace(totals_pct, year)
    axis_options = dict(tickmode='linear', categoryorder='total descending')
    chart_layout = go.Layout(xaxis=axis_options)
    return go.Figure(data=[bars], layout=chart_layout)
# Build and render the chart for the user-selected sectors and year.
totals_fig = create_pib_pct_graph(user_sectors_array, user_year)
totals_fig.show()

Unnamed: 0_level_0,Total nacional,Aguascalientes,Baja California,Baja California Sur,Campeche,Coahuila de Zaragoza,Colima,Chiapas,Chihuahua,Ciudad de México,...,Quintana Roo,San Luis Potosí,Sinaloa,Sonora,Tabasco,Tamaulipas,Tlaxcala,Veracruz de Ignacio de la Llave,Yucatán,Zacatecas
Periodo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000,100.0,0.875012,3.656923,0.564814,7.049669,3.19462,0.565312,1.964046,3.001168,17.486739,...,1.061132,1.736715,2.271884,3.388506,3.289684,3.050097,0.678779,4.960792,1.270705,0.707104
2001,100.0,0.925019,3.399839,0.598247,7.612524,3.20335,0.556528,1.958205,2.874522,17.344489,...,1.12051,1.729841,2.232357,3.291699,3.208253,2.918242,0.704725,4.887236,1.298729,0.735949
2002,100.0,0.968886,3.258234,0.582016,7.814392,3.375482,0.561548,1.988852,2.836645,17.511701,...,1.118866,1.735967,2.159707,3.038301,3.024744,3.033121,0.682716,4.864131,1.288982,0.791932
2003,100.0,0.965018,3.181076,0.605518,8.340653,3.476151,0.539313,1.975644,2.869844,16.983132,...,1.148434,1.785798,2.135877,2.910508,2.985017,3.117853,0.662899,4.88562,1.287006,0.807433
2004,100.0,0.969713,3.241251,0.624841,8.118814,3.441534,0.519467,1.826539,2.886152,17.06385,...,1.202554,1.819555,2.181177,2.946085,2.997873,3.142461,0.688008,4.933529,1.295056,0.809648
2005,100.0,0.971162,3.24406,0.654776,7.780607,3.437799,0.511384,1.800154,2.915934,16.917431,...,1.226289,1.852304,2.140504,3.033336,3.154694,3.270153,0.593954,4.861552,1.325377,0.791607
2006,100.0,0.991372,3.273324,0.672264,7.280543,3.448962,0.52065,1.783127,3.012131,17.045851,...,1.251595,1.856596,2.117174,3.083866,3.196449,3.227312,0.559097,4.913779,1.331258,0.804718
2007,100.0,1.054442,3.238152,0.745026,6.64757,3.511031,0.543874,1.77163,3.04922,16.896923,...,1.353292,1.854266,2.144048,3.054463,3.185522,3.255175,0.55435,4.950088,1.341454,0.813325
2008,100.0,1.048063,3.176868,0.756628,6.021285,3.45994,0.548185,1.793336,3.060476,17.013349,...,1.409581,1.874808,2.196668,3.032178,3.299388,3.355959,0.577992,4.890134,1.341123,0.877496
2009,100.0,1.049591,2.987468,0.793775,5.720444,3.086977,0.56011,1.880774,2.938622,17.309655,...,1.360377,1.874529,2.220501,3.006725,3.633677,3.221876,0.598888,5.04802,1.38744,0.956234


## Stacked barchart code

In [15]:
# Demo: five bar series that share the same three dates, drawn stacked.
dates = ['2016-04-01', '2016-07-01', '2016-10-01']

trace1 = go.Bar(name='Brn', x=dates, y=[20, 14, 23])
trace2 = go.Bar(name='Wrl', x=dates, y=[12, 18, 29])
trace3 = go.Bar(name='Lpl', x=dates, y=[20, 5, 12])
trace4 = go.Bar(name='HNE', x=dates, y=[3, 18, 4])
trace5 = go.Bar(name='Zoo', x=dates, y=[12, 3, 29])

data = [trace1, trace2, trace3, trace4, trace5]

# Stack the bars and place one tick on each of the three dates.
layout = go.Layout(
    xaxis={'tickvals': ['2016-04-01', '2016-07-01', '2016-10-01']},
    barmode='stack',
)

fig = go.Figure(layout=layout, data=data)
fig.show()

### Ejemplo incompleto con API call a INEGI

In [4]:
# Ejemplo con API call
import pandas as pd
import requests
import json

# API call
# see: https://www.inegi.org.mx/servicios/api_indicadores.html
# NOTE(review): this token is a credential committed in plain text; consider
# rotating it and loading it from configuration instead of the source.
token = '5c520796-e0a7-f03b-0cdb-5101e2948d18'

url = f'https://www.inegi.org.mx/app/api/indicadores/desarrolladores/jsonxml/INDICATOR/472079,472080,472081,472082,472083,472084,472085,472086,472087,472088/es/0700/true/BIE/2.0/{token}?type=json'
# A timeout keeps the cell from hanging indefinitely if the service never answers.
response = requests.get(url, timeout=30)

if response.status_code == 200:
    # Decode the body once, and only after the status check: an error response
    # is not guaranteed to be JSON, so decoding it first could raise.
    content = response.json()
    print(json.dumps(content, indent=2))

    # Only the first series of the response is used below.
    Series = content['Series'][0]['OBSERVATIONS']

    # Collect the observation values as floats
    Observaciones = [float(obs['OBS_VALUE']) for obs in Series]

    # Average of the observations; the builtin sum() is used so that no
    # variable named `sum` shadows it for the rest of the session.
    if Observaciones:
        resultado = sum(Observaciones) / len(Observaciones)
        print(resultado)
    else:
        # Avoid a ZeroDivisionError when the series has no observations.
        print('No observations returned; cannot compute an average.')
else:
    print(f'INEGI request failed with HTTP status {response.status_code}')

{
  "Header": {
    "Name": "Datos compactos BIE",
    "Email": "atencion.usuarios@inegi.org.mx"
  },
  "Series": [
    {
      "INDICADOR": "472079",
      "FREQ": "3",
      "TOPIC": "102000430020002000700002",
      "UNIT": "317",
      "NOTE": "",
      "SOURCE": "17",
      "LASTUPDATE": "16/12/2019 01:19:03 p. m.",
      "OBSERVATIONS": [
        {
          "TIME_PERIOD": "2018",
          "OBS_VALUE": "3.12918e+006",
          "OBS_EXCEPTION": "",
          "OBS_STATUS": "1",
          "OBS_SOURCE": "17",
          "OBS_NOTE": null,
          "COBER_GEO": "00"
        }
      ]
    },
    {
      "INDICADOR": "472080",
      "FREQ": "3",
      "TOPIC": "102000430020002000700004",
      "UNIT": "317",
      "NOTE": "",
      "SOURCE": "17",
      "LASTUPDATE": "16/12/2019 01:19:03 p. m.",
      "OBSERVATIONS": [
        {
          "TIME_PERIOD": "2018",
          "OBS_VALUE": "3.12918e+006",
          "OBS_EXCEPTION": "",
          "OBS_STATUS": "1",
          "OBS_SOURCE": "17