#### Carga de CSVs


In [None]:
import pandas as pd
import numpy as np

# Load the two source tables. Both paths are relative ('./'), so the CSV
# files must be in the kernel's current working directory.
df_imp = pd.read_csv('./df_importacion.csv')  # read from df_importacion.csv
df_con = pd.read_csv('./df_consumo.csv')  # read from df_consumo.csv

#### Definición de códigos de faltante/especiales


In [None]:
# Special codes that may appear in place of a real value, mapped to a
# human-readable label. Only the keys are matched against column values when
# the per-column summary is built; the labels are informational.
sentinels = {-1: 'sin dato', 999: 'no aplica'}

#### Función para generar resumen por columna


In [None]:
def resumir_columna(col, serie, codigos_especiales=None):
    """Summarise a single column as one codebook row.

    Parameters
    ----------
    col : hashable
        Column label; stored under ``'Nombre'``.
    serie : pandas.Series
        The column's values.
    codigos_especiales : iterable, optional
        Codes to look for as special/missing markers. Iterating a mapping
        yields its keys, so a ``{code: label}`` dict can be passed directly.
        When omitted, the module-level ``sentinels`` is used.

    Returns
    -------
    dict
        Keys, in insertion order: ``'Nombre'``, ``'Tipo de dato'``,
        ``'COUNT'`` (non-null count), ``'Missing (NaN)'``,
        ``'Valores especiales/faltante'`` (``{code: occurrences}`` for the
        codes that occur at least once), then either
        ``'MIN'``/``'MAX'``/``'AVG'``/``'MEDIAN'`` for numeric dtypes or
        ``'UNIQUE'``/``'TOP'`` for every other dtype, and finally
        ``'Valores posibles (muestra)'`` (up to 10 distinct non-null values).

    Notes
    -----
    The numeric statistics are computed on the raw values, so any special
    codes present in the column are included in MIN/MAX/AVG/MEDIAN.
    """
    if codigos_especiales is None:
        # Looked up at call time so the notebook-level mapping stays the default.
        codigos_especiales = sentinels

    resumen = {
        'Nombre': col,
        'Tipo de dato': str(serie.dtype),
        # count() skips nulls; isna().sum() counts them.
        'COUNT': int(serie.count()),
        'Missing (NaN)': int(serie.isna().sum()),
    }

    # Scan the column once per code and keep only the codes that occur.
    apariciones = {}
    for code in codigos_especiales:
        n = int((serie == code).sum())
        if n > 0:
            apariciones[code] = n
    resumen['Valores especiales/faltante'] = apariciones

    if pd.api.types.is_numeric_dtype(serie):
        # Basic statistics for numeric columns.
        resumen.update({
            'MIN': serie.min(),
            'MAX': serie.max(),
            'AVG': serie.mean(),
            'MEDIAN': serie.median(),
        })
    else:
        # mode() comes back empty when there are no non-null values, so
        # compute it once and guard the positional access.
        modas = serie.mode()
        resumen.update({
            'UNIQUE': int(serie.nunique()),
            'TOP': modas.iat[0] if not modas.empty else None,
        })

    # Sample of the distinct non-null values, capped at 10, in order of
    # first appearance.
    resumen['Valores posibles (muestra)'] = list(serie.dropna().unique()[:10])
    return resumen



#### Generar codebook para cada DataFrame

In [None]:
def generar_codebook(df):
    """Build a codebook for ``df``: one summary row per column.

    Each column is passed, together with its label, to ``resumir_columna``;
    the resulting dicts are assembled into a new DataFrame in column order.
    """
    filas = []
    for nombre in df.columns:
        filas.append(resumir_columna(nombre, df[nombre]))
    return pd.DataFrame(filas)

# Build one codebook per loaded table (df_imp and df_con).
codebook_imp = generar_codebook(df_imp)
codebook_con = generar_codebook(df_con)

Unnamed: 0,Nombre,Tipo de dato,COUNT,Missing (NaN),Valores especiales/faltante,UNIQUE,TOP,Valores posibles (muestra),MIN,MAX,AVG,MEDIAN
0,Fecha,object,300,0,{},300.0,2000-01-01,"[2000-01-01, 2000-02-01, 2000-03-01, 2000-04-0...",,,,
1,Gasolina regular,float64,300,0,{},,,"[202645.2, 205530.96, 229499.56, 210680.4, 208...",160741.9,938086.57,397478.894458,288193.13
2,Gasolina superior,float64,300,0,{},,,"[308156.82, 307766.31, 331910.29, 315648.08, 3...",300242.781667,786598.12,470248.678849,423319.54
3,Diesel,float64,300,0,{},,,"[634667.06, 642380.66, 699807.25, 586803.98, 6...",507662.65,1499266.1,882130.153945,811038.175


#### Codebook Importaciones

In [None]:
# Render the codebook built from df_imp as a rich table.
display(codebook_imp)


Unnamed: 0,Nombre,Tipo de dato,COUNT,Missing (NaN),Valores especiales/faltante,UNIQUE,TOP,Valores posibles (muestra),MIN,MAX,AVG,MEDIAN
0,Fecha,object,288,0,{},288.0,2001-01-01,"[2001-01-01, 2001-02-01, 2001-03-01, 2001-04-0...",,,,
1,Gasolina regular,float64,288,0,{},,,"[177776.5, 123115.99, 161726.42, 127338.74, 16...",81015.3,1141365.94,412949.327027,327559.34
2,Gasolina superior,float64,288,0,{},,,"[373963.96, 243091.07, 312084.38, 285054.89, 3...",170292.5,1227173.53,490954.382872,481300.21
3,Diesel,float64,288,0,{},,,"[566101.99, 489525.8, 575559.68, 437745.42, 55...",229764.74,1630635.64,895095.716201,846286.105


#### Codebook Consumo

In [6]:
# Render the codebook built from df_con as a rich table.
display(codebook_con)

Unnamed: 0,Nombre,Tipo de dato,COUNT,Missing (NaN),Valores especiales/faltante,UNIQUE,TOP,Valores posibles (muestra),MIN,MAX,AVG,MEDIAN
0,Fecha,object,300,0,{},300.0,2000-01-01,"[2000-01-01, 2000-02-01, 2000-03-01, 2000-04-0...",,,,
1,Gasolina regular,float64,300,0,{},,,"[202645.2, 205530.96, 229499.56, 210680.4, 208...",160741.9,938086.57,397478.894458,288193.13
2,Gasolina superior,float64,300,0,{},,,"[308156.82, 307766.31, 331910.29, 315648.08, 3...",300242.781667,786598.12,470248.678849,423319.54
3,Diesel,float64,300,0,{},,,"[634667.06, 642380.66, 699807.25, 586803.98, 6...",507662.65,1499266.1,882130.153945,811038.175


#### Exportación como CSVs

In [7]:
# Write both codebooks to CSV, using relative file names.
# index=False leaves the DataFrame's row index out of the files.
codebook_con.to_csv('codebook_con.csv', index=False)
codebook_imp.to_csv('codebook_imp.csv', index=False)