In [10]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf

import os
from pathlib import Path

In [11]:
# Project directories, all relative to the notebook's working directory.
DATA_DIR = Path("../../data/")  # input data files
PLOTS_DIR = Path("../../plots/python/")  # figures written by save_plot
TAB_OUT = Path("../../data/processed")  # processed tables
LATEX_OUT = Path("../../docs/latex_utils/tables")  # LaTeX tables written by save_latex_table

# Create every directory (and any missing parents) up front; existing ones are left untouched.
for _project_dir in (DATA_DIR, PLOTS_DIR, TAB_OUT, LATEX_OUT):
    _project_dir.mkdir(parents=True, exist_ok=True)

In [12]:
def save_plot(
    plot: plt.Figure,
    filename: str,
    format: str = "png",
    dpi: int = 300,
    close: bool = True,
):
    """Save a matplotlib figure under ``PLOTS_DIR`` and print the outcome.

    The figure is written to ``PLOTS_DIR / f"{filename}.{format}"``. Errors
    raised while saving or closing the figure are caught and printed instead
    of being re-raised, so a failed export does not abort the calling cell.
    Errors from creating ``PLOTS_DIR`` itself are not caught.

    Args:
        plot: Figure to write to disk.
        filename: Output file name without extension.
        format: Format passed to ``savefig``; also used as the file extension.
        dpi: Resolution passed to ``savefig``.
        close: If true, close the figure once it has been saved.

    Returns:
        None. Success or failure is reported through ``print``.
    """
    # Make sure the output directory exists before writing into it.
    PLOTS_DIR.mkdir(parents=True, exist_ok=True)
    filepath = PLOTS_DIR / f"{filename}.{format}"
    try:
        plot.savefig(filepath, format=format, dpi=dpi, bbox_inches="tight")
        if close:
            plt.close(plot)
        # Report the actual destination path (the old message printed a
        # literal "<dir> + <file>" string left over from concatenation).
        print(f"Plot {filename}.{format} saved correctly in {filepath}")
    except Exception as e:
        # Best-effort export: report the problem and let the notebook continue.
        print(f"Could not save plot {filename}.{format}. Reason: {e}")

In [13]:
def save_latex_table(df, filename: str, rename_map: dict, caption: str, label: str):
    """Export a DataFrame as a LaTeX table file under ``LATEX_OUT``.

    Columns are renamed with ``rename_map`` before rendering. The table is
    rendered without the index and with floats formatted as ``%.4f``. Any
    error during rendering or writing is caught and printed instead of being
    re-raised, so a failed export does not abort the calling cell.

    Args:
        df: DataFrame to export.
        filename: Name of the output file (including extension) inside
            ``LATEX_OUT``.
        rename_map: Mapping of existing column names to the names to show in
            the table.
        caption: LaTeX caption for the table.
        label: LaTeX label for the table.

    Returns:
        None. Success or failure is reported through ``print``.
    """
    try:
        target = LATEX_OUT / filename
        table_tex = df.rename(columns=rename_map).to_latex(
            index=False,
            float_format="%.4f",
            caption=caption,
            label=label,
        )
        # Write UTF-8 explicitly: column names and captions may contain
        # accented characters, and the default encoding of open() depends on
        # the platform/locale.
        with open(target, "w", encoding="utf-8") as f:
            f.write(table_tex)
        # Report the actual destination path (the old message printed a
        # literal "<dir> + <file>" string left over from concatenation).
        print(f"File {filename} exported correctly in {target}")
    except Exception as e:
        # Best-effort export: report the problem and let the notebook continue.
        print(f"Could not export {filename}. Reason: {e}")

In [14]:
# Load the practice workbook; header=1 takes the column labels from the
# second row of the sheet (row indices are 0-based).
df = pd.read_excel(DATA_DIR / "DatosPracticaRLM.xlsx", header=1)
# Show the first five rows as a quick sanity check of the load.
print(df.head(5))

  Unnamed: 0  educación  ingreso  menores  urbano
0         ME        189     2824    350.7     508
1         NH        169     3259    345.9     564
2         VT        230     3072    348.5     322
3         MA        168     3835    335.3     846
4         RI        180     3549    327.1     871


In [15]:
# Raw spreadsheet headers -> column titles shown in the exported tables.
rename_map = {
    "Unnamed: 0": "Estado",  # label pandas gave the first column on load
    "educación": "Educación",
    "ingreso": "Ingreso",
    "menores": "Menores",
    "urbano": "Urbano",
}

# Export the first rows of the dataset as a LaTeX table.
save_latex_table(
    df=df.head(),
    filename="tabla_head_datos.tex",
    caption="Primeras filas del conjunto de datos utilizado en el análisis",
    label="tab:head_datos",
    rename_map=rename_map,
)

File tabla_head_datos.tex exported correctly in ../../docs/latex_utils/tables + tabla_head_datos.tex


In [16]:
def get_avg_per_column(df: pd.DataFrame, column_name: str) -> float:
    """Print and return the mean of one column of ``df``.

    Args:
        df: DataFrame holding the column.
        column_name: Name of the column to average.

    Returns:
        The value of ``df[column_name].mean()``.

    Raises:
        KeyError: If ``column_name`` is not a column of ``df``.
    """
    # Compute once so the printed and returned values are the same object.
    column_mean = df[column_name].mean()
    print("Promedio de la columna", column_name, "es:", column_mean)
    return column_mean


# Report the mean of each numeric predictor, one line per column.
for _predictor in ("ingreso", "menores", "urbano"):
    get_avg_per_column(df, _predictor)

Promedio de la columna ingreso es: 3225.294117647059
Promedio de la columna menores es: 358.8862745098039
Promedio de la columna urbano es: 664.5098039215686
