In [1]:
import numpy as np  # referenced as np.log(...) inside the model formulas
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# in-house helper functions
from functions.cargar_data import *
from functions.modelos import *

In [2]:
# Load the dyadic table (one row per origin/destination department pair).
# Missing border lengths are replaced by a tiny positive value so that
# np.log(largo_limite) in the model formula is defined for every row.
# NOTE(review): NaN presumably marks pairs that share no border - confirm.
dd_deptos = (
    pd.read_csv('tablas/dd_deptos.csv')
    .assign(largo_limite=lambda tabla: tabla['largo_limite'].fillna(0.0001))
)
dd_deptos.head(2)

Unnamed: 0,cod,depto_origen,depto_destino,personas_mig,personas_mig_100k,pob_origen,pob_destino,dist,pbi_origen,pbi_porcen_ori,pbi_destino,pbi_porcen_des,largo_limite,dummy_limit,empresas_origen,empresas_destino,log_pbi_destino,log_dist,nom_depto_orig,nom_depto_des
0,102,1,2,914,891,1318755,73377,610726,465848031,50.3,14214980,1.5,0.0001,False,3124,55,16.469807,13.322404,MONTEVIDEO,ARTIGAS
1,103,1,3,33127,0,1318755,520173,22544,465848031,50.3,86306492,9.3,60107.0,True,3124,563,18.273415,10.023224,MONTEVIDEO,CANELONES


## Modelo con Montevideo

In [3]:
# Model specification: migrant counts explained by origin-department
# levels, the border indicator, log border length, destination GDP terms
# and log distance. The trailing "-1" drops the intercept, so every origin
# department gets its own coefficient.
# The formula calls np.log, which is resolved from the notebook namespace
# when the model is built, so numpy must be importable as `np` here.
formula = (
    "personas_mig ~ nom_depto_orig + dummy_limit + np.log(largo_limite)"
    " + log_pbi_destino + pbi_porcen_des + log_dist -1"
)

# Build the Poisson GLM on the full dyadic table, then fit it.
poisson_glm = smf.glm(
    formula=formula,
    data=dd_deptos,
    family=sm.families.Poisson(),
)
prodSim = poisson_glm.fit()

# Show the fitted coefficient table.
prodSim.summary()

0,1,2,3
Dep. Variable:,personas_mig,No. Observations:,342.0
Model:,GLM,Df Residuals:,318.0
Model Family:,Poisson,Df Model:,23.0
Link Function:,log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-9050.1
Date:,"Tue, 24 Aug 2021",Deviance:,15872.0
Time:,11:56:47,Pearson chi2:,17700.0
No. Iterations:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
nom_depto_orig[ARTIGAS],-10.7752,0.192,-56.217,0.000,-11.151,-10.400
nom_depto_orig[CANELONES],-10.6602,0.183,-58.143,0.000,-11.020,-10.301
nom_depto_orig[CERRO LARGO],-11.0359,0.190,-58.063,0.000,-11.408,-10.663
nom_depto_orig[COLONIA],-11.0233,0.187,-58.839,0.000,-11.390,-10.656
nom_depto_orig[DURAZNO],-11.3842,0.188,-60.592,0.000,-11.752,-11.016
nom_depto_orig[FLORES],-12.2325,0.189,-64.824,0.000,-12.602,-11.863
nom_depto_orig[FLORIDA],-11.5357,0.187,-61.554,0.000,-11.903,-11.168
nom_depto_orig[LAVALLEJA],-11.7013,0.189,-61.994,0.000,-12.071,-11.331
nom_depto_orig[MALDONADO],-10.9373,0.188,-58.313,0.000,-11.305,-10.570


In [4]:
# Score the fit in-sample: observed migrant counts against the model's
# predictions for the same rows it was fitted on.
observed_flows = dd_deptos['personas_mig']
fitted_flows = prodSim.predict()
print_scores_simple(observed_flows, fitted_flows)

$R²$ = 0.988

RMSE = 213.5883

In [5]:
# Persist the fitted results object as a pickle.
# remove_data=False is the library default, spelled out here: the pickle
# keeps the data arrays attached to the results.
prodSim.save(fname="modelos/modelo_X.pickle", remove_data=False)

## Modelo sin Montevideo

In [None]:
# Keep only the pairs where neither end is department code 1 (Montevideo).
involves_mvo = (dd_deptos['depto_origen'] == 1) | (dd_deptos['depto_destino'] == 1)
dd_deptos_no_mvo = dd_deptos.loc[~involves_mvo]

In [None]:
# Same specification as the model fitted on all departments; the trailing
# "-1" drops the intercept so each origin department gets its own level.
formula = (
    "personas_mig ~ nom_depto_orig + dummy_limit + np.log(largo_limite)"
    " + log_pbi_destino + pbi_porcen_des + log_dist -1"
)

# Build and fit the Poisson GLM on the subset that excludes Montevideo.
# NOTE(review): this rebinds prodSim, so the model fitted on the full table
# is no longer reachable under that name after this cell runs.
poisson_glm_no_mvo = smf.glm(
    formula=formula,
    data=dd_deptos_no_mvo,
    family=sm.families.Poisson(),
)
prodSim = poisson_glm_no_mvo.fit()

# Show the fitted coefficient table.
prodSim.summary()

In [None]:
# Persist the results of the model fitted without Montevideo as a pickle.
# remove_data=False is the library default, spelled out here: the pickle
# keeps the data arrays attached to the results.
prodSim.save(fname="modelos/modelo_Y_noMVO.pickle", remove_data=False)