# Regresión Poisson

Modelo simple de regresión de Poisson aplicado a un conjunto de datos.

## 1. Importación de bibliotecas y datos:

In [None]:
import pandas as pd
import statsmodels.api as sm
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.iolib.summary2 import summary_col

# Omitting warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the Stata file into a DataFrame and preview its first five rows.
fertil_df = pd.read_stata(filepath_or_buffer='FERTIL2.dta')
fertil_df.head(n=5)

### Datos importados:
    1. mnthborn                 month woman born
    2. yearborn                 year woman born
    3. age                      age in years
    4. electric                 =1 if has electricity
    5. radio                    =1 if has radio
    6. tv                       =1 if has tv
    7. bicycle                  =1 if has bicycle
    8. educ                     years of education
    9. ceb                      children ever born
    10. agefbrth                 age at first birth
    11. children                 number of living children
    12. knowmeth                 =1 if know about birth control
    13. usemeth                  =1 if ever use birth control
    14. monthfm                  month of first marriage
    15. yearfm                   year of first marriage
    16. agefm                    age at first marriage
    17. idlnchld                 'ideal' number of children
    18. heduc                    husband's years of education
    19. agesq                    age^2
    20. urban                    =1 if live in urban area
    21. urbeduc                  urban*educ
    22. spirit                   =1 if religion == spirit
    23. protest                  =1 if religion == protestant
    24. catholic                 =1 if religion == catholic
    25. frsthalf                 =1 if mnthborn <= 6
    26. educ0                    =1 if educ == 0
    27. evermarr                 =1 if ever married


Utilizamos los datos para estimar los efectos de la educación sobre la fertilidad de las mujeres en Botswana. La variable de respuesta es el número de hijos vivos (children). Usamos una función de regresión exponencial estándar (regresión de Poisson); las variables explicativas son los años de escolaridad (educ), una función cuadrática de la edad (age, agesq) e indicadores binarios de si la mujer ha estado casada alguna vez (evermarr), vive en un área urbana (urban), tiene electricidad (electric) y posee un televisor (tv).

También se incluye un modelo de regresión lineal, con los errores estándar de OLS habituales.

## 2. Análisis descriptivo

In [None]:
# Keep only the response and the regressors used below, then drop every row
# that has a missing value in any of those columns.
fertil_df = fertil_df.loc[
    :,
    ['children', 'educ', 'age', 'agesq', 'evermarr', 'urban', 'electric', 'tv'],
].dropna(axis=0, how='any')

In [None]:
# Largest and smallest observed number of living children, as a (max, min) tuple.
(fertil_df['children'].max(), fertil_df['children'].min())

In [None]:
# Histogram of the number of living children, normalised so the bars
# integrate to 1.
# `density=True` replaces the former `normed=True` keyword, which was
# deprecated in Matplotlib 2.1 and is no longer accepted since 3.1.
plt.hist(fertil_df.children, bins=14, density=True, histtype='bar')

# NOTE(review): with density=True the y-axis shows a normalised density rather
# than a count; the label text is kept as originally written — confirm whether
# it should be changed.
plt.ylabel('Count of Children')
plt.title('Histogram number of living children')

plt.show()

In [None]:
# Scatter plot 1: number of living children against the mother's age.
x_values, y_values = fertil_df['age'], fertil_df['children']

plt.scatter(x=x_values, y=y_values, marker='o')

# Axis labels and title (runtime text kept in Spanish, as in the notebook).
plt.xlabel('Edad en años')
plt.ylabel('Número de hijos')
plt.title('Relación entre número de hijos y edad de la madre')

# To export the figure, call plt.savefig("Fig1.png") before plt.show().
plt.show()

In [None]:
# Scatter plot 2: number of living children against the mother's education.
x_values, y_values = fertil_df['educ'], fertil_df['children']

plt.scatter(x=x_values, y=y_values, marker='o')

# Axis labels and title (runtime text kept in Spanish, as in the notebook).
plt.xlabel('Años de educación')
plt.ylabel('Número de hijos')
plt.title('Relación entre número de hijos y educación de la madre')

# To export the figure, call plt.savefig("Fig2.png") before plt.show().
plt.show()

## 3. Estimación

In [None]:
# Variable definitions: Y is the response (living children); X holds the
# regressors with a constant column added by statsmodels for the intercept.
Y = fertil_df.loc[:, 'children']
X = sm.add_constant(
    fertil_df.loc[:, ['educ', 'age', 'agesq', 'evermarr', 'urban', 'electric', 'tv']]
)

In [None]:
# OLS: linear regression of Y on X, fitted with the default settings.
Model_OLS = sm.OLS(endog=Y, exog=X)
Model_OLS_res = Model_OLS.fit()

# Print the full regression report.
print(Model_OLS_res.summary())

In [None]:
# Poisson: count-data regression of Y on the same regressors, fitted with the
# default optimiser settings.
poisson_mod = sm.Poisson(endog=Y, exog=X)
poisson_res = poisson_mod.fit()

# Print the full regression report.
print(poisson_res.summary())

In [None]:
# Results: side-by-side table of the OLS and Poisson estimates, with
# significance stars and coefficients shown to two decimals.
results_table = summary_col(
    [Model_OLS_res, poisson_res],
    model_names=['Model OLS', 'Model Poisson'],
    regressor_order=['const', 'educ', 'age', 'agesq',
                     'evermarr', 'urban', 'electric', 'tv'],
    float_format='%0.2f',
    stars=True,
)

results_table.add_title('Table 1 - Comparative OLS and Poisson Regressions')

print(results_table)

In [None]:
# One-row DataFrame holding the sample mean of each regressor, with a leading
# constant column so the column order matches the design matrix used to fit
# the models.
X_mean = (
    fertil_df[['educ', 'age', 'agesq', 'evermarr', 'urban', 'electric', 'tv']]
    .mean()
    .to_frame()
    .T
)
X_mean.insert(0, 'const', 1)

In [None]:
# OLS prediction evaluated at the mean values of the regressors.
Model_OLS_res.predict(exog=X_mean)

In [None]:
# Poisson prediction evaluated at the mean values of the regressors.
poisson_res.predict(exog=X_mean)

In [None]:
# Marginal effects of the Poisson model, using the statsmodels defaults spelled
# out explicitly: dy/dx averaged over all observations.
poisson_margeff = poisson_res.get_margeff(at='overall', method='dydx')
print(poisson_margeff.summary())