Aleksandra Bednarczuk

# Predictors of elderly citizens willingness to stay in the neighbourhood
# Logistic regression

Original dataset, including barris among predictors

In [34]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

In [35]:
from imblearn.over_sampling import SMOTE

import statsmodels.api as sm
import statsmodels.formula.api as smf

from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import r2_score

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_selection import f_classif
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder

from scipy import stats

In [36]:
data = pd.read_pickle("survey_transformed.pkl")

In [37]:
data.reset_index(drop=True, inplace=True)

In [38]:
data.head()

Unnamed: 0,Would_live_in_another_neighborhood,District,Woman,Car,Support_with_housework,Owner,House_size,Living_alone,Financial_situation,Disability,...,Barri_comparison,Barri_association,Barri_transport,BCN_transport,BCN_over_barri_evolution,BCN_over_barri_future,BCN_over_barri_satisfaction,BCN_over_barri_transport,BCN_over_barri_management,BCN_over_barri_security
0,0.0,SANTS - MONTJUÏC,0,1,0.0,1.0,51-100 M2,0,GOOD,0.0,...,ONE OF THE BEST,1.0,GOOD,GOOD,0,0,0,0,0,0
1,0.0,SANTS - MONTJUÏC,1,0,0.0,1.0,51-100 M2,1,BAD,0.0,...,ONE OF THE BEST,0.0,GOOD,GOOD,1,0,0,0,0,0
2,0.0,SANTS - MONTJUÏC,1,0,1.0,0.0,51-100 M2,1,GOOD,0.0,...,"NEITHER THE BEST, NOR THE WORST",0.0,BAD,"NEITHER BAD, NOR GOOD",0,0,0,1,0,0
3,1.0,SANTS - MONTJUÏC,0,0,0.0,1.0,< 50 M2,0,BAD,1.0,...,"NEITHER THE BEST, NOR THE WORST",0.0,"NEITHER BAD, NOR GOOD",GOOD,0,0,1,0,0,0
4,0.0,SANTS - MONTJUÏC,1,0,1.0,1.0,51-100 M2,0,GOOD,0.0,...,"NEITHER THE BEST, NOR THE WORST",0.0,GOOD,GOOD,0,1,0,0,0,0


In [39]:
data.shape

(1174, 32)

In [40]:
data.columns.tolist()

['Would_live_in_another_neighborhood',
 'District',
 'Woman',
 'Car',
 'Support_with_housework',
 'Owner',
 'House_size',
 'Living_alone',
 'Financial_situation',
 'Disability',
 'Living_with_disabled_person',
 'Time_living_in_barri',
 'Barri_evolution',
 'BCN_evolution',
 'Barri_future',
 'BCN_future',
 'Barri_satisfaction',
 'BCN_satisfaction',
 'Barri_security',
 'BCN_security',
 'Barri_management',
 'BCN_management',
 'Barri_comparison',
 'Barri_association',
 'Barri_transport',
 'BCN_transport',
 'BCN_over_barri_evolution',
 'BCN_over_barri_future',
 'BCN_over_barri_satisfaction',
 'BCN_over_barri_transport',
 'BCN_over_barri_management',
 'BCN_over_barri_security']

In [41]:
for i in data.columns.tolist():
    display(pd.crosstab(data[i], data.Would_live_in_another_neighborhood))

Would_live_in_another_neighborhood,0.0,1.0
Would_live_in_another_neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,1019,0
1.0,0,155


Would_live_in_another_neighborhood,0.0,1.0
District,Unnamed: 1_level_1,Unnamed: 2_level_1
CIUTAT VELLA,61,8
EIXAMPLE,179,20
GRÀCIA,84,12
HORTA - GUINARDÓ,112,21
LES CORTS,104,4
NOU BARRIS,94,23
SANT ANDREU,83,22
SANT MARTÍ,129,18
SANTS - MONTJUÏC,80,22
SARRIÀ - SANT GERVASI,93,5


Would_live_in_another_neighborhood,0.0,1.0
Woman,Unnamed: 1_level_1,Unnamed: 2_level_1
0,399,61
1,620,94


Would_live_in_another_neighborhood,0.0,1.0
Car,Unnamed: 1_level_1,Unnamed: 2_level_1
0,479,63
1,540,92


Would_live_in_another_neighborhood,0.0,1.0
Support_with_housework,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,656,110
1.0,363,45


Would_live_in_another_neighborhood,0.0,1.0
Owner,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,210,28
1.0,809,127


Would_live_in_another_neighborhood,0.0,1.0
House_size,Unnamed: 1_level_1,Unnamed: 2_level_1
101-150 M2,126,15
51-100 M2,724,115
< 50 M2,57,14
> 150 M2,43,5
I DON'T KNOW,69,6


Would_live_in_another_neighborhood,0.0,1.0
Living_alone,Unnamed: 1_level_1,Unnamed: 2_level_1
0,666,103
1,353,52


Would_live_in_another_neighborhood,0.0,1.0
Financial_situation,Unnamed: 1_level_1,Unnamed: 2_level_1
BAD,390,63
GOOD,629,92


Would_live_in_another_neighborhood,0.0,1.0
Disability,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,856,121
1.0,163,34


Would_live_in_another_neighborhood,0.0,1.0
Living_with_disabled_person,Unnamed: 1_level_1,Unnamed: 2_level_1
0,722,105
1,297,50


Would_live_in_another_neighborhood,0.0,1.0
Time_living_in_barri,Unnamed: 1_level_1,Unnamed: 2_level_1
I HAVE ALWAYS LIVED IN THE NEIGHBORHOOD,136,7
LESS THAN 10 YEARS,57,16
MORE THAN 10 YEARS,826,132


Would_live_in_another_neighborhood,0.0,1.0
Barri_evolution,Unnamed: 1_level_1,Unnamed: 2_level_1
IT HAS IMPROVED,411,59
IT HAS WORSENED,230,46
IT IS THE SAME,378,50


Would_live_in_another_neighborhood,0.0,1.0
BCN_evolution,Unnamed: 1_level_1,Unnamed: 2_level_1
IT HAS IMPROVED,272,36
IT HAS WORSENED,517,85
IT IS THE SAME,230,34


Would_live_in_another_neighborhood,0.0,1.0
Barri_future,Unnamed: 1_level_1,Unnamed: 2_level_1
IT WILL IMPROVE,577,79
IT WILL STAY THE SAME,288,45
IT WILL WORSEN,154,31


Would_live_in_another_neighborhood,0.0,1.0
BCN_future,Unnamed: 1_level_1,Unnamed: 2_level_1
IT WILL IMPROVE,546,92
IT WILL STAY THE SAME,241,32
IT WILL WORSEN,232,31


Would_live_in_another_neighborhood,0.0,1.0
Barri_satisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
FAIRLY SATISFIED,75,25
NOT SATISFIED,1,9
SATISFIED,943,121


Would_live_in_another_neighborhood,0.0,1.0
BCN_satisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
FAIRLY SATISFIED,118,22
NOT SATISFIED,8,1
SATISFIED,893,132


Would_live_in_another_neighborhood,0.0,1.0
Barri_security,Unnamed: 1_level_1,Unnamed: 2_level_1
BAD,152,40
GOOD,455,49
"NEITHER BAD, NOR GOOD",412,66


Would_live_in_another_neighborhood,0.0,1.0
BCN_security,Unnamed: 1_level_1,Unnamed: 2_level_1
BAD,199,40
GOOD,285,38
"NEITHER BAD, NOR GOOD",535,77


Would_live_in_another_neighborhood,0.0,1.0
Barri_management,Unnamed: 1_level_1,Unnamed: 2_level_1
BAD,65,17
GOOD,509,56
"NEITHER BAD, NOR GOOD",445,82


Would_live_in_another_neighborhood,0.0,1.0
BCN_management,Unnamed: 1_level_1,Unnamed: 2_level_1
BAD,96,21
GOOD,431,46
"NEITHER BAD, NOR GOOD",492,88


Would_live_in_another_neighborhood,0.0,1.0
Barri_comparison,Unnamed: 1_level_1,Unnamed: 2_level_1
"NEITHER THE BEST, NOR THE WORST",341,94
ONE OF THE BEST,649,41
ONE OF THE WORST,29,20


Would_live_in_another_neighborhood,0.0,1.0
Barri_association,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,805,128
1.0,214,27


Would_live_in_another_neighborhood,0.0,1.0
Barri_transport,Unnamed: 1_level_1,Unnamed: 2_level_1
BAD,149,22
GOOD,812,120
"NEITHER BAD, NOR GOOD",58,13


Would_live_in_another_neighborhood,0.0,1.0
BCN_transport,Unnamed: 1_level_1,Unnamed: 2_level_1
BAD,95,21
GOOD,824,125
"NEITHER BAD, NOR GOOD",100,9


Would_live_in_another_neighborhood,0.0,1.0
BCN_over_barri_evolution,Unnamed: 1_level_1,Unnamed: 2_level_1
0,913,140
1,106,15


Would_live_in_another_neighborhood,0.0,1.0
BCN_over_barri_future,Unnamed: 1_level_1,Unnamed: 2_level_1
0,871,126
1,148,29


Would_live_in_another_neighborhood,0.0,1.0
BCN_over_barri_satisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
0,968,126
1,51,29


Would_live_in_another_neighborhood,0.0,1.0
BCN_over_barri_transport,Unnamed: 1_level_1,Unnamed: 2_level_1
0,921,146
1,98,9


Would_live_in_another_neighborhood,0.0,1.0
BCN_over_barri_management,Unnamed: 1_level_1,Unnamed: 2_level_1
0,983,147
1,36,8


Would_live_in_another_neighborhood,0.0,1.0
BCN_over_barri_security,Unnamed: 1_level_1,Unnamed: 2_level_1
0,973,145
1,46,10


In [42]:
data.columns.tolist()

['Would_live_in_another_neighborhood',
 'District',
 'Woman',
 'Car',
 'Support_with_housework',
 'Owner',
 'House_size',
 'Living_alone',
 'Financial_situation',
 'Disability',
 'Living_with_disabled_person',
 'Time_living_in_barri',
 'Barri_evolution',
 'BCN_evolution',
 'Barri_future',
 'BCN_future',
 'Barri_satisfaction',
 'BCN_satisfaction',
 'Barri_security',
 'BCN_security',
 'Barri_management',
 'BCN_management',
 'Barri_comparison',
 'Barri_association',
 'Barri_transport',
 'BCN_transport',
 'BCN_over_barri_evolution',
 'BCN_over_barri_future',
 'BCN_over_barri_satisfaction',
 'BCN_over_barri_transport',
 'BCN_over_barri_management',
 'BCN_over_barri_security']

In [43]:
x_var = ['District',
 'Woman',
 'Car',
 'Support_with_housework',
 'Owner',
 'House_size',
 'Living_alone',
 'Financial_situation',
 'Disability',
 'Living_with_disabled_person',
 'Time_living_in_barri']
#  'Barri_evolution',
#  'BCN_evolution',
#  'Barri_future',
#  'BCN_future',
#  'Barri_satisfaction',
#  'BCN_satisfaction',
#  'Barri_security',
#  'BCN_security',
#  'Barri_management',
#  'BCN_management',
#  'Barri_comparison',
#  'Barri_association',
#  'Barri_transport',
#  'BCN_transport',
#  'BCN_over_barri_evolution',
#  'BCN_over_barri_future',
#  'BCN_over_barri_satisfaction',
#  'BCN_over_barri_transport',
#  'BCN_over_barri_management',
#  'BCN_over_barri_security']

In [44]:
y = data['Would_live_in_another_neighborhood']

In [45]:
x = pd.get_dummies(data[x_var])

In [46]:
x.columns

Index(['Woman', 'Car', 'Support_with_housework', 'Owner', 'Living_alone',
       'Disability', 'Living_with_disabled_person', 'District_CIUTAT VELLA',
       'District_EIXAMPLE', 'District_GRÀCIA', 'District_HORTA - GUINARDÓ',
       'District_LES CORTS', 'District_NOU BARRIS', 'District_SANT ANDREU',
       'District_SANT MARTÍ', 'District_SANTS - MONTJUÏC',
       'District_SARRIÀ - SANT GERVASI', 'House_size_101-150 M2',
       'House_size_51-100 M2', 'House_size_< 50 M2', 'House_size_> 150 M2',
       'House_size_I DON'T KNOW', 'Financial_situation_BAD',
       'Financial_situation_GOOD',
       'Time_living_in_barri_I HAVE ALWAYS LIVED IN THE NEIGHBORHOOD',
       'Time_living_in_barri_LESS THAN 10 YEARS',
       'Time_living_in_barri_MORE THAN 10 YEARS'],
      dtype='object')

In [47]:
for i in ['District_EIXAMPLE',
          'House_size_< 50 M2',
          'Financial_situation_GOOD',
          'Time_living_in_barri_LESS THAN 10 YEARS']:
    del x[i]

In [48]:
x = sm.add_constant(x)

In [49]:
# logit_model = sm.Logit(y,x)
# result=logit_model.fit()
# print(result.summary2())

In [50]:
# logit_roc_auc = roc_auc_score(y, result.predict(x))
# fpr, tpr, thresholds = roc_curve(y, result.predict(x))
# plt.figure()
# plt.plot(fpr, tpr, label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
# plt.plot([0, 1], [0, 1],'r--')
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('Receiver operating characteristic')
# plt.legend(loc="lower right")
# #plt.savefig('Log_ROC')
# plt.show()

In [51]:
# result.pvalues.sort_values(ascending=False)

In [52]:
# display(result.get_margeff().summary())

In [53]:
# result.pred_table()

# Logit

In [54]:
glm_logit = sm.GLM(y, x, family=sm.families.Binomial())
glm_logit_results = glm_logit.fit()
glm_logit_results.summary()

0,1,2,3
Dep. Variable:,Would_live_in_another_neighborhood,No. Observations:,1174.0
Model:,GLM,Df Residuals:,1150.0
Model Family:,Binomial,Df Model:,23.0
Link Function:,logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-425.16
Date:,"Sun, 09 May 2021",Deviance:,850.33
Time:,08:50:27,Pearson chi2:,1170.0
No. Iterations:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-1.4857,0.533,-2.787,0.005,-2.531,-0.441
Woman,0.1320,0.191,0.689,0.491,-0.243,0.507
Car,0.4972,0.212,2.340,0.019,0.081,0.914
Support_with_housework,-0.0337,0.209,-0.161,0.872,-0.443,0.376
Owner,0.0094,0.257,0.037,0.971,-0.495,0.514
Living_alone,0.0716,0.222,0.323,0.746,-0.363,0.506
Disability,0.5546,0.345,1.606,0.108,-0.122,1.231
Living_with_disabled_person,-0.2527,0.304,-0.831,0.406,-0.849,0.344
District_CIUTAT VELLA,0.2954,0.470,0.629,0.530,-0.626,1.216


In [55]:
glm_logit_results.aic

898.3268395535035

In [56]:
glm_logit_results.bic

-7278.0709608927455

In [57]:
glm_logit_results.pvalues.sort_values(ascending=False)

Owner                                                           0.970855
Support_with_housework                                          0.871896
Financial_situation_BAD                                         0.799835
Living_alone                                                    0.746448
House_size_> 150 M2                                             0.701904
District_CIUTAT VELLA                                           0.529577
District_GRÀCIA                                                 0.518833
Woman                                                           0.490599
House_size_101-150 M2                                           0.454062
District_SANT MARTÍ                                             0.414806
Living_with_disabled_person                                     0.406247
House_size_51-100 M2                                            0.299649
District_HORTA - GUINARDÓ                                       0.183444
District_SARRIÀ - SANT GERVASI                     

In [58]:
for i in ['Owner', 'Support_with_housework', 'Financial_situation_BAD', 'Living_alone', 'Woman', 'Living_with_disabled_person']:
    del x[i]

In [59]:
glm_logit = sm.GLM(y, x, family=sm.families.Binomial())
glm_logit_results = glm_logit.fit()
glm_logit_results.summary()

0,1,2,3
Dep. Variable:,Would_live_in_another_neighborhood,No. Observations:,1174.0
Model:,GLM,Df Residuals:,1156.0
Model Family:,Binomial,Df Model:,17.0
Link Function:,logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-426.03
Date:,"Sun, 09 May 2021",Deviance:,852.07
Time:,08:50:28,Pearson chi2:,1170.0
No. Iterations:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-1.3288,0.465,-2.855,0.004,-2.241,-0.417
Car,0.4223,0.189,2.234,0.025,0.052,0.793
Disability,0.3448,0.222,1.552,0.121,-0.091,0.780
District_CIUTAT VELLA,0.2722,0.464,0.587,0.557,-0.637,1.182
District_GRÀCIA,0.2637,0.394,0.670,0.503,-0.508,1.035
District_HORTA - GUINARDÓ,0.4515,0.340,1.327,0.185,-0.215,1.118
District_LES CORTS,-1.1582,0.566,-2.048,0.041,-2.267,-0.050
District_NOU BARRIS,0.7393,0.339,2.182,0.029,0.075,1.404
District_SANT ANDREU,0.8700,0.343,2.538,0.011,0.198,1.542


In [60]:
glm_logit_results.aic

888.0677860387129

In [61]:
glm_logit_results.bic

-7318.7390464098635

In [62]:
glm_logit_results.pvalues.sort_values(ascending=False)

House_size_> 150 M2                                             0.634770
District_CIUTAT VELLA                                           0.557477
District_GRÀCIA                                                 0.503079
District_SANT MARTÍ                                             0.398134
House_size_101-150 M2                                           0.331498
House_size_51-100 M2                                            0.223810
District_HORTA - GUINARDÓ                                       0.184582
District_SARRIÀ - SANT GERVASI                                  0.133777
Disability                                                      0.120611
House_size_I DON'T KNOW                                         0.047918
District_LES CORTS                                              0.040586
District_NOU BARRIS                                             0.029142
Car                                                             0.025468
Time_living_in_barri_MORE THAN 10 YEARS            

In [63]:
for i in ['House_size_> 150 M2', 'House_size_101-150 M2', 'House_size_51-100 M2', "House_size_I DON'T KNOW"]:
    del x[i]

In [64]:
glm_logit = sm.GLM(y, x, family=sm.families.Binomial())
glm_logit_results = glm_logit.fit()
glm_logit_results.summary()

0,1,2,3
Dep. Variable:,Would_live_in_another_neighborhood,No. Observations:,1174.0
Model:,GLM,Df Residuals:,1160.0
Model Family:,Binomial,Df Model:,13.0
Link Function:,logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-428.18
Date:,"Sun, 09 May 2021",Deviance:,856.37
Time:,08:50:28,Pearson chi2:,1180.0
No. Iterations:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-1.7775,0.370,-4.808,0.000,-2.502,-1.053
Car,0.4400,0.185,2.382,0.017,0.078,0.802
Disability,0.3390,0.221,1.533,0.125,-0.095,0.772
District_CIUTAT VELLA,0.3794,0.455,0.833,0.405,-0.513,1.272
District_GRÀCIA,0.2740,0.393,0.698,0.485,-0.496,1.044
District_HORTA - GUINARDÓ,0.4616,0.338,1.365,0.172,-0.201,1.124
District_LES CORTS,-1.1238,0.564,-1.991,0.046,-2.230,-0.018
District_NOU BARRIS,0.7522,0.336,2.237,0.025,0.093,1.411
District_SANT ANDREU,0.8877,0.340,2.608,0.009,0.221,1.555


In [65]:
glm_logit_results.aic

884.3686387454122

In [66]:
glm_logit_results.bic

-7342.7108817047165

In [67]:
glm_logit_results.pvalues.sort_values(ascending=False)

District_GRÀCIA                                                 0.485428
District_SANT MARTÍ                                             0.416040
District_CIUTAT VELLA                                           0.404659
District_HORTA - GUINARDÓ                                       0.172308
District_SARRIÀ - SANT GERVASI                                  0.143526
Disability                                                      0.125353
District_LES CORTS                                              0.046436
District_NOU BARRIS                                             0.025286
Time_living_in_barri_MORE THAN 10 YEARS                         0.024288
Car                                                             0.017197
District_SANT ANDREU                                            0.009096
District_SANTS - MONTJUÏC                                       0.003024
Time_living_in_barri_I HAVE ALWAYS LIVED IN THE NEIGHBORHOOD    0.000070
const                                              

In [68]:
for i in ['Disability']:
    del x[i]

In [69]:
glm_logit = sm.GLM(y, x, family=sm.families.Binomial())
glm_logit_results = glm_logit.fit()
glm_logit_results.summary()

0,1,2,3
Dep. Variable:,Would_live_in_another_neighborhood,No. Observations:,1174.0
Model:,GLM,Df Residuals:,1161.0
Model Family:,Binomial,Df Model:,12.0
Link Function:,logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-429.32
Date:,"Sun, 09 May 2021",Deviance:,858.63
Time:,08:51:44,Pearson chi2:,1190.0
No. Iterations:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-1.7166,0.367,-4.684,0.000,-2.435,-0.998
Car,0.4265,0.184,2.315,0.021,0.065,0.787
District_CIUTAT VELLA,0.3700,0.454,0.815,0.415,-0.520,1.260
District_GRÀCIA,0.2707,0.392,0.690,0.490,-0.498,1.039
District_HORTA - GUINARDÓ,0.4655,0.338,1.378,0.168,-0.197,1.128
District_LES CORTS,-1.1402,0.564,-2.021,0.043,-2.246,-0.034
District_NOU BARRIS,0.7955,0.334,2.378,0.017,0.140,1.451
District_SANT ANDREU,0.8907,0.340,2.621,0.009,0.225,1.557
District_SANT MARTÍ,0.2547,0.348,0.732,0.464,-0.427,0.937


In [70]:
glm_logit_results.aic

884.6302192740594

In [71]:
glm_logit_results.bic

-7347.517473176457

In [72]:
glm_logit_results.pvalues.sort_values(ascending=False)

District_GRÀCIA                                                 0.490156
District_SANT MARTÍ                                             0.464119
District_CIUTAT VELLA                                           0.415320
District_HORTA - GUINARDÓ                                       0.168218
District_SARRIÀ - SANT GERVASI                                  0.126210
District_LES CORTS                                              0.043275
Time_living_in_barri_MORE THAN 10 YEARS                         0.026481
Car                                                             0.020596
District_NOU BARRIS                                             0.017396
District_SANT ANDREU                                            0.008772
District_SANTS - MONTJUÏC                                       0.003119
Time_living_in_barri_I HAVE ALWAYS LIVED IN THE NEIGHBORHOOD    0.000085
const                                                           0.000003
dtype: float64

# Probit

In [177]:
glm_probit = sm.GLM(y, x, family=sm.families.Binomial(link=sm.genmod.families.links.probit))
glm_probit_results = glm_probit.fit()
glm_probit_results.summary()

Use an instance of a link class instead.
  """Entry point for launching an IPython kernel.


0,1,2,3
Dep. Variable:,Would_live_in_another_neighborhood,No. Observations:,1174.0
Model:,GLM,Df Residuals:,1165.0
Model Family:,Binomial,Df Model:,8.0
Link Function:,probit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-397.92
Date:,"Sat, 08 May 2021",Deviance:,795.85
Time:,23:03:40,Pearson chi2:,1240.0
No. Iterations:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.8217,0.138,-5.952,0.000,-1.092,-0.551
Time_living_in_barri_I HAVE ALWAYS LIVED IN THE NEIGHBORHOOD,-0.9719,0.257,-3.779,0.000,-1.476,-0.468
Time_living_in_barri_MORE THAN 10 YEARS,-0.4298,0.180,-2.385,0.017,-0.783,-0.077
Barri_satisfaction_FAIRLY SATISFIED,0.3089,0.157,1.961,0.050,0.000,0.618
Barri_satisfaction_NOT SATISFIED,1.9358,0.567,3.414,0.001,0.824,3.047
Barri_security_BAD,-0.1648,0.106,-1.561,0.119,-0.372,0.042
Barri_security_GOOD,-0.3620,0.077,-4.728,0.000,-0.512,-0.212
"Barri_security_NEITHER BAD, NOR GOOD",-0.2949,0.082,-3.585,0.000,-0.456,-0.134
"Barri_comparison_NEITHER THE BEST, NOR THE WORST",0.7154,0.105,6.802,0.000,0.509,0.922


In [178]:
glm_probit_results.aic

813.846236087375

In [179]:
glm_probit_results.bic

-7438.574144364695

# Scobit

In [180]:
glm_scobit = sm.GLM(y, x, family=sm.families.Binomial(link=sm.genmod.families.links.cloglog))
glm_scobit_results = glm_scobit.fit()
glm_scobit_results.summary()

Use an instance of a link class instead.
  """Entry point for launching an IPython kernel.


0,1,2,3
Dep. Variable:,Would_live_in_another_neighborhood,No. Observations:,1174.0
Model:,GLM,Df Residuals:,1165.0
Model Family:,Binomial,Df Model:,8.0
Link Function:,cloglog,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-397.69
Date:,"Sat, 08 May 2021",Deviance:,795.38
Time:,23:03:46,Pearson chi2:,1200.0
No. Iterations:,16,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-1.5480,0.221,-7.010,0.000,-1.981,-1.115
Time_living_in_barri_I HAVE ALWAYS LIVED IN THE NEIGHBORHOOD,-1.6844,0.458,-3.681,0.000,-2.581,-0.787
Time_living_in_barri_MORE THAN 10 YEARS,-0.6731,0.273,-2.467,0.014,-1.208,-0.138
Barri_satisfaction_FAIRLY SATISFIED,0.4925,0.232,2.121,0.034,0.037,0.948
Barri_satisfaction_NOT SATISFIED,2.0009,0.494,4.049,0.000,1.032,2.969
Barri_security_BAD,-0.3550,0.167,-2.132,0.033,-0.681,-0.029
Barri_security_GOOD,-0.6755,0.128,-5.268,0.000,-0.927,-0.424
"Barri_security_NEITHER BAD, NOR GOOD",-0.5175,0.133,-3.877,0.000,-0.779,-0.256
"Barri_comparison_NEITHER THE BEST, NOR THE WORST",1.2797,0.191,6.686,0.000,0.905,1.655


In [181]:
glm_scobit_results.aic

813.3819369263255

In [182]:
glm_scobit_results.bic

-7439.038443525744

In [183]:
np.exp(glm_scobit_results.params)

const                                                           0.212680
Time_living_in_barri_I HAVE ALWAYS LIVED IN THE NEIGHBORHOOD    0.185563
Time_living_in_barri_MORE THAN 10 YEARS                         0.510102
Barri_satisfaction_FAIRLY SATISFIED                             1.636330
Barri_satisfaction_NOT SATISFIED                                7.395376
Barri_security_BAD                                              0.701192
Barri_security_GOOD                                             0.508885
Barri_security_NEITHER BAD, NOR GOOD                            0.596033
Barri_comparison_NEITHER THE BEST, NOR THE WORST                3.595644
Barri_comparison_ONE OF THE WORST                               4.690904
dtype: float64