# Table of Contents

* [Imports](#Imports)
* [Data Read In](#Data-Read-In)
* Model Fitting
    * [Parameter Combination](#Parameter-Combination)
    * [Split On Sex](#Split-On-Sex)

# Imports
[Back to Top](#Table-of-Contents)

In [8]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from itertools import combinations

# Data Read In
[Back to Top](#Table-of-Contents)

In [4]:
root = "../data/"

knnDat = pd.read_csv(root + "violenceKNN.csv", index_col=0)
features = pd.read_csv(root + "Economic_Data.csv")
knnSplResp = pd.read_csv(root + "vioRespKNNSexSplit.csv")
meanSplResp = pd.read_csv(root + "vioRespMeanSexSplit.csv")
MedSplResp = pd.read_csv(root + "vioRespMedSexSplit.csv")
ModeSplResp = pd.read_csv(root + "vioRespModeSexSplit.csv")

# Model Fitting

# Parameter Combination
[Back to Top](#Table-of-Contents)

In [25]:
allFeatures = np.asarray(knnDat.drop(["sitename", "violenceScore"], axis=1).columns)

In [51]:
modelsAIC = pd.DataFrame(columns=["AIC", "features"])
for nComb in reversed(range(1, 7)):
    combs = combinations(allFeatures, nComb)
    for comb in combs:
        equ = "violenceScore ~ "
        i = 0
        for feat in comb:
            equ += feat 
            if len(comb) - 1 != i:
                equ += "+"
            i += 1
        md = smf.mixedlm(equ, knnDat, 
                         groups=knnDat["sitename"])
        mdf = md.fit(reml=False)
        modelsAIC = modelsAIC.append({'AIC' : mdf.aic, "features" : comb}, ignore_index=True)





In [53]:
modelsAIC.sort_values("AIC")

Unnamed: 0,AIC,features
101,780.824502,"(year, UnemploymentRate)"
69,782.562487,"(year, UnderAge18InPovertyPercent, Unemploymen..."
75,782.609418,"(year, UnemploymentRate, Population)"
76,782.658838,"(year, UnemploymentRate, SNAP)"
72,782.743545,"(year, MedianHouseholdIncomeInDollars, Unemplo..."
...,...,...
121,867.516626,"(UnderAge18InPovertyPercent,)"
111,869.231126,"(UnderAge18InPovertyPercent, Population)"
120,870.536130,"(AllAgesInPovertyPercent,)"
107,873.271999,"(AllAgesInPovertyPercent, Population)"


In [54]:
allFeatures = np.asarray(knnDat.drop(["sitename", "violenceScore", "year"], axis=1).columns)

modelsAIC = pd.DataFrame(columns=["AIC", "features"])
for nComb in reversed(range(1, 7)):
    combs = combinations(allFeatures, nComb)
    for comb in combs:
        equ = "violenceScore ~ "
        i = 0
        for feat in comb:
            equ += feat 
            if len(comb) - 1 != i:
                equ += "+"
            i += 1
        md = smf.mixedlm(equ, knnDat, 
                         groups=knnDat["sitename"])
        mdf = md.fit(reml=False)
        modelsAIC = modelsAIC.append({'AIC' : mdf.aic, "features" : comb}, ignore_index=True)





In [55]:
modelsAIC.sort_values("AIC")

Unnamed: 0,AIC,features
55,790.701328,"(UnemploymentRate, SNAP)"
41,792.041957,"(UnemploymentRate, Population, SNAP)"
39,792.111345,"(MedianHouseholdIncomeInDollars, UnemploymentR..."
36,792.600466,"(UnderAge18InPovertyPercent, UnemploymentRate,..."
30,792.699605,"(AllAgesInPovertyPercent, UnemploymentRate, SNAP)"
...,...,...
58,867.516626,"(UnderAge18InPovertyPercent,)"
49,869.231126,"(UnderAge18InPovertyPercent, Population)"
57,870.536130,"(AllAgesInPovertyPercent,)"
45,873.271999,"(AllAgesInPovertyPercent, Population)"


In [60]:
md = smf.mixedlm("violenceScore ~ 1", knnDat, groups=knnDat["sitename"])
mdf = md.fit(reml=False)
print(mdf.summary())#-431.9609 



           Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: violenceScore
No. Observations: 122     Method:             ML           
No. Groups:       14      Scale:              64.7749      
Min. group size:  8       Log-Likelihood:     -433.0478    
Max. group size:  9       Converged:          No           
Mean group size:  8.7                                      
------------------------------------------------------------
             Coef.   Std.Err.    z     P>|z|  [0.025  0.975]
------------------------------------------------------------
Intercept    39.149     1.081  36.223  0.000  37.030  41.267
Group Var     8.908     0.767                               





In [61]:
mdf.aic

872.0955201835615

# Split On Sex
[Back to Top](#Table-of-Contents)

In [65]:
dropCols = ["Year", 
            "State / County Name",
            "Ages 5 to 17 in Families SAIPE Poverty Universe",
            "Ages 5 to 17 in Families in Poverty Count",
            "Ages 5 to 17 in Families in Poverty Percent",
            "All Ages in Poverty Count",
            "All Ages SAIPE Poverty Universe",
            "Under Age 18 SAIPE Poverty Universe",
            "Under Age 18 in Poverty Count"]
dat = knnSplResp.merge(features, how="inner", left_on=["year", "sitename"],
                       right_on=["Year", "State / County Name"]).drop(dropCols, axis=1)

In [71]:
dat.columns = ['year', 'sitename', 'sex', 'violenceScore',
       'AllAgesInPovertyPercent', 'UnderAge18inPovertyPercent',
       'MedianHouseholdIncomeInDollars', 'UnemploymentRate', 'Population',
       'SNAP']

In [79]:
md = smf.mixedlm("violenceScore ~ sex + UnemploymentRate + SNAP", dat, groups=dat["sitename"])
mdf = md.fit(reml=False)
print(mdf.aic)
print(mdf.summary())

1657.5762363435827
           Mixed Linear Model Regression Results
Model:             MixedLM Dependent Variable: violenceScore
No. Observations:  244     Method:             ML           
No. Groups:        14      Scale:              46.1138      
Min. group size:   16      Log-Likelihood:     -822.7881    
Max. group size:   18      Converged:          Yes          
Mean group size:   17.4                                     
------------------------------------------------------------
                 Coef.  Std.Err.    z    P>|z| [0.025 0.975]
------------------------------------------------------------
Intercept        47.548    2.681  17.734 0.000 42.293 52.803
sex[T.Male]      26.111    0.869  30.032 0.000 24.407 27.816
UnemploymentRate  1.571    0.179   8.760 0.000  1.220  1.923
SNAP             -0.247    0.020 -12.379 0.000 -0.287 -0.208
Group Var         7.169    0.572                            



In [80]:
md = smf.mixedlm("violenceScore ~ sex*UnemploymentRate + sex*SNAP", dat, groups=dat["sitename"])
mdf = md.fit(reml=False)
print(mdf.aic)
print(mdf.summary())

1622.658252423583
                 Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     violenceScore
No. Observations:     244         Method:                 ML           
No. Groups:           14          Scale:                  38.9244      
Min. group size:      16          Log-Likelihood:         -803.3291    
Max. group size:      18          Converged:              Yes          
Mean group size:      17.4                                             
-----------------------------------------------------------------------
                             Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-----------------------------------------------------------------------
Intercept                    35.895    3.358 10.689 0.000 29.313 42.476
sex[T.Male]                  49.469    4.579 10.804 0.000 40.495 58.442
UnemploymentRate              1.126    0.226  4.989 0.000  0.684  1.568
sex[T.Male]:UnemploymentRate  0.870    0.307  2.830 0.005  0.26

In [81]:
md = smf.mixedlm("violenceScore ~ sex + UnemploymentRate + SNAP", dat, groups=dat["sitename"], re_formula="~sex")
mdf = md.fit(reml=False)
print(mdf.aic)
print(mdf.summary())

1660.6332095921155
               Mixed Linear Model Regression Results
Model:                MixedLM   Dependent Variable:   violenceScore
No. Observations:     244       Method:               ML           
No. Groups:           14        Scale:                45.8600      
Min. group size:      16        Log-Likelihood:       -822.3166    
Max. group size:      18        Converged:            Yes          
Mean group size:      17.4                                         
-------------------------------------------------------------------
                        Coef.  Std.Err.    z    P>|z| [0.025 0.975]
-------------------------------------------------------------------
Intercept               47.471    2.644  17.952 0.000 42.288 52.654
sex[T.Male]             26.109    0.906  28.820 0.000 24.334 27.885
UnemploymentRate         1.580    0.179   8.808 0.000  1.228  1.931
SNAP                    -0.247    0.020 -12.410 0.000 -0.286 -0.208
Group Var                4.807    0.622     

In [82]:
md = smf.mixedlm("violenceScore ~ sex*UnemploymentRate + sex*SNAP", dat, groups=dat["sitename"], re_formula="~sex")
mdf = md.fit(reml=False)
print(mdf.aic)
print(mdf.summary())

1625.7340980885758
                 Mixed Linear Model Regression Results
Model:                MixedLM     Dependent Variable:     violenceScore
No. Observations:     244         Method:                 ML           
No. Groups:           14          Scale:                  38.0806      
Min. group size:      16          Log-Likelihood:         -802.8670    
Max. group size:      18          Converged:              Yes          
Mean group size:      17.4                                             
-----------------------------------------------------------------------
                             Coef.  Std.Err.   z    P>|z| [0.025 0.975]
-----------------------------------------------------------------------
Intercept                    35.854    3.324 10.785 0.000 29.339 42.370
sex[T.Male]                  49.555    4.590 10.798 0.000 40.560 58.551
UnemploymentRate              1.116    0.232  4.813 0.000  0.662  1.571
sex[T.Male]:UnemploymentRate  0.901    0.323  2.791 0.005  0.2

In [91]:
import statsmodels.api as sm
md = smf.mixedlm("violenceScore ~ sex*UnemploymentRate + sex*SNAP", 
                 dat, groups=dat["sitename"], re_formula="~sex")
free = (sm.regression
          .mixed_linear_model
          .MixedLMParams
          .from_components(np.ones(2), np.eye(2)))

mdf = md.fit(reml=False, free=free, method=["lbfgs"])#
print(mdf.aic)
print(mdf.summary())

ValueError: operands could not be broadcast together with shapes (6,) (2,) (6,) 

In [86]:
free


<statsmodels.regression.mixed_linear_model.MixedLMParams at 0x7f6d6d9a4a00>

In [88]:
np.ones(2)

array([1., 1.])