# Neighborhoods question
Question (from the meeting agenda `Bloomberg_ Gun Violence Dashboard_ Meeting 2 Agenda.docx`):
```
Final Neighborhood Questions:
1. Not duplicating MSU Project Green Light (PGL) study, what new can we learn from PGL data that give insight to where interventions should be targeted?
2. Are there specific locations/ property types that become magnets for gun violence incidents?
3. What can we find unique to areas/ blocks where we see highest event rates (presence of empty lots, convenience stores, gas stations, bus stops, schools, etc)?
```

If the goal is simply to identify where interventions should be targeted, Detroit already has the gun violence rates, so the clearly correct thing to do is to target interventions where those rates are highest.

Another way of viewing this question is as input to policy choices: change neighborhood characteristics based on their correlations with gun violence and hope the relationship is causal. Doing that properly would require _way_ more work than we have time for.

In [257]:
import geopandas as gpd
import numpy as np  # referenced as `np` inside the patsy formula strings (np.log)
import patsy
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from patsy import dmatrices, dmatrix
from sklearn.impute import SimpleImputer

from features.income import Income
from features.violence_calls import ViolenceCalls
from features.households import Households
from features.population import Population
from features.population_density import PopulationDensity
from features.feature_constructor import Feature
from features.out_of_state_rental_ownership import OutOfStateRentalOwnership
from features.ddot_bus_stops import DDotBusStops
from features.smart_bus_stops import SmartBusStops
from features.rental_statuses import RentalStatuses
from features.project_green_light_locations import ProjectGreenlightLocations
from util_detroit import concatenate_features

from detroit_geos import get_detroit_census_geos, get_detroit_boundaries

In [2]:
# Geometries used for visualization: 2010 census tracts and blocks (each
# indexed by `geo_id`) plus the Detroit boundary.

tracts = get_detroit_census_geos(
    decennial_census_year=2010,
    target_geo_grain="tract",
).set_index("geo_id")
detroit = get_detroit_boundaries()
block = get_detroit_census_geos(
    decennial_census_year=2010,
    target_geo_grain="block",
).set_index("geo_id")

# Build Features

In [61]:
# Run the ViolenceCalls caching step for the 2010 census geographies.
# NOTE(review): presumably this lets later feature construction reuse the
# cached data instead of reloading it — confirm in features.violence_calls.
ViolenceCalls(
    decennial_census_year=2010,
    verbose=False,
).cache_features()

Loaded 371,176 rows of data


In [70]:
# Build the violence-call feature on its own at tract grain.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, i.e. the
# run was stopped by hand. ViolenceCalls is also part of the feature list that
# is concatenated in the cell below, so this standalone call may be redundant —
# confirm before relying on Restart & Run All.
ViolenceCalls(
    decennial_census_year=2010,
    verbose=False,
).construct_feature(target_geo_grain="tract")

KeyboardInterrupt: 

In [258]:
# Settings shared by every feature below. Change them here, once, to rebuild
# the feature table at a different grain or census vintage.
target_geo_grain = "tract"
DECENNIAL_CENSUS_YEAR = 2010

# One feature object per data source; they are combined into a single frame
# by concatenate_features below.
feature_objects = [
    Population(
        decennial_census_year=DECENNIAL_CENSUS_YEAR,
        population_data_path="population",
        verbose=False,
    ),
    PopulationDensity(
        decennial_census_year=DECENNIAL_CENSUS_YEAR,
        population_data_path="population",
    ),
    ViolenceCalls(decennial_census_year=DECENNIAL_CENSUS_YEAR, verbose=False),
    Income(verbose=False),
    Households(),
    # Passed positionally, as in the original call.
    OutOfStateRentalOwnership(DECENNIAL_CENSUS_YEAR),
    DDotBusStops(decennial_census_year=DECENNIAL_CENSUS_YEAR),
    SmartBusStops(decennial_census_year=DECENNIAL_CENSUS_YEAR),
    RentalStatuses(decennial_census_year=DECENNIAL_CENSUS_YEAR),
    ProjectGreenlightLocations(decennial_census_year=DECENNIAL_CENSUS_YEAR),
]

# Use the configured grain instead of a second hard-coded "tract" literal, so
# editing `target_geo_grain` above actually takes effect.
feat_df = concatenate_features(feature_objects, target_geo_grain)

# Transform

In [259]:
from munge_features import transform_1

# Bind the transformed frame to `df`: the formula, correlation and regression
# cells further down all read `df`, which was otherwise never assigned anywhere
# in the notebook (it only existed as leftover kernel state).
df = transform_1(feat_df)
df

Unnamed: 0_level_0,population_density,per_household_income,out_of_state_rental_ownership,call_rate,married_household_prop,rental_density,bus_density,greenlight_density,people_per_household
tract,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2.616350e+10,1824.898354,49015.0,0.278947,0.559772,0.173157,419.793244,28.722696,2.209438,2.803894
2.616350e+10,1856.991606,61980.0,0.326087,0.727783,0.174632,158.882786,27.631789,17.269868,2.947610
2.616350e+10,1329.122055,50345.0,0.157895,0.620975,0.075216,179.375550,40.123478,18.881637,3.868064
2.616350e+10,917.210175,34880.0,0.125000,0.618990,0.120000,14.513576,41.726532,0.000000,4.437333
2.616350e+10,1476.359684,43058.0,0.346939,0.657596,0.120553,73.183386,28.377231,1.493538,4.357708
...,...,...,...,...,...,...,...,...,...
2.616358e+10,1005.423452,47359.0,0.222222,0.000347,0.203540,0.000000,0.000000,0.000000,2.315366
2.616358e+10,316.470057,103943.0,0.222222,0.007367,0.439142,0.000000,0.000000,0.000000,2.474531
2.616398e+10,34.136901,40003.0,0.222222,4.636364,0.174037,0.000000,18.044989,0.000000,5.000000
2.616399e+10,22.931242,45320.0,1.000000,1.723577,0.150685,10.727722,209.190587,5.363861,0.842466


In [260]:
# Response variable and the predictors that enter the model on a log scale.
response = "call_rate"
log_scaled = {"per_household_income"}

# Every column except the response is a predictor. The test is a substring
# match, so any column whose name contains "call_rate" is excluded as well.
predictors = [col for col in df.columns if response not in col]

# Untransformed formula, kept under its original name.
formula_bones = "call_rate ~ " + "\n+ ".join(predictors)

# Apply the log transform term by term rather than by string replacement on
# the joined formula, so a column whose name merely contains
# "per_household_income" can never be rewritten by accident.
# `np` inside the formula is looked up in the notebook namespace by patsy, so
# numpy must be imported as `np`.
formula = "call_rate ~ " + "\n+ ".join(
    f"np.log({col})" if col in log_scaled else col for col in predictors
)

# y: response column, X: intercept + predictors.
y, X = dmatrices(
    formula_like=formula,
    data=df,
    return_type="dataframe",
)

# D: the same terms with the response moved to the right-hand side, so that
# call_rate sits next to the predictors in one frame (used for correlations).
D = dmatrix(
    formula_like=formula.replace("~", "+"),
    data=df,
    return_type="dataframe",
)

In [261]:
# Squared pairwise correlation of each design-matrix column with call_rate,
# sorted from weakest to strongest.
D.corr()["call_rate"].pow(2).sort_values()

people_per_household             0.000724
out_of_state_rental_ownership    0.004437
rental_density                   0.041099
greenlight_density               0.047976
bus_density                      0.052926
population_density               0.054827
np.log(per_household_income)     0.067554
married_household_prop           0.182031
call_rate                        1.000000
Intercept                             NaN
Name: call_rate, dtype: float64

# Run regressions

In [247]:
# Ordinary least squares of call_rate on the predictors named in `formula`.
# The design matrices are rebuilt here so the cell can be re-run on its own.
y, X = dmatrices(formula, df, return_type="dataframe")
mdl = sm.OLS(y, X)
res = mdl.fit()
res.summary()

0,1,2,3
Dep. Variable:,call_rate,R-squared:,0.265
Model:,OLS,Adj. R-squared:,0.247
Method:,Least Squares,F-statistic:,14.96
Date:,"Wed, 02 Mar 2022",Prob (F-statistic):,9.87e-19
Time:,13:37:33,Log-Likelihood:,-89.441
No. Observations:,341,AIC:,196.9
Df Residuals:,332,BIC:,231.4
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.3880,0.690,2.011,0.045,0.030,2.746
population_density,-0.0001,3.61e-05,-3.575,0.000,-0.000,-5.8e-05
np.log(per_household_income),-0.0723,0.063,-1.146,0.253,-0.196,0.052
out_of_state_rental_ownership,0.2905,0.144,2.013,0.045,0.007,0.574
married_household_prop,-0.8014,0.161,-4.985,0.000,-1.118,-0.485
rental_density,0.0003,0.000,1.860,0.064,-1.99e-05,0.001
bus_density,0.0002,0.000,0.472,0.637,-0.001,0.001
greenlight_density,0.0041,0.001,2.850,0.005,0.001,0.007
people_per_household,0.0193,0.025,0.781,0.435,-0.029,0.068

0,1,2,3
Omnibus:,470.877,Durbin-Watson:,1.579
Prob(Omnibus):,0.0,Jarque-Bera (JB):,85666.123
Skew:,6.53,Prob(JB):,0.0
Kurtosis:,79.542,Cond. No.,54200.0


In [252]:
# Poisson GLM of call_rate on the same predictors as the OLS fit above.
# The design matrices are rebuilt here so the cell can be re-run on its own.
y, X = dmatrices(
    formula_like=formula,
    data=df,
    return_type="dataframe",
)

# TODO: call_rate is a non-integer rate, not a count. The earlier sketch here
# was `exposure = X.pop('population')`, but `population` is not a column of the
# design matrix, so no exposure is passed. NOTE(review): modelling the call
# count with population as `exposure` is probably the intended specification —
# confirm before interpreting these coefficients.
mdl = sm.GLM(endog=y, exog=X, family=sm.families.Poisson())

# Fit once. The cell previously fitted and summarised the model twice, and the
# first summary was discarded because it was not the last expression.
res = mdl.fit()
res.summary()

0,1,2,3
Dep. Variable:,call_rate,No. Observations:,341.0
Model:,GLM,Df Residuals:,332.0
Model Family:,Poisson,Df Model:,8.0
Link Function:,Log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-242.69
Date:,"Wed, 02 Mar 2022",Deviance:,52.806
Time:,13:38:14,Pearson chi2:,60.9
No. Iterations:,5,Pseudo R-squ. (CS):,0.08364
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.4649,3.308,0.141,0.888,-6.018,6.948
population_density,-0.0003,0.000,-1.761,0.078,-0.001,3.59e-05
np.log(per_household_income),-0.0715,0.311,-0.230,0.818,-0.682,0.539
out_of_state_rental_ownership,0.7246,0.596,1.216,0.224,-0.444,1.893
married_household_prop,-2.8160,0.945,-2.981,0.003,-4.667,-0.965
rental_density,0.0010,0.001,1.292,0.196,-0.001,0.002
bus_density,-0.0002,0.002,-0.116,0.907,-0.003,0.003
greenlight_density,0.0053,0.005,1.153,0.249,-0.004,0.014
people_per_household,0.0515,0.106,0.488,0.626,-0.155,0.258
