In [5]:
import pandas as pd
import statsmodels.api as sm

### Weighting by ref pop; YEAR as control var; lagged values of dep var
#### **** all regressions with dm ref pop lag vars counts files ****

#### Create a function to automate weighted, lagged regressions (univariate for now)

In [25]:
def wols_lagged_control_uni(req_dfs, dep_var, ind_vars, weights_var, control_var=False, dataset_names=False):
    """Fit and print one weighted least squares model per (dataset, predictor) pair.

    For every dataset in ``req_dfs`` and every predictor in ``ind_vars`` the
    formula ::

        dep_var ~ predictor + lag1_<dep_var> + lag2_<dep_var> [+ control_var]

    is fitted with ``sm.WLS.from_formula`` and the fitted model's ``summary()``
    is printed, preceded by the dataset's label when ``dataset_names`` is given.

    Parameters
    ----------
    req_dfs : iterable
        Datasets to fit, in order. Each one is passed as ``data`` to
        ``from_formula`` and indexed with ``weights_var``.
    dep_var : str
        Name of the dependent variable. The columns ``lag1_<dep_var>`` and
        ``lag2_<dep_var>`` are always added to the right-hand side.
    ind_vars : str or iterable of str
        Predictor name(s); one separate model is fitted for each. A bare string
        is treated as a single predictor rather than iterated per character.
    weights_var : str
        Name of the column supplying the WLS weights.
    control_var : str or False, optional
        Extra term appended to the formula when truthy.
    dataset_names : sequence of str or False, optional
        Labels printed before each summary; entry ``i`` labels ``req_dfs[i]``.
        Extra trailing entries are ignored.

    Returns
    -------
    None
        Results are only printed.

    Raises
    ------
    ValueError
        If ``dataset_names`` is given but has fewer entries than ``req_dfs``.
    """
    # Materialise so the datasets can be counted even if an iterator was passed.
    req_dfs = list(req_dfs)

    # Fail before any model is fitted instead of hitting an IndexError part-way
    # through the run, after earlier summaries have already been printed.
    if dataset_names and len(dataset_names) < len(req_dfs):
        raise ValueError(
            f'dataset_names has {len(dataset_names)} entries but req_dfs has {len(req_dfs)}'
        )

    # A lone string would otherwise be looped over character by character.
    if isinstance(ind_vars, str):
        ind_vars = [ind_vars]

    # Everything on the right-hand side except the predictor is the same for
    # all fits, so build it once.
    shared_terms = f'lag1_{dep_var} + lag2_{dep_var}'
    if control_var:
        shared_terms = f'{shared_terms} + {control_var}'

    for df_index, df in enumerate(req_dfs):
        for var in ind_vars:
            model = sm.WLS.from_formula(f'{dep_var} ~ {var} + {shared_terms}',
                                        data=df,
                                        weights=df[weights_var])
            results = model.fit()
            if dataset_names:
                print(dataset_names[df_index])
            print(results.summary(), '\n', '\n')

In [39]:
# Dependent variable used on the left-hand side of every regression below.
dm_dep_var_rates = 'dm_violent_crime_rate'

# Predictors; each one is fitted in its own univariate model.
dm_ind_vars_rates = [
    'dm_total_officers_rate',
    'dm_prison_occupancy_count_rate',
    'dm_jail_occupancy_count_rate',
    'dm_drug_tot_arrests_rate',
    'dm_disorder_arrests_tot_index_rate',
]

### All Agencies - 90-15, 90-02, 03-15

In [38]:
# Load the three all-agency files (full period, 1990-2002, 2003-2015).
# NOTE(review): these are absolute, user-specific paths; the cell will not run
# on another machine without editing them.
all_90_15 = pd.read_csv(
    '/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag/final_main_gte_10k_core_rates_dm_ref_pop_lag.csv'
)
all_90_02 = pd.read_csv(
    '/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag_90_02/final_main_gte_10k_core_rates_dm_1990_2002_ref_pop_lag.csv'
)
all_03_15 = pd.read_csv(
    '/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag_03_15/final_main_gte_10k_core_rates_dm_2003_2015_ref_pop_lag.csv'
)

#### With control var

In [40]:
# All-agency datasets, one model per predictor, with YEAR as an extra term.
wols_lagged_control_uni(
    req_dfs=[all_90_15, all_90_02, all_03_15],
    dataset_names=['all_90_15', 'all_90_02', 'all_03_15'],
    dep_var=dm_dep_var_rates,
    ind_vars=dm_ind_vars_rates,
    weights_var='fnl_population_wt',
    control_var='YEAR',
)

all_90_15
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.779
Model:                               WLS   Adj. R-squared:                  0.779
Method:                    Least Squares   F-statistic:                 8.845e+04
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:17:06   Log-Likelihood:            -6.4494e+05
No. Observations:                 100612   AIC:                         1.290e+06
Df Residuals:                     100607   BIC:                         1.290e+06
Df Model:                              4                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------

all_90_02
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.744
Model:                               WLS   Adj. R-squared:                  0.744
Method:                    Least Squares   F-statistic:                 3.102e+04
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:17:06   Log-Likelihood:            -2.8019e+05
No. Observations:                  42659   AIC:                         5.604e+05
Df Residuals:                      42654   BIC:                         5.604e+05
Df Model:                              4                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------

                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.803
Model:                               WLS   Adj. R-squared:                  0.803
Method:                    Least Squares   F-statistic:                 4.918e+04
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:17:06   Log-Likelihood:            -2.9813e+05
No. Observations:                  48372   AIC:                         5.963e+05
Df Residuals:                      48367   BIC:                         5.963e+05
Df Model:                              4                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------

#### Without control var

In [41]:
# All-agency datasets, one model per predictor, no control term.
wols_lagged_control_uni(
    req_dfs=[all_90_15, all_90_02, all_03_15],
    dataset_names=['all_90_15', 'all_90_02', 'all_03_15'],
    dep_var=dm_dep_var_rates,
    ind_vars=dm_ind_vars_rates,
    weights_var='fnl_population_wt',
)

all_90_15
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.777
Model:                               WLS   Adj. R-squared:                  0.777
Method:                    Least Squares   F-statistic:                 1.167e+05
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:17:59   Log-Likelihood:            -6.4534e+05
No. Observations:                 100612   AIC:                         1.291e+06
Df Residuals:                     100608   BIC:                         1.291e+06
Df Model:                              3                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------

all_90_15
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.780
Model:                               WLS   Adj. R-squared:                  0.780
Method:                    Least Squares   F-statistic:                 1.101e+05
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:17:59   Log-Likelihood:            -5.9577e+05
No. Observations:                  93297   AIC:                         1.192e+06
Df Residuals:                      93293   BIC:                         1.192e+06
Df Model:                              3                                         
Covariance Type:               nonrobust                                         
                                         coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------

all_03_15
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.803
Model:                               WLS   Adj. R-squared:                  0.803
Method:                    Least Squares   F-statistic:                 6.554e+04
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:18:00   Log-Likelihood:            -2.9814e+05
No. Observations:                  48372   AIC:                         5.963e+05
Df Residuals:                      48368   BIC:                         5.963e+05
Df Model:                              3                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------

### Large Agencies - 90-15, 90-02, 03-15

In [26]:
# Load the three large-city files (full period, 1990-2002, 2003-2015).
# NOTE(review): these are absolute, user-specific paths; the cell will not run
# on another machine without editing them.
large_90_15 = pd.read_csv(
    '/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag/final_main_large_cities_core_rates_dm_ref_pop_lag.csv'
)
large_90_02 = pd.read_csv(
    '/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag_90_02/final_main_large_cities_core_rates_dm_1990_2002_ref_pop_lag.csv'
)
large_03_15 = pd.read_csv(
    '/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag_03_15/final_main_large_cities_core_rates_dm_2003_2015_ref_pop_lag.csv'
)

#### With control var

In [28]:
# Large-agency datasets, one model per predictor, with YEAR as an extra term.
wols_lagged_control_uni(
    req_dfs=[large_90_15, large_90_02, large_03_15],
    dataset_names=['large_90_15', 'large_90_02', 'large_03_15'],
    dep_var=dm_dep_var_rates,
    ind_vars=dm_ind_vars_rates,
    weights_var='fnl_population_wt',
    control_var='YEAR',
)

large_90_15
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.883
Model:                               WLS   Adj. R-squared:                  0.883
Method:                    Least Squares   F-statistic:                 1.581e+04
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           16:58:50   Log-Likelihood:                -51647.
No. Observations:                   8360   AIC:                         1.033e+05
Df Residuals:                       8355   BIC:                         1.033e+05
Df Model:                              4                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------

                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.857
Model:                               WLS   Adj. R-squared:                  0.857
Method:                    Least Squares   F-statistic:                     5727.
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           16:58:50   Log-Likelihood:                -24237.
No. Observations:                   3836   AIC:                         4.848e+04
Df Residuals:                       3831   BIC:                         4.851e+04
Df Model:                              4                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------

#### Without control var

In [29]:
# Large-agency datasets, one model per predictor, no control term.
wols_lagged_control_uni(
    req_dfs=[large_90_15, large_90_02, large_03_15],
    dataset_names=['large_90_15', 'large_90_02', 'large_03_15'],
    dep_var=dm_dep_var_rates,
    ind_vars=dm_ind_vars_rates,
    weights_var='fnl_population_wt',
)

large_90_15
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.882
Model:                               WLS   Adj. R-squared:                  0.882
Method:                    Least Squares   F-statistic:                 2.092e+04
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           16:58:51   Log-Likelihood:                -51677.
No. Observations:                   8360   AIC:                         1.034e+05
Df Residuals:                       8356   BIC:                         1.034e+05
Df Model:                              3                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------

large_90_02
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.858
Model:                               WLS   Adj. R-squared:                  0.858
Method:                    Least Squares   F-statistic:                     6868.
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           16:58:51   Log-Likelihood:                -21629.
No. Observations:                   3420   AIC:                         4.327e+04
Df Residuals:                       3416   BIC:                         4.329e+04
Df Model:                              3                                         
Covariance Type:               nonrobust                                         
                                         coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------

### Medium Agencies - 90-15, 90-02, 03-15

In [31]:
# Load the three medium-city files (full period, 1990-2002, 2003-2015).
# NOTE(review): these are absolute, user-specific paths; the cell will not run
# on another machine without editing them.
medium_90_15 = pd.read_csv(
    '/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag/final_main_medium_cities_core_rates_dm_ref_pop_lag.csv'
)
medium_90_02 = pd.read_csv(
    '/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag_90_02/final_main_medium_cities_core_rates_dm_1990_2002_ref_pop_lag.csv'
)
medium_03_15 = pd.read_csv(
    '/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag_03_15/final_main_medium_cities_core_rates_dm_2003_2015_ref_pop_lag.csv'
)

#### With control var

In [32]:
# Medium-agency datasets, one model per predictor, with YEAR as an extra term.
wols_lagged_control_uni(
    req_dfs=[medium_90_15, medium_90_02, medium_03_15],
    dataset_names=['medium_90_15', 'medium_90_02', 'medium_03_15'],
    dep_var=dm_dep_var_rates,
    ind_vars=dm_ind_vars_rates,
    weights_var='fnl_population_wt',
    control_var='YEAR',
)

medium_90_15
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.676
Model:                               WLS   Adj. R-squared:                  0.676
Method:                    Least Squares   F-statistic:                     7265.
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:03:47   Log-Likelihood:                -84009.
No. Observations:                  13907   AIC:                         1.680e+05
Df Residuals:                      13902   BIC:                         1.681e+05
Df Model:                              4                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------

                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.633
Model:                               WLS   Adj. R-squared:                  0.632
Method:                    Least Squares   F-statistic:                     2598.
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:03:47   Log-Likelihood:                -37444.
No. Observations:                   6039   AIC:                         7.490e+04
Df Residuals:                       6034   BIC:                         7.493e+04
Df Model:                              4                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------

#### Without control var

In [33]:
# Medium-agency datasets, one model per predictor, no control term.
wols_lagged_control_uni(
    req_dfs=[medium_90_15, medium_90_02, medium_03_15],
    dataset_names=['medium_90_15', 'medium_90_02', 'medium_03_15'],
    dep_var=dm_dep_var_rates,
    ind_vars=dm_ind_vars_rates,
    weights_var='fnl_population_wt',
)

medium_90_15
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.672
Model:                               WLS   Adj. R-squared:                  0.672
Method:                    Least Squares   F-statistic:                     9511.
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:03:50   Log-Likelihood:                -84095.
No. Observations:                  13907   AIC:                         1.682e+05
Df Residuals:                      13903   BIC:                         1.682e+05
Df Model:                              3                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------

medium_90_02
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.625
Model:                               WLS   Adj. R-squared:                  0.625
Method:                    Least Squares   F-statistic:                     3358.
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:03:51   Log-Likelihood:                -37503.
No. Observations:                   6039   AIC:                         7.501e+04
Df Residuals:                       6035   BIC:                         7.504e+04
Df Model:                              3                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------

### Small Agencies - 90-15, 90-02, 03-15

In [35]:
# Load the three small-city files (full period, 1990-2002, 2003-2015).
# NOTE(review): these are absolute, user-specific paths; the cell will not run
# on another machine without editing them.
small_90_15 = pd.read_csv(
    '/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag/final_main_small_cities_core_rates_dm_ref_pop_lag.csv'
)
small_90_02 = pd.read_csv(
    '/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag_90_02/final_main_small_cities_core_rates_dm_1990_2002_ref_pop_lag.csv'
)
small_03_15 = pd.read_csv(
    '/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag_03_15/final_main_small_cities_core_rates_dm_2003_2015_ref_pop_lag.csv'
)

#### With control var

In [36]:
# Small-agency datasets, one model per predictor, with YEAR as an extra term.
wols_lagged_control_uni(
    req_dfs=[small_90_15, small_90_02, small_03_15],
    dataset_names=['small_90_15', 'small_90_02', 'small_03_15'],
    dep_var=dm_dep_var_rates,
    ind_vars=dm_ind_vars_rates,
    weights_var='fnl_population_wt',
    control_var='YEAR',
)

small_90_15
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.497
Model:                               WLS   Adj. R-squared:                  0.497
Method:                    Least Squares   F-statistic:                 1.936e+04
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:06:14   Log-Likelihood:            -4.9145e+05
No. Observations:                  78345   AIC:                         9.829e+05
Df Residuals:                      78340   BIC:                         9.829e+05
Df Model:                              4                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------

                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.506
Model:                               WLS   Adj. R-squared:                  0.506
Method:                    Least Squares   F-statistic:                 1.870e+04
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:06:14   Log-Likelihood:            -4.5665e+05
No. Observations:                  73066   AIC:                         9.133e+05
Df Residuals:                      73061   BIC:                         9.134e+05
Df Model:                              4                                         
Covariance Type:               nonrobust                                         
                                         coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------

                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.570
Model:                               WLS   Adj. R-squared:                  0.570
Method:                    Least Squares   F-statistic:                 1.259e+04
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:06:14   Log-Likelihood:            -2.3037e+05
No. Observations:                  37988   AIC:                         4.607e+05
Df Residuals:                      37983   BIC:                         4.608e+05
Df Model:                              4                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------

#### Without control var

In [37]:
# Small-agency datasets, one model per predictor, no control term.
wols_lagged_control_uni(
    req_dfs=[small_90_15, small_90_02, small_03_15],
    dataset_names=['small_90_15', 'small_90_02', 'small_03_15'],
    dep_var=dm_dep_var_rates,
    ind_vars=dm_ind_vars_rates,
    weights_var='fnl_population_wt',
)

small_90_15
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.493
Model:                               WLS   Adj. R-squared:                  0.493
Method:                    Least Squares   F-statistic:                 2.540e+04
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:06:14   Log-Likelihood:            -4.9176e+05
No. Observations:                  78345   AIC:                         9.835e+05
Df Residuals:                      78341   BIC:                         9.836e+05
Df Model:                              3                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------

small_90_02
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.447
Model:                               WLS   Adj. R-squared:                  0.447
Method:                    Least Squares   F-statistic:                     8828.
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:06:15   Log-Likelihood:            -2.1168e+05
No. Observations:                  32784   AIC:                         4.234e+05
Df Residuals:                      32780   BIC:                         4.234e+05
Df Model:                              3                                         
Covariance Type:               nonrobust                                         
                                 coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------

small_03_15
                              WLS Regression Results                             
Dep. Variable:     dm_violent_crime_rate   R-squared:                       0.568
Model:                               WLS   Adj. R-squared:                  0.568
Method:                    Least Squares   F-statistic:                 1.614e+04
Date:                   Thu, 09 Apr 2020   Prob (F-statistic):               0.00
Time:                           17:06:15   Log-Likelihood:            -2.2371e+05
No. Observations:                  36847   AIC:                         4.474e+05
Df Residuals:                      36843   BIC:                         4.475e+05
Df Model:                              3                                         
Covariance Type:               nonrobust                                         
                                   coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------