In [76]:
import pandas as pd
import statsmodels.api as sm
import numpy as np

from ipynb.fs.defs.initial_modeling_fnl_dm_weighted import wols_multiple_data_multiple_ind_vars

### Read in the required datasets

In [86]:
#### read the required data files

# Single place to repoint the notebook at the data: every CSV read below lives
# under this folder, in one sub-folder per period.
DATA_DIR = '/Users/salma/Research/us_crime_data_analysis/data'

# Period tag used in the folder name -> year span used in the file name.
_PERIOD_YEARS = {'90_02': '1990_2002', '03_15': '2003_2015'}


def _read_core_dm(city_size, measure, period):
    """Read one "core dm" CSV into a DataFrame.

    Parameters
    ----------
    city_size : str
        'large', 'medium' or 'small' (inserted into the file name).
    measure : str
        'counts' or 'rates' (inserted into the file name).
    period : str
        Key of ``_PERIOD_YEARS`` ('90_02' or '03_15'); selects the sub-folder
        and the year span in the file name.

    Returns
    -------
    pandas.DataFrame
        Result of ``pd.read_csv`` on the assembled path.

    Raises
    ------
    KeyError
        If ``period`` is not a key of ``_PERIOD_YEARS``.
    """
    return pd.read_csv(
        f'{DATA_DIR}/final_core_dm_{period}/'
        f'final_main_{city_size}_cities_core_{measure}_dm_{_PERIOD_YEARS[period]}.csv'
    )


## counts
fnl_large_core_dm_counts_90_02 = _read_core_dm('large', 'counts', '90_02')
fnl_medium_core_dm_counts_90_02 = _read_core_dm('medium', 'counts', '90_02')
fnl_small_core_dm_counts_90_02 = _read_core_dm('small', 'counts', '90_02')

fnl_large_core_dm_counts_03_15 = _read_core_dm('large', 'counts', '03_15')
fnl_medium_core_dm_counts_03_15 = _read_core_dm('medium', 'counts', '03_15')
fnl_small_core_dm_counts_03_15 = _read_core_dm('small', 'counts', '03_15')

## rates
fnl_large_core_dm_rates_90_02 = _read_core_dm('large', 'rates', '90_02')
fnl_medium_core_dm_rates_90_02 = _read_core_dm('medium', 'rates', '90_02')
fnl_small_core_dm_rates_90_02 = _read_core_dm('small', 'rates', '90_02')

fnl_large_core_dm_rates_03_15 = _read_core_dm('large', 'rates', '03_15')
fnl_medium_core_dm_rates_03_15 = _read_core_dm('medium', 'rates', '03_15')
fnl_small_core_dm_rates_03_15 = _read_core_dm('small', 'rates', '03_15')

#### Get the median population per ORI to be used as weights

In [88]:
def create_pop_wt_col_by_median(req_dfs):
    """Add a per-ORI median population column to every frame, in place.

    For each DataFrame in ``req_dfs`` a column ``fnl_population_wt`` is written
    holding, on every row, the median of ``fnl_population`` over all rows that
    share that row's ``ORI`` value.

    Parameters
    ----------
    req_dfs : iterable of pandas.DataFrame
        Frames to modify; each must contain the columns ``ORI`` and
        ``fnl_population``.

    Returns
    -------
    None
        The frames are mutated in place. Nothing is returned so that a call
        placed last in a notebook cell does not dump the frames as output.

    Raises
    ------
    KeyError
        If a frame lacks the ``ORI`` or ``fnl_population`` column.
    """
    for df in req_dfs:
        # Assigning a column mutates the caller's frame. The previous
        # `df = df.merge(...)` only rebound the loop variable, so the weight
        # column never reached the caller. `transform` broadcasts each group's
        # median back onto that group's rows, and overwriting the column keeps
        # re-runs of the cell idempotent.
        df['fnl_population_wt'] = df.groupby('ORI')['fnl_population'].transform('median')

In [89]:
#### Run the population-weight helper over all six dm counts datasets
#### (three city sizes x two periods)
create_pop_wt_col_by_median(
    req_dfs=[
        fnl_large_core_dm_counts_90_02,
        fnl_medium_core_dm_counts_90_02,
        fnl_small_core_dm_counts_90_02,
        fnl_large_core_dm_counts_03_15,
        fnl_medium_core_dm_counts_03_15,
        fnl_small_core_dm_counts_03_15,
    ]
)

In [90]:
#### Run the population-weight helper over all six dm rates datasets
#### (three city sizes x two periods)
create_pop_wt_col_by_median(
    req_dfs=[
        fnl_large_core_dm_rates_90_02,
        fnl_medium_core_dm_rates_90_02,
        fnl_small_core_dm_rates_90_02,
        fnl_large_core_dm_rates_03_15,
        fnl_medium_core_dm_rates_03_15,
        fnl_small_core_dm_rates_03_15,
    ]
)

#### Function to create lagged variables with the required shift

In [81]:
def create_lag_vars(req_dfs, vars_to_lag, groupby_var, num_of_lags):
    """Append within-group lagged copies of selected columns to each frame, in place.

    For every lag ``k`` from 1 to ``num_of_lags`` and every column ``v`` in
    ``vars_to_lag``, a column named ``lag{k}_{v}`` is written containing ``v``
    shifted down by ``k`` rows inside each ``groupby_var`` group. The first
    ``k`` rows of each group therefore hold NaN.

    Rows are shifted in their existing order; nothing is sorted here.
    NOTE(review): presumably the frames must already be ordered in time within
    each group for the lags to be meaningful -- confirm against the data.

    Parameters
    ----------
    req_dfs : iterable of pandas.DataFrame
        Frames to modify.
    vars_to_lag : iterable
        Names of the columns to lag.
    groupby_var :
        Name of the grouping column.
    num_of_lags : int
        Highest lag to create; lags 1..num_of_lags are all produced.

    Returns
    -------
    None
    """
    for frame in req_dfs:
        # Lags are 1-based, so iterate 1..num_of_lags directly.
        for lag in range(1, num_of_lags + 1):
            for col in vars_to_lag:
                grouped_col = frame.groupby(f'{groupby_var}')[f'{col}']
                frame[f'lag{lag}_{col}'] = grouped_col.shift(lag)

#### create lags for counts

In [91]:
# Lag-1 and lag-2 columns, grouped by ORI, for all six dm counts datasets.
create_lag_vars(
    req_dfs=[
        fnl_large_core_dm_counts_90_02,
        fnl_medium_core_dm_counts_90_02,
        fnl_small_core_dm_counts_90_02,
        fnl_large_core_dm_counts_03_15,
        fnl_medium_core_dm_counts_03_15,
        fnl_small_core_dm_counts_03_15,
    ],
    vars_to_lag=[
        'dm_violent_crime',
        'dm_total_officers',
        'dm_prison_occupancy_count',
        'dm_jail_occupancy_count',
        'dm_drug_tot_arrests',
        'dm_disorder_arrests_tot_index',
    ],
    groupby_var='ORI',
    num_of_lags=2,
)

#### create lags for rates

In [93]:
# Lag-1 and lag-2 columns, grouped by ORI, for all six dm rates datasets.
create_lag_vars(
    req_dfs=[
        fnl_large_core_dm_rates_90_02,
        fnl_medium_core_dm_rates_90_02,
        fnl_small_core_dm_rates_90_02,
        fnl_large_core_dm_rates_03_15,
        fnl_medium_core_dm_rates_03_15,
        fnl_small_core_dm_rates_03_15,
    ],
    vars_to_lag=[
        'dm_violent_crime_rate',
        'dm_total_officers_rate',
        'dm_prison_occupancy_count_rate',
        'dm_jail_occupancy_count_rate',
        'dm_drug_tot_arrests_rate',
        'dm_disorder_arrests_tot_index_rate',
    ],
    groupby_var='ORI',
    num_of_lags=2,
)

In [None]:
def wols_multiple_data_multiple_ind_vars(req_dfs, dep_vars, ind_vars, weights_var, dataset_names=False, counts=True):

