In [56]:
import pandas as pd
import statsmodels.api as sm
import numpy as np
import os

# Widen pandas' display limits so large frames are shown without truncation.
# set_option accepts several (option, value) pairs in a single call.
pd.set_option(
    'display.max_rows', 500,
    'display.max_columns', 500,
    'display.width', 1000,
)

### Create reference weight column from median population

In [87]:
def create_pop_wt_col_by_median(dir_path, op_path, orig=False):
    """Add a per-ORI median population column to every file in a directory.

    Each file in ``dir_path`` is read as CSV, the median of the population
    column is computed within each ``ORI`` group, and that median is merged
    back onto every row as ``<population column>_wt``. The result is written
    to ``<op_path>/<name>_ref_pop.csv``, where ``<name>`` is the part of the
    input file name before its first dot. Input files are not modified.

    Parameters
    ----------
    dir_path : str
        Directory holding the input files. Every file must contain an
        ``ORI`` column and the population column selected by ``orig``.
    op_path : str
        Directory the output files are written to; created if missing.
    orig : bool, default False
        If True the ``population`` column is used, otherwise
        ``fnl_population``.

    Raises
    ------
    KeyError
        If a file lacks ``ORI`` or the selected population column.

    Notes
    -----
    The process working directory is left untouched (no ``os.chdir``), so
    the kernel's state is the same before and after the call. A relative
    ``op_path`` is therefore resolved against the caller's current
    directory, not against ``dir_path``.
    """
    pop = 'population' if orig else 'fnl_population'
    wt_col = f'{pop}_wt'
    os.makedirs(op_path, exist_ok=True)

    # Sorted so the processing order does not depend on the file system.
    for fl in sorted(os.listdir(dir_path)):
        src = os.path.join(dir_path, fl)
        # Skip Finder metadata and sub-directories (e.g. .ipynb_checkpoints),
        # which would otherwise make read_csv fail.
        if fl == '.DS_Store' or not os.path.isfile(src):
            continue

        fl_name = fl.split('.')[0]
        df = pd.read_csv(src)

        # The built-in groupby median replaces agg(np.median), which newer
        # pandas versions deprecate in favour of the string/built-in form.
        ori_median = (
            df.groupby('ORI')[pop]
            .median()
            .rename(wt_col)
            .reset_index()
        )
        # Left merge keeps every input row in its original order.
        df = df.merge(ori_median, on='ORI', how='left')
        df.to_csv(os.path.join(op_path, f'{fl_name}_ref_pop.csv'), index=False)

#### Create pop wt col for all dm

In [67]:
# orig is left at its default (False), so the fnl_population column is used.
create_pop_wt_col_by_median(
    dir_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_dm',
    op_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref',
)

#### Create pop wt col for 90-02 dm

In [68]:
# orig is left at its default (False), so the fnl_population column is used.
create_pop_wt_col_by_median(
    dir_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_90_02',
    op_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_90_02',
)

#### Create pop wt col for 03-15 dm

In [69]:
# orig is left at its default (False), so the fnl_population column is used.
create_pop_wt_col_by_median(
    dir_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_03_15',
    op_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_03_15',
)

#### Create pop wt col for orig 90-02

In [88]:
# orig=True selects the population column instead of fnl_population.
create_pop_wt_col_by_median(
    dir_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_90_02',
    op_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_pop_ref_90_02',
    orig=True,
)

#### Create pop wt col for orig 03-15

In [89]:
# orig=True selects the population column instead of fnl_population.
create_pop_wt_col_by_median(
    dir_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_03_15',
    op_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_pop_ref_03_15',
    orig=True,
)

#### Create pop wt col for orig 90-15

In [90]:
# orig=True selects the population column instead of fnl_population.
create_pop_wt_col_by_median(
    dir_path='/Users/salma/Research/us_crime_data_analysis/data/final_core',
    op_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_pop_ref',
    orig=True,
)

### Create lagged variables with the required shift

In [77]:
# Columns to lag in "counts" files.
vars_lag_list_counts = [
    'dm_violent_crime',
    'dm_total_officers',
    'dm_prison_occupancy_count',
    'dm_jail_occupancy_count',
    'dm_drug_tot_arrests',
    'dm_disorder_arrests_tot_index',
]

# Columns to lag in "rates" files: each count column name with a "_rate" suffix.
vars_lag_list_rates = [f'{count_var}_rate' for count_var in vars_lag_list_counts]

In [80]:
def create_lag_vars(dir_path, op_path, groupby_var, num_of_lags,
                    counts_vars=None, rates_vars=None):
    """Add within-group lagged copies of selected columns to every file.

    Each file in ``dir_path`` is read as CSV. The columns to lag are chosen
    from the file name (text before the first dot, split on ``_``): a
    ``counts`` token selects the count variables, otherwise a ``rates``
    token selects the rate variables. For every lag ``k`` in
    ``1..num_of_lags`` and every selected column ``v`` a column
    ``lag{k}_{v}`` is added holding ``v`` shifted by ``k`` rows within each
    ``groupby_var`` group. The result is written to
    ``<op_path>/<name>_lag.csv``.

    Parameters
    ----------
    dir_path : str
        Directory holding the input files.
    op_path : str
        Directory the output files are written to; created if missing.
    groupby_var : str
        Column whose groups bound the shift (lags never cross groups).
    num_of_lags : int
        Number of lags to create; lags 1 through ``num_of_lags``.
    counts_vars, rates_vars : list of str, optional
        Columns to lag for ``counts`` / ``rates`` files. When omitted the
        module-level ``vars_lag_list_counts`` / ``vars_lag_list_rates``
        lists are used.

    Notes
    -----
    * Files whose name contains neither token are skipped with a warning.
      Previously such a file either raised ``UnboundLocalError`` or was
      silently processed with the variable list left over from the
      preceding file.
    * The shift follows the row order of the file. NOTE(review): this
      presumably expects rows already sorted chronologically within each
      group -- confirm against the upstream files.
    * The process working directory is left untouched (no ``os.chdir``); a
      relative ``op_path`` is resolved against the caller's current
      directory.
    """
    import warnings

    os.makedirs(op_path, exist_ok=True)

    # Sorted so the processing order does not depend on the file system.
    for fl in sorted(os.listdir(dir_path)):
        src = os.path.join(dir_path, fl)
        # Skip Finder metadata and sub-directories (e.g. .ipynb_checkpoints).
        if fl == '.DS_Store' or not os.path.isfile(src):
            continue

        fl_name = fl.split('.')[0]
        name_tokens = fl_name.split('_')
        if 'counts' in name_tokens:
            vars_to_lag = vars_lag_list_counts if counts_vars is None else counts_vars
        elif 'rates' in name_tokens:
            vars_to_lag = vars_lag_list_rates if rates_vars is None else rates_vars
        else:
            warnings.warn(
                f"Skipping {fl!r}: file name contains neither 'counts' nor 'rates'",
                stacklevel=2,
            )
            continue

        df = pd.read_csv(src)
        for lag in range(1, num_of_lags + 1):
            for var in vars_to_lag:
                df[f'lag{lag}_{var}'] = df.groupby(groupby_var)[var].shift(lag)
        df.to_csv(os.path.join(op_path, f'{fl_name}_lag.csv'), index=False)

#### create lags for all dm ref pop counts

In [81]:
# Two lags (lag1_*, lag2_*) per variable, shifted within each ORI group.
create_lag_vars(
    dir_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref',
    op_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag',
    groupby_var='ORI',
    num_of_lags=2,
)

#### create lags for 90-02 dm ref pop counts

In [82]:
# Two lags (lag1_*, lag2_*) per variable, shifted within each ORI group.
create_lag_vars(
    dir_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_90_02',
    op_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag_90_02',
    groupby_var='ORI',
    num_of_lags=2,
)

#### create lags for 03-15 dm ref pop counts


In [83]:
# Two lags (lag1_*, lag2_*) per variable, shifted within each ORI group.
create_lag_vars(
    dir_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_03_15',
    op_path='/Users/salma/Research/us_crime_data_analysis/data/final_core_dm_pop_ref_lag_03_15',
    groupby_var='ORI',
    num_of_lags=2,
)