First, import needed modules

In [122]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

Initialize variables

In [123]:
# Working directory of the running kernel. The data and figure paths used later
# in this notebook are built by prefixing this value, so the notebook has to be
# started from the directory that contains `data/` and `figures/`.
path = os.getcwd()
print(path)

/Users/jan/Dropbox/UP_EPQM/2222/MA/powerlinemonsters


Import datasets

In [124]:
# AGS is forced to text in both files so the merge key has the same dtype on
# each side (presumably this also protects leading zeros in the key -- confirm).
# NOTE(review): recent pandas releases list the literal text "None" among the
# default NA markers of read_csv; the plotting cells look up a treatment group
# labelled 'None', so confirm that label survives parsing on your pandas version.
controls = pd.read_csv(
    f'{path}/data/controls.csv',
    sep=',',
    encoding='UTF-8-SIG',
    converters={'AGS': str},
)
treatment = pd.read_csv(
    f'{path}/data/treatment.csv',
    converters={'AGS': str},
)

Merge the controls and treatment data on AGS

In [125]:
# Outer join on AGS: rows whose key appears in only one of the two frames are
# kept, and indicator=True adds a `_merge` column recording each row's origin.
control_treat = controls.merge(treatment, how='outer', on='AGS', indicator=True)
# Tally matched vs. unmatched rows as a quick sanity check of the join.
control_treat['_merge'].value_counts()

both          336665
left_only       6382
right_only         6
Name: _merge, dtype: int64

Drop early years

In [126]:
# Keep only rows whose year is greater than 1993. Rows with a missing year are
# removed as well, because the comparison evaluates to False for NaN.
after_1993 = control_treat['year'] > 1993
control_treat = control_treat[after_1993]

In [127]:
# Yearly mean / standard deviation / count of the control variables, computed
# separately for every treatment group.
#
# `treatment_cols` maps the year used to label each figure (and to position its
# vertical marker) to the column of `control_treat` holding the treatment status.
# NOTE(review): the keys 2010 and 2014 are paired with the columns
# 'treatment_09' and 'treatment_13' -- confirm the one-year offset is intended.
treatment_cols = {2005: 'treatment_05', 2010: 'treatment_09', 2014: 'treatment_13'}
control_vars = ['pop_density', 'unemployed', 'avg_age', 'female']

# Build the result in a fresh dict instead of overwriting the mapping that is
# being iterated, and use a loop name that does not clobber the `treatment`
# DataFrame loaded earlier.
avg_dfs = {}
for year, treatment_col in treatment_cols.items():
    avg_dfs[year] = (
        control_treat[control_vars + [treatment_col, 'year']]
        # Group keys go to the index here and come back as ordinary columns via
        # reset_index(); this yields the same layout on every pandas version.
        .groupby([treatment_col, 'year'])
        # String names select the pandas implementations (sample std, ddof=1),
        # which is what the numpy callables were mapped to before that
        # behaviour was deprecated.
        .agg(['mean', 'std', 'count'])
        .reset_index()
        # Uniform column name so the plotting code can treat every cohort alike.
        .rename(columns={treatment_col: 'treatment'})
    )
avg_dfs[2005].head(10)

Unnamed: 0_level_0,treatment,year,pop_density,pop_density,pop_density,unemployed,unemployed,unemployed,avg_age,avg_age,avg_age,female,female,female
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count,mean,std,count,mean,std,count,mean,std,count
0,Direct Line,1994.0,160.742884,206.29391,263,,,0,,,0,,,0
1,Direct Line,1995.0,162.194687,207.877288,263,,,0,,,0,50.566696,0.946065,112
2,Direct Line,1996.0,163.712778,209.904193,263,,,0,,,0,50.544911,1.00276,112
3,Direct Line,1997.0,164.344095,211.415888,263,,,0,,,0,50.553036,1.017316,112
4,Direct Line,1998.0,164.827472,212.010107,263,4.498629,1.648418,112,,,0,50.531786,1.033306,112
5,Direct Line,1999.0,165.097361,211.916325,263,4.308182,1.657781,112,,,0,50.518125,1.010024,112
6,Direct Line,2000.0,165.80423,212.060085,263,3.995782,1.759012,112,,,0,50.498571,1.073288,112
7,Direct Line,2001.0,165.959969,212.641625,263,3.956907,1.812756,112,39.908482,2.289702,112,50.482679,1.067519,112
8,Direct Line,2002.0,165.713614,213.074296,263,4.13052,1.767236,112,40.209018,2.282027,112,50.473839,1.071933,112
9,Direct Line,2003.0,165.339362,212.767199,263,4.417582,1.867868,112,40.568036,2.179504,112,50.451339,1.041294,112


Plot the averages of pop_density, unemployed, avg_age and female over years for treated and untreated municipalities

In [128]:
# Treatment groups with the colour and marker used for each of them
# (the three lists are index-aligned).
treatments = ['None', 'Direct Line', 'Within 15km', 'Within 30km', 'Within 50km']
colors = ['#800000', '#ffd8b1', '#808000', '#42d4f4', '#911eb4']
markers = ['o', 's', 'v', 'D', '^']

# One entry per subplot:
# (grid position, variable, title, y-axis label, y limits, x limits or None, x ticks)
panels = [
    ((0, 0), 'pop_density', 'Population Density', 'Population Density',
     (140, 300), None, [1994, 1998, 2002, 2005, 2009, 2013, 2017]),
    ((0, 1), 'unemployed', 'Share of Unemployed', '% Unemployed',
     (2, 6), (1997, 2018), [1998, 2002, 2005, 2009, 2013, 2017]),
    ((1, 0), 'avg_age', 'Average Age', 'Average Age',
     (40, 46), (2000, 2018), [2002, 2005, 2009, 2013, 2017]),
    ((1, 1), 'female', 'Share of Females', '% Female',
     (50, 51.1), (1994, 2018), [1994, 1998, 2002, 2005, 2009, 2013, 2017]),
]

# set legend font size (applies to every legend created afterwards)
plt.rc('legend', fontsize=6)

# savefig does not create missing directories, so make sure the target exists.
output_dir = f'{path}/figures/controls'
os.makedirs(output_dir, exist_ok=True)

for year, df in avg_dfs.items():
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10), dpi=300, sharex=False)
    # title for entire figure
    fig.suptitle(f'Averages values of control variables \n by treatment status in {year}', fontsize=20)

    for position, column, title, ylabel, ylim, xlim, xticks in panels:
        ax = axes[position]

        # One line per treatment group. errorbar() is called without yerr, so it
        # draws plain marker lines; the 'std' columns of df are not plotted.
        for i, treatment in enumerate(treatments):
            plot_data = df[df['treatment'] == treatment]
            ax.errorbar(plot_data['year'], plot_data[column, 'mean'], color=colors[i], marker=markers[i],
                        label=treatment, lw=0.8, markersize=4, capsize=4)

        # Axis decoration is applied once per panel, after all series exist, so
        # the legend lists every group and the marker line is drawn a single time.
        ax.set_title(title, fontsize=12)
        ax.set_ylabel(ylabel, fontsize=10)
        ax.legend(loc='upper left')
        ax.axvline(x=year, color='red', linestyle='--')
        ax.set_ylim(*ylim)
        if xlim is not None:
            ax.set_xlim(*xlim)
        ax.set_xticks(xticks)
        ax.tick_params(labelsize=8)

    # Save figure, then release it explicitly so figures do not pile up in memory
    fig.savefig(f'{output_dir}/{year}_controls.png')
    plt.close(fig)

Plot graph for ever-treated status

In [129]:
# Yearly mean / standard deviation / count of the control variables for each
# value of `ever_treated`.
avg_ever_treated = (
    control_treat[['pop_density', 'unemployed', 'avg_age', 'female', 'ever_treated', 'year']]
    # Group keys go to the index here and come back as ordinary columns via
    # reset_index(); this yields the same layout on every pandas version.
    .groupby(['ever_treated', 'year'])
    # String names select the pandas implementations (sample std, ddof=1).
    .agg(['mean', 'std', 'count'])
    .reset_index()
)
# The grouping column deliberately keeps its name 'ever_treated': the plotting
# cell filters on avg_ever_treated['ever_treated']. The former rename keyed on
# whatever value a previous loop had left in `treatment` matched no column and
# has been dropped.
avg_ever_treated.head()

Unnamed: 0_level_0,ever_treated,year,pop_density,pop_density,pop_density,unemployed,unemployed,unemployed,avg_age,avg_age,avg_age,female,female,female
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count,mean,std,count,mean,std,count,mean,std,count
0,Direct Line,1994.0,210.666273,335.487221,822,,,0,,,0,,,0
1,Direct Line,1995.0,212.410166,335.830126,822,,,0,,,0,50.742305,1.1815,295
2,Direct Line,1996.0,214.0896,336.648733,822,,,0,,,0,50.720644,1.204085,295
3,Direct Line,1997.0,215.217418,335.655752,822,,,0,,,0,50.734678,1.215508,295
4,Direct Line,1998.0,216.426645,335.241116,822,4.431639,1.694546,295,,,0,50.726237,1.227244,295


In [130]:
# Treatment groups with the colour and marker used for each of them
# (the three lists are index-aligned).
treatments = ['None', 'Direct Line', 'Within 15km', 'Within 30km', 'Within 50km']
colors = ['#800000', '#ffd8b1', '#808000', '#42d4f4', '#911eb4']
markers = ['o', 's', 'v', 'D', '^']

# One entry per subplot:
# (grid position, variable, title, y-axis label, y limits, x limits or None, x ticks)
panels = [
    ((0, 0), 'pop_density', 'Population Density', 'Population Density',
     (140, 300), None, [1994, 1998, 2002, 2005, 2009, 2013, 2017]),
    ((0, 1), 'unemployed', 'Share of Unemployed', '% Unemployed',
     (2, 6), (1997, 2018), [1998, 2002, 2005, 2009, 2013, 2017]),
    ((1, 0), 'avg_age', 'Average Age', 'Average Age',
     (40, 46), (2000, 2018), [2002, 2005, 2009, 2013, 2017]),
    ((1, 1), 'female', 'Share of Females', '% Female',
     (50, 51.1), (1994, 2018), [1994, 1998, 2002, 2005, 2009, 2013, 2017]),
]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10), dpi=300, sharex=False)
# set legend font size (applies to every legend created afterwards)
plt.rc('legend', fontsize=6)
# title for entire figure
fig.suptitle(f'Averages values of control variables \n by ever treated status', fontsize=20)

for position, column, title, ylabel, ylim, xlim, xticks in panels:
    ax = axes[position]

    # One line per group. errorbar() is called without yerr, so it draws plain
    # marker lines; the 'std' columns of avg_ever_treated are not plotted.
    for i, treatment in enumerate(treatments):
        plot_data = avg_ever_treated[avg_ever_treated['ever_treated'] == treatment]
        ax.errorbar(plot_data['year'], plot_data[column, 'mean'], color=colors[i], marker=markers[i],
                    label=treatment, lw=0.8, markersize=4, capsize=4)

    # Axis decoration is applied once per panel, after all series exist, so the
    # legend lists every group.
    ax.set_title(title, fontsize=12)
    ax.set_ylabel(ylabel, fontsize=10)
    ax.legend(loc='upper left')
    ax.set_ylim(*ylim)
    if xlim is not None:
        ax.set_xlim(*xlim)
    ax.set_xticks(xticks)
    ax.tick_params(labelsize=8)

# Save figure; savefig does not create missing directories, so make sure the
# target exists first, then release the figure explicitly.
output_dir = f'{path}/figures/controls'
os.makedirs(output_dir, exist_ok=True)
fig.savefig(f'{output_dir}/ever_treated_controls.png')
plt.close(fig)