First, import needed modules

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

Initialize variables

In [2]:
# Base directory for all file access: the data and figure paths used further down are built
# from the current working directory, so the notebook must be started from the project root.
path = os.getcwd()
# Echo the resolved directory so a wrong working directory is noticed immediately.
print(path)

/Users/jan/Dropbox/UP_EPQM/2222/MA/powerlinemonsters


Import datasets

In [44]:
# AGS is parsed as text rather than as a number in both files
# (presumably to keep leading zeros in the key — confirm).
controls = pd.read_csv(
    f'{path}/data/controls.csv',
    sep=',',
    encoding='UTF-8-SIG',  # this codec skips a leading byte-order mark if the file has one
    converters={'AGS': str},
)
treatment = pd.read_csv(
    f'{path}/data/treatment.csv',
    converters={'AGS': str},
)

Merge the controls and treatment data frames on AGS

In [45]:
# Outer join on AGS keeps rows that appear in only one of the two files;
# the `_merge` indicator column records where each row came from.
control_treat = controls.merge(treatment, how='outer', on='AGS', indicator=True)
# Tally matched vs. unmatched rows as a quick sanity check on the join.
control_treat['_merge'].value_counts()

both          336665
left_only       6382
right_only         6
Name: _merge, dtype: int64

Drop early years

In [49]:
# Keep observations from 1994 onwards; rows whose year is missing compare as False and are dropped too.
after_1993 = control_treat['year'].gt(1993)
control_treat = control_treat.loc[after_1993]

In [50]:
# Treatment-status column to group by for each reference year.
# NOTE(review): 2010 is paired with 'treatment_09' and 2014 with 'treatment_13' — confirm
# that this offset between key and column suffix is intended.
treatment_cols = {2005: 'treatment_05', 2010: 'treatment_09', 2014: 'treatment_13'}
control_vars = ['pop_density', 'unemployed', 'avg_age', 'female']

# Build a separate result dict instead of rewriting the mapping while iterating over it,
# and use a loop name that does not overwrite the `treatment` DataFrame loaded earlier.
avg_dfs = {}
for year, treatment_col in treatment_cols.items():
    avg_dfs[year] = (
        control_treat[control_vars + [treatment_col, 'year']]
        # Group keys go to the index here and are moved back to columns by reset_index below.
        .groupby([treatment_col, 'year'])
        # String names select pandas' own mean / sample std (ddof=1) / non-null count.
        .agg(['mean', 'std', 'count'])
        .reset_index()
        # Give the status column the same name in every frame so plotting code can share it.
        .rename(columns={treatment_col: 'treatment'})
    )
avg_dfs[2005].head()

Unnamed: 0_level_0,treatment,year,pop_density,pop_density,pop_density,unemployed,unemployed,unemployed,avg_age,avg_age,avg_age,female,female,female
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count,mean,std,count,mean,std,count,mean,std,count
0,Direct Line,1994.0,160.742884,206.29391,263,4.001793,1.515018,263,43.17316,2.609361,263,50.607804,0.963976,263
1,Direct Line,1995.0,162.194687,207.877288,263,4.074475,1.529717,263,43.044681,2.651231,263,50.627262,0.982438,263
2,Direct Line,1996.0,163.712778,209.904193,263,4.147156,1.552604,263,42.916203,2.703789,263,50.619766,1.005114,263
3,Direct Line,1997.0,164.344095,211.415888,263,4.219838,1.583324,263,42.787724,2.766426,263,50.625007,1.009604,263
4,Direct Line,1998.0,164.827472,212.010107,263,4.29252,1.621431,263,42.659245,2.838476,263,50.617739,1.015787,263


Plot the averages of pop_density, unemployed, avg_age and female over years for treated and untreated municipalities

In [64]:
treatments = ['None', 'Direct Line', 'Within 15km', 'Within 30km', 'Within 50km']
colors = ['#800000', '#ffd8b1', '#808000', '#42d4f4', '#911eb4']
markers = ['o', 's', 'v', 'D', '^']

# One entry per subplot:
# (grid position, variable, title, y-axis label, y-limits, shaded x-ranges).
# NOTE(review): the grey bands presumably flag years without reliable data — confirm.
# The (1994, 1994) bands have zero width and therefore draw nothing visible.
panel_specs = [
    ((0, 0), 'pop_density', 'Population Density', 'Population Density', (140, 300), []),
    ((0, 1), 'unemployed', 'Share of Unemployed', '% Unemployed', (3.2, 5), [(1994, 1994), (2018, 2019)]),
    ((1, 0), 'avg_age', 'Average Age', 'Average Age', (42, 45), [(1994, 2000), (2018, 2019)]),
    ((1, 1), 'female', 'Share of Females', '% Female', (50, 51.3), [(1994, 1994), (2018, 2019)]),
]
year_ticks = [1994, 1998, 2002, 2005, 2009, 2013, 2017]

# Legend font size is a global rc setting; set it once, before any legend is created.
plt.rc('legend', fontsize=6)

# One figure per reference year, each with a 2x2 grid of control-variable panels.
for year, df in avg_dfs.items():
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10), dpi=300, sharex=False)
    # title for entire figure
    fig.suptitle(f'Averages values of control variables \n by treatment status in {year}', fontsize=20)

    # Data: one line per treatment group in every panel.
    # No yerr is passed, so errorbar draws only the mean line with markers.
    for i, treatment in enumerate(treatments):
        plot_data = df[df['treatment'] == treatment]
        for position, variable, _title, _ylabel, _ylim, _shaded in panel_specs:
            axes[position].errorbar(
                plot_data['year'], plot_data[variable, 'mean'],
                color=colors[i], marker=markers[i], label=treatment,
                lw=0.8, markersize=4, capsize=4,
            )

    # Decoration: applied once per panel. Repeating it for every treatment group would
    # stack the semi-transparent bands (making them almost opaque) and overdraw the
    # reference line several times.
    for position, _variable, title, ylabel, ylim, shaded in panel_specs:
        ax = axes[position]
        ax.set_title(title, fontsize=12)
        ax.set_ylabel(ylabel, fontsize=10)
        ax.set_ylim(*ylim)
        ax.set_xticks(year_ticks)
        ax.tick_params(labelsize=8)
        # Dashed red line marks the reference year of this figure.
        ax.axvline(x=year, color='red', linestyle='--')
        for start, end in shaded:
            ax.axvspan(start, end, facecolor='grey', alpha=0.8)
        ax.legend(loc='upper left')

    # Save figure, then close this specific figure so figures do not pile up in memory.
    fig.savefig(f'{path}/figures/controls/{year}_controls.png')
    plt.close(fig)

Plot graph for ever-treated status

In [65]:
# Mean, sample standard deviation and count of each control variable per
# (ever_treated status, year) cell.
# The grouping column keeps its name 'ever_treated'; no rename is applied, so this cell
# does not depend on any variable left behind by earlier loops.
avg_ever_treated = (
    control_treat[['pop_density', 'unemployed', 'avg_age', 'female', 'ever_treated', 'year']]
    # Group keys go to the index here and are moved back to columns by reset_index below.
    .groupby(['ever_treated', 'year'])
    # String names select pandas' own mean / sample std (ddof=1) / non-null count.
    .agg(['mean', 'std', 'count'])
    .reset_index()
)
avg_ever_treated.head()

Unnamed: 0_level_0,ever_treated,year,pop_density,pop_density,pop_density,unemployed,unemployed,unemployed,avg_age,avg_age,avg_age,female,female,female
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count,mean,std,count,mean,std,count,mean,std,count
0,Direct Line,1994.0,210.666273,335.487221,822,4.004111,1.630332,822,43.036688,2.221406,822,50.858067,1.175226,822
1,Direct Line,1995.0,212.410166,335.830126,822,4.05646,1.636037,822,42.942502,2.24703,822,50.871149,1.200164,822
2,Direct Line,1996.0,214.0896,336.648733,822,4.108809,1.647507,822,42.848317,2.282517,822,50.864757,1.207898,822
3,Direct Line,1997.0,215.217418,335.655752,822,4.161158,1.664623,822,42.754131,2.327415,822,50.871175,1.210295,822
4,Direct Line,1998.0,216.426645,335.241116,822,4.213507,1.687214,822,42.659945,2.381193,822,50.869527,1.213815,822


In [66]:
treatments = ['None', 'Direct Line', 'Within 15km', 'Within 30km', 'Within 50km']
colors = ['#800000', '#ffd8b1', '#808000', '#42d4f4', '#911eb4']
markers = ['o', 's', 'v', 'D', '^']

# The ever-treated grouping has no single reference year, so every treatment year gets its
# own dashed line. Defined here explicitly so the cell does not rely on a loop variable
# left over from another cell.
# NOTE(review): confirm these are the years that should be marked on this figure.
treatment_years = (2005, 2010, 2014)

# One entry per subplot:
# (grid position, variable, title, y-axis label, y-limits, shaded x-ranges).
# NOTE(review): the grey bands presumably flag years without reliable data — confirm.
# The (1994, 1994) bands have zero width and therefore draw nothing visible.
panel_specs = [
    ((0, 0), 'pop_density', 'Population Density', 'Population Density', (140, 300), []),
    ((0, 1), 'unemployed', 'Share of Unemployed', '% Unemployed', (3.2, 5), [(1994, 1994), (2018, 2019)]),
    ((1, 0), 'avg_age', 'Average Age', 'Average Age', (42, 45), [(1994, 2000), (2018, 2019)]),
    ((1, 1), 'female', 'Share of Females', '% Female', (50, 51.3), [(1994, 1994), (2018, 2019)]),
]
year_ticks = [1994, 1998, 2002, 2005, 2009, 2013, 2017]

# Legend font size is a global rc setting; set it before any legend is created.
plt.rc('legend', fontsize=6)

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10), dpi=300, sharex=False)
# title for entire figure
fig.suptitle(f'Averages values of control variables \n by ever treated status', fontsize=20)

# Data: one line per ever-treated group in every panel.
# No yerr is passed, so errorbar draws only the mean line with markers.
for i, treatment in enumerate(treatments):
    plot_data = avg_ever_treated[avg_ever_treated['ever_treated'] == treatment]
    for position, variable, _title, _ylabel, _ylim, _shaded in panel_specs:
        axes[position].errorbar(
            plot_data['year'], plot_data[variable, 'mean'],
            color=colors[i], marker=markers[i], label=treatment,
            lw=0.8, markersize=4, capsize=4,
        )

# Decoration: applied once per panel. Repeating it for every group would stack the
# semi-transparent bands (making them almost opaque) and overdraw the reference lines.
for position, _variable, title, ylabel, ylim, shaded in panel_specs:
    ax = axes[position]
    ax.set_title(title, fontsize=12)
    ax.set_ylabel(ylabel, fontsize=10)
    ax.set_ylim(*ylim)
    ax.set_xticks(year_ticks)
    ax.tick_params(labelsize=8)
    for treatment_year in treatment_years:
        ax.axvline(x=treatment_year, color='red', linestyle='--')
    for start, end in shaded:
        ax.axvspan(start, end, facecolor='grey', alpha=0.8)
    ax.legend(loc='upper left')

# Save figure, then close this specific figure to release its memory.
fig.savefig(f'{path}/figures/controls/ever_treated_controls.png')
plt.close(fig)