First, import needed modules

In [114]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import geopandas as gpd
import matplotlib.cm as cm
import matplotlib.colors as mcolors
from matplotlib.lines import Line2D

Initialize variables

In [115]:
# Base directory for all data and figure paths used below: the current working
# directory of the kernel. It is printed so the reader can see where files are resolved.
path = os.getcwd()
print(path)

/Users/jan/Dropbox/UP_EPQM/2222/MA/powerlinemonsters


Import datasets

In [116]:
# AGS is parsed as text in both files so that the merge key has the same type on
# either side (presumably also to keep leading zeros of the key -- confirm).
ags_as_text = {'AGS': str}

# Control variables; the file is decoded as UTF-8 with a byte-order mark.
controls = pd.read_csv(
    f'{path}/data/controls.csv',
    sep=',',
    encoding='UTF-8-SIG',
    converters=ags_as_text,
)
# Treatment assignment.
treatment = pd.read_csv(
    f'{path}/data/treatment.csv',
    converters=ags_as_text,
)

Merge the controls and treatment data frames on AGS

In [117]:
# Outer join on AGS keeps unmatched rows from both sides; indicator=True adds
# the `_merge` column recording where each row came from.
control_treat = controls.merge(
    treatment,
    how='outer',
    on='AGS',
    indicator=True,
)
# Tabulate the match status (both / left_only / right_only).
control_treat['_merge'].value_counts()

both          336665
left_only       6382
right_only         6
Name: _merge, dtype: int64

Drop early and late years

In [118]:
# Keep only rows with 1993 < year < 2018. Rows whose year is missing fail both
# comparisons and are dropped as well.
after_1993 = control_treat['year'] > 1993
before_2018 = control_treat['year'] < 2018
control_treat = control_treat[after_1993 & before_2018]

Fig E22: Controls trends by ever-treated status

In [119]:
# Yearly mean / standard deviation / count of every control variable,
# once per ever-treated group and once for the whole sample.
control_cols = ['pop_density', 'unemployed', 'avg_age', 'female']
# String names are used instead of np.mean / np.std: pandas 2.1+ warns about the
# NumPy callables and will stop translating them to the groupby methods. The
# strings keep today's results (including the sample standard deviation).
summary_stats = ['mean', 'std', 'count']

# The grouping keys are moved from the index into columns by reset_index(), so
# as_index=False is not needed (and would add a stray 'index' column on pandas
# versions that honour it together with a list of aggregations).
avg_ever_treated = (
    control_treat[control_cols + ['ever_treated', 'year']]
    .groupby(['ever_treated', 'year'])
    .agg(summary_stats)
    .reset_index()
    .rename(columns={'ever_treated': 'treatment'})
)

# Same statistics over all rows, labelled 'Mean' so they can be plotted as one more series.
mean = (
    control_treat[control_cols + ['year']]
    .groupby(['year'])
    .agg(summary_stats)
    .reset_index()
)
mean['treatment'] = 'Mean'

avg_ever_treated = pd.concat([avg_ever_treated, mean])
# Preview the rows of the overall-mean series.
avg_ever_treated[avg_ever_treated['treatment'] == 'Mean'].head()

Unnamed: 0_level_0,treatment,year,pop_density,pop_density,pop_density,unemployed,unemployed,unemployed,avg_age,avg_age,avg_age,female,female,female
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count,mean,std,count,mean,std,count,mean,std,count
0,Mean,1994.0,177.983198,273.726801,11235,,,0,,,0,,,0
1,Mean,1995.0,179.495683,273.649804,11235,,,0,,,0,50.554116,1.152348,10799
2,Mean,1996.0,180.660328,273.530082,11235,,,0,,,0,50.516251,1.035296,10799
3,Mean,1997.0,181.54329,272.758212,11235,,,0,,,0,50.497382,1.019976,10799
4,Mean,1998.0,182.32516,272.530649,11235,4.562052,2.441889,10799,,,0,50.47358,1.016162,10799


In [120]:
# Sanity check: number of rows (one per year) in each treatment group,
# including the appended 'Mean' series.
avg_ever_treated['treatment'].value_counts()

Direct Line        24
None               24
Within 0-15 km     24
Within 15-30 km    24
Within 30-50 km    24
Mean               24
Name: treatment, dtype: int64

In [121]:
# Fig. E22: 2x2 grid of yearly averages of the control variables, one line per
# ever-treated group plus the overall mean.
treatments = ['None', 'Direct Line', 'Within 0-15 km', 'Within 15-30 km', 'Within 30-50 km', 'Mean']
colors = ['#800000', '#ffd8b1', '#808000', '#34c395', '#911eb4', 'blue']
markers = ['o', 's', 'v', 'D', '^', 'h']

# One entry per subplot: grid position, plotted column, title, y-label,
# y-limits, x-limits (None = leave to matplotlib) and x-ticks.
panels = [
    ((0, 0), 'pop_density', 'Population Density', 'Population Density',
     (140, 320), None, [1994, 1998, 2002, 2005, 2009, 2013, 2017]),
    ((0, 1), 'unemployed', 'Share of Unemployed', '% Unemployed',
     (2, 6), (1997, 2018), [1998, 2002, 2005, 2009, 2013, 2017]),
    ((1, 0), 'avg_age', 'Average Age', 'Average Age',
     (40, 46), (2000, 2018), [2002, 2005, 2009, 2013, 2017]),
    ((1, 1), 'female', 'Share of Females', '% Female',
     (49.9, 51.1), (1994, 2018), [1994, 1998, 2002, 2005, 2009, 2013, 2017]),
]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10), dpi=300, sharex=False)
# set legend font size (read when the legends are created below)
plt.rc('legend', fontsize=6)
# title for entire figure
fig.suptitle('Fig. E22: \n Control Variables - Averages by ever-treated status \n', fontsize=16)

for position, column, title, ylabel, ylim, xlim, xticks in panels:
    ax = axes[position]
    ax.set_title(title, fontsize=12)
    ax.set_ylabel(ylabel, fontsize=10)
    # The loop variable is deliberately not called `treatment`: that name holds
    # the treatment DataFrame loaded earlier and must survive this cell.
    for group, color, marker in zip(treatments, colors, markers):
        plot_data = avg_ever_treated[avg_ever_treated['treatment'] == group]
        ax.errorbar(plot_data['year'], plot_data[column, 'mean'], c=color, marker=marker,
                    label=group, lw=0.8, ms=4, capsize=4)
    # Build the legend once, after all series have been added.
    ax.legend(loc='upper left')
    ax.set_ylim(*ylim)
    if xlim is not None:
        ax.set_xlim(*xlim)
    ax.set_xticks(xticks)
    ax.tick_params(labelsize=8)

# Save figure (create the output folder on a fresh checkout)
os.makedirs(f'{path}/figures/py', exist_ok=True)
fig.savefig(f'{path}/figures/py/E22_ever_treated_controls.png', bbox_inches="tight", pad_inches=0)
plt.close(fig)

Fig E23-E25: Controls trends by treatment year

In [122]:
# Yearly mean / standard deviation / count of the control variables by treatment
# status, computed separately for each treatment year's status column.
# The column-name mapping is kept apart from the result dict, so this cell can be
# re-run: the mapping is never overwritten with DataFrames.
treatment_cols = {2005: 'treatment_05', 2010: 'treatment_09', 2014: 'treatment_13'}
control_cols = ['pop_density', 'unemployed', 'avg_age', 'female']
# String names instead of np.mean / np.std: pandas 2.1+ warns about the NumPy
# callables; the strings keep the same results (sample standard deviation).
summary_stats = ['mean', 'std', 'count']

# Whole-sample statistics per year; identical for every treatment year, so
# computed once outside the loop.
mean = (
    control_treat[control_cols + ['year']]
    .groupby(['year'])
    .agg(summary_stats)
    .reset_index()
)
mean['treatment'] = 'Mean'

avg_dfs = {}
# The loop variable is not called `treatment`, so the treatment DataFrame
# loaded earlier is left intact.
for year, treatment_col in treatment_cols.items():
    by_status = (
        control_treat[control_cols + [treatment_col, 'year']]
        .groupby([treatment_col, 'year'])
        .agg(summary_stats)
        .reset_index()
        .rename(columns={treatment_col: 'treatment'})
    )
    # Append the overall mean as one more 'treatment' series.
    avg_dfs[year] = pd.concat([by_status, mean])

# Preview the overall-mean rows of the 2005 frame.
avg_dfs[2005][avg_dfs[2005]['treatment'] == 'Mean'].head(40)

Unnamed: 0_level_0,treatment,year,pop_density,pop_density,pop_density,unemployed,unemployed,unemployed,avg_age,avg_age,avg_age,female,female,female
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count,mean,std,count,mean,std,count,mean,std,count
0,Mean,1994.0,177.983198,273.726801,11235,,,0,,,0,,,0
1,Mean,1995.0,179.495683,273.649804,11235,,,0,,,0,50.554116,1.152348,10799
2,Mean,1996.0,180.660328,273.530082,11235,,,0,,,0,50.516251,1.035296,10799
3,Mean,1997.0,181.54329,272.758212,11235,,,0,,,0,50.497382,1.019976,10799
4,Mean,1998.0,182.32516,272.530649,11235,4.562052,2.441889,10799,,,0,50.47358,1.016162,10799
5,Mean,1999.0,183.131933,271.172853,11235,4.352015,2.440473,10799,,,0,50.456501,1.005093,10799
6,Mean,2000.0,183.743671,271.892317,11235,4.207775,2.698692,10799,,,0,50.446845,0.99752,10799
7,Mean,2001.0,184.315102,272.334939,11235,4.273641,2.866527,10799,40.241344,1.774452,10799,50.42091,0.999051,10799
8,Mean,2002.0,184.603404,272.895422,11235,4.490443,2.878124,10799,40.555035,1.801721,10799,50.393882,0.993415,10799
9,Mean,2003.0,184.637231,272.734872,11235,4.862838,3.015907,10798,40.897454,1.816544,10798,50.396477,0.98958,10798


In [123]:
# Fig. E23-E25: one 2x2 figure per treatment year with yearly averages of the
# control variables by treatment status; a dashed red line marks the treatment year.
treatments = ['None', 'Direct Line', 'Within 0-15 km', 'Within 15-30 km', 'Within 30-50 km', 'Mean']
colors = ['#800000', '#ffd8b1', '#808000', '#34c395', '#911eb4', 'blue']
markers = ['o', 's', 'v', 'D', '^', 'h']

# One entry per subplot: grid position, plotted column, title, y-label,
# y-limits, x-limits and x-ticks.
panels = [
    ((0, 0), 'pop_density', 'Population Density', 'Population Density',
     (140, 320), (1994, 2018), [1994, 1998, 2002, 2005, 2009, 2013, 2017]),
    ((0, 1), 'unemployed', 'Share of Unemployed', '% Unemployed',
     (2, 6), (1997, 2018), [1998, 2002, 2005, 2009, 2013, 2017]),
    ((1, 0), 'avg_age', 'Average Age', 'Average Age',
     (40, 46), (2000, 2018), [2002, 2005, 2009, 2013, 2017]),
    ((1, 1), 'female', 'Share of Females', '% Female',
     (49.9, 51.1), (1994, 2018), [1994, 1998, 2002, 2005, 2009, 2013, 2017]),
]

# create the output folder on a fresh checkout
os.makedirs(f'{path}/figures/py', exist_ok=True)

# Figures are numbered consecutively starting at E23, in the order of avg_dfs.
for n, (year, df) in enumerate(avg_dfs.items(), start=23):
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10), dpi=300, sharex=False)
    # set legend font size (read when the legends are created below)
    plt.rc('legend', fontsize=6)
    # title for entire figure
    fig.suptitle(f'Fig. E{n}: \n Control Variables - Averages by treatment status in {year}', fontsize=16)

    for position, column, title, ylabel, ylim, xlim, xticks in panels:
        ax = axes[position]
        ax.set_title(title, fontsize=12)
        ax.set_ylabel(ylabel, fontsize=10)
        # The loop variable is deliberately not called `treatment`: that name
        # holds the treatment DataFrame loaded earlier.
        for group, color, marker in zip(treatments, colors, markers):
            plot_data = df[df['treatment'] == group]
            ax.errorbar(plot_data['year'], plot_data[column, 'mean'], c=color, marker=marker,
                        label=group, lw=0.8, ms=4, capsize=4)
        # Treatment-year marker: drawn once per panel, on top of the series.
        ax.axvline(x=year, c='red', linestyle='--')
        ax.legend(loc='upper left')
        ax.set_ylim(*ylim)
        ax.set_xlim(*xlim)
        ax.set_xticks(xticks)
        ax.tick_params(labelsize=8)

    # Save figure
    fig.savefig(f'{path}/figures/py/E{n}_{year}_controls.png', bbox_inches="tight", pad_inches=0)
    plt.close(fig)

Fig E26-E29: Choropleth maps of controls by year

In [124]:
# Load the three shapefiles used for the maps: municipality polygons,
# state outlines (VG250_LAN) and the power lines.
municipalities = (
    gpd.read_file(f'{path}/data/geodata/municipalities.shp')
    .set_geometry('geometry')
)
states = gpd.read_file(f'{path}/data/geodata/VG250_LAN.shp')
powerlines = (
    gpd.read_file(f'{path}/data/geodata/all_powerlines.shp')
    .set_geometry('geometry')
)
# set year to treatment year: cast to int, then map 2013 -> 2014 and 2009 -> 2010;
# all other years are left unchanged
powerlines['year'] = powerlines['year'].astype(int).replace({2013: 2014, 2009: 2010})

In [125]:
# Attach the municipality attributes (incl. geometry) to every controls row;
# the default inner join keeps only AGS values present in both tables.
controls_muns = controls.merge(municipalities, on='AGS')
# Row / column count of the merged table.
controls_muns.shape

(336665, 15)

In [126]:
# Keep only the three years that are drawn as map panels.
years = [2005, 2010, 2014]
in_plot_years = controls_muns['year'].isin(years)
controls_muns = controls_muns.loc[in_plot_years]
# Row / column count after the subset.
controls_muns.shape

(33757, 15)

In [128]:
# Choropleth maps: one figure per control variable with three panels
# (2005, 2010, 2014), state borders in white and that year's power lines in red.
legend_dict = {2005: 'DENA 2005', 2010: 'EnLAG 2009', 2014: 'BBPlG 2013'}
control_vars = {'pop_density': 'Population Density', 'unemployed': 'Share of Unemployed', 'avg_age': 'Average Age', 'female': 'Share of Females'}
cmap = 'viridis'

# create the output folder on a fresh checkout
os.makedirs(f'{path}/figures/py', exist_ok=True)

# Figures are numbered consecutively starting at E26.
for n, (var, title) in enumerate(control_vars.items(), start=26):
    # Colour scale limits are taken over all three plotted years so the panels
    # share one colorbar. Named vmin/vmax to avoid shadowing the min()/max() builtins.
    vmin = controls_muns[var].min()
    vmax = controls_muns[var].max()

    # initialize figure
    fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(10, 7), dpi=300, constrained_layout=True)
    # title for entire figure
    fig.suptitle(f'Fig. E{n}: \n {title}', fontsize=16)

    for i, (year, desc) in enumerate(legend_dict.items()):
        # subset powerlines
        plot_pl = powerlines[powerlines['year'] == year]
        # subset controls and make sure the geometry column is active for plotting
        controls_plot = controls_muns[controls_muns['year'] == year].set_geometry('geometry')
        # draw subfig
        ax[i].set_aspect('equal')
        ax[i].set_adjustable('datalim', share=True)
        ax[i].set_axis_off()
        ax[i].set_title(f'\n {desc}')
        # No per-panel legend: the shared colorbar below serves all three panels.
        controls_plot.plot(column=var, ax=ax[i], vmin=vmin, vmax=vmax, cmap=cmap, zorder=1)
        states.boundary.plot(ax=ax[i], color='white', lw=0.1, zorder=3)
        plot_pl.plot(ax=ax[i], color='red', lw=2, zorder=3)

    # define a mappable based on which the colorbar will be drawn
    mappable = cm.ScalarMappable(norm=mcolors.Normalize(vmin, vmax), cmap=cmap)
    # define position and extent of colorbar (figure coordinates)
    cb_ax = fig.add_axes([0.1, 0.1, 0.8, 0.05])
    # draw colorbar; it is labelled with the variable's display name from
    # control_vars, because no `labels` mapping is defined anywhere in this notebook
    cbar = fig.colorbar(mappable, cax=cb_ax, orientation='horizontal', label=title)
    fig.savefig(f'{path}/figures/py/E{n}_map_{var}.png', bbox_inches="tight", pad_inches=0)
    plt.close(fig)
    # progress output: one line per finished variable
    print(var)

pop_density
unemployed
avg_age
female
