First, import needed modules

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import geopandas as gpd

Initialize variables

In [None]:
# Project root: the current working directory of the kernel.
# Every data and figure location below is built from this value.
path = os.getcwd()
# Echo the resolved directory so a wrong working directory is spotted early.
print(path)

Import datasets

In [None]:
# Read the AGS column through `str` in both files so the merge key is text in each table.
ags_as_text = {'AGS': str}

# Control variables (read with the UTF-8-SIG encoding).
controls = pd.read_csv(
    f'{path}/data/controls.csv',
    encoding='UTF-8-SIG',
    sep=',',
    converters=ags_as_text,
)
# Treatment data, keyed by the same AGS column.
treatment = pd.read_csv(
    f'{path}/data/treatment.csv',
    converters=ags_as_text,
)

Merge the controls with the treatment data on the municipality key (AGS)

In [None]:
# Outer join on AGS keeps rows found in only one of the two tables;
# indicator=True adds a `_merge` column recording where each row came from.
control_treat = controls.merge(
    treatment,
    on='AGS',
    how='outer',
    indicator=True,
)
# Show how many rows matched in both tables versus only one of them.
control_treat['_merge'].value_counts()

Drop early and late years

In [None]:
# Keep only observations whose year lies strictly between 1993 and 2018.
# Rows with a missing year fail both comparisons and are dropped as well.
after_start = control_treat['year'] > 1993
before_end = control_treat['year'] < 2018
control_treat = control_treat[after_start & before_end]

In [None]:
# Yearly summary statistics of the control variables, once per treatment definition.
#
# For every treatment year the result holds one table with the mean, standard
# deviation and count of each control variable by (treatment group, year), plus
# the same statistics over all municipalities labelled as treatment == 'Mean'.
summary_vars = ['pop_density', 'unemployed', 'avg_age', 'female']
# String names instead of np.mean / np.std: same column labels ('mean', 'std',
# 'count') without the deprecation warning newer pandas raises for numpy callables.
summary_stats = ['mean', 'std', 'count']
# Treatment year -> column of control_treat holding the treatment status for that year.
treatment_cols = {2005: 'treatment_05', 2010: 'treatment_09', 2014: 'treatment_13'}

# The all-municipality statistics do not depend on the treatment definition,
# so they are computed once instead of once per loop iteration.
overall_by_year = (
    control_treat[summary_vars + ['year']]
    .groupby('year')
    .agg(summary_stats)
    .reset_index()
)
overall_by_year['treatment'] = 'Mean'

avg_dfs = {}
# The loop variable is deliberately not called `treatment`: that name is the
# treatment DataFrame loaded earlier, and rebinding it would break a re-run of
# the merge cell.
for year, treatment_col in treatment_cols.items():
    by_group = (
        control_treat[summary_vars + [treatment_col, 'year']]
        # Group keys go to the index and are restored by reset_index(); mixing
        # as_index=False with reset_index() can add a stray 'index' column.
        .groupby([treatment_col, 'year'])
        .agg(summary_stats)
        .reset_index()
        # Give every table the same group column name so the plots can share code.
        .rename(columns={treatment_col: 'treatment'})
    )
    avg_dfs[year] = pd.concat([by_group, overall_by_year])

# Inspect the overall-mean rows of the 2005 table.
avg_dfs[2005][avg_dfs[2005]['treatment'] == 'Mean'].head(40)

Plot the averages of pop_density, unemployed, avg_age and female over years for treated and untreated municipalities

In [None]:
# Treatment groups in plotting order, each paired with a line colour and a marker.
treatments = ['None', 'Direct Line', 'Within 15km', 'Within 30km', 'Within 50km', 'Mean']
blacks = ['#000000', '#191919', '#323232', '#4c4c4c', '#666666', 'blue']
markers = ['o', 's', 'v', 'D', '^', 'h']

# One entry per subplot: grid position, plotted variable, labels and axis ranges.
full_ticks = [1994,1998,2002,2005,2009,2013,2017]
panels = [
    {'pos': (0, 0), 'var': 'pop_density', 'title': 'Population Density', 'ylabel': 'Population Density',
     'ylim': (140, 320), 'xlim': (1994, 2018), 'xticks': full_ticks},
    {'pos': (0, 1), 'var': 'unemployed', 'title': 'Share of Unemployed', 'ylabel': '% Unemployed',
     'ylim': (2, 6), 'xlim': (1997, 2018), 'xticks': [1998,2002,2005,2009,2013,2017]},
    {'pos': (1, 0), 'var': 'avg_age', 'title': 'Average Age', 'ylabel': 'Average Age',
     'ylim': (40, 46), 'xlim': (2000, 2018), 'xticks': [2002,2005,2009,2013,2017]},
    {'pos': (1, 1), 'var': 'female', 'title': 'Share of Females', 'ylabel': '% Female',
     'ylim': (49.9, 51.1), 'xlim': (1994, 2018), 'xticks': full_ticks},
]

# set legend font size (global rc setting, used by every legend created afterwards)
plt.rc('legend',fontsize=6)

for year, df in avg_dfs.items():
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10), dpi=300, sharex=False)
    # title for entire figure
    fig.suptitle(f'Averages values of control variables \n by treatment status in {year}', fontsize=20)

    # Split the table by treatment group once per figure; all four panels reuse it.
    # The loop name is `group`, not `treatment`, so the treatment DataFrame
    # loaded earlier is not overwritten.
    series = [(group, df[df['treatment'] == group]) for group in treatments]

    for panel in panels:
        ax = axes[panel['pos']]
        ax.set_title(panel['title'], fontsize=12)
        ax.set_ylabel(panel['ylabel'], fontsize=10)
        for (group, plot_data), colour, marker in zip(series, blacks, markers):
            # No yerr is passed, so only the yearly means are drawn (no error bars).
            ax.errorbar(plot_data['year'], plot_data[panel['var'], 'mean'], c=colour, marker=marker,
                        label=group, lw=0.8, ms=4, capsize=4)
        # Mark the treatment year once per panel, drawn after the series so it stays on top.
        ax.axvline(x=year, c='red', linestyle='--')
        ax.legend(loc='upper left')
        ax.set_ylim(*panel['ylim'])
        ax.set_xlim(*panel['xlim'])
        ax.set_xticks(panel['xticks'])
        ax.tick_params(labelsize = 8)

    # Save figure
    fig.savefig(f'{path}/figures/controls/{year}_controls.png')
    # Close this specific figure so figures do not accumulate across iterations.
    plt.close(fig)

Plot graph for ever-treated status

In [None]:
# Yearly mean, standard deviation and count of the control variables by
# ever-treated status, plus the same statistics over all municipalities
# (rows labelled treatment == 'Mean').
ever_treated_vars = ['pop_density', 'unemployed', 'avg_age', 'female']
# String names give the same column labels as np.mean / np.std without the
# deprecation warning newer pandas raises for numpy callables in agg().
ever_treated_stats = ['mean', 'std', 'count']

avg_ever_treated = (
    control_treat[ever_treated_vars + ['ever_treated', 'year']]
    # Group keys go to the index and are restored by reset_index(); mixing
    # as_index=False with reset_index() can add a stray 'index' column.
    .groupby(['ever_treated', 'year'])
    .agg(ever_treated_stats)
    .reset_index()
    # Same group column name as the per-year tables so the plotting code matches.
    .rename(columns={'ever_treated': 'treatment'})
)

# Statistics over all municipalities, labelled as an extra pseudo-group.
mean = (
    control_treat[ever_treated_vars + ['year']]
    .groupby('year')
    .agg(ever_treated_stats)
    .reset_index()
)
mean['treatment'] = 'Mean'

avg_ever_treated = pd.concat([avg_ever_treated, mean])
# Inspect the overall-mean rows.
avg_ever_treated[avg_ever_treated['treatment'] == 'Mean'].head()

In [None]:
# Treatment groups in plotting order, each paired with a line colour and a marker.
treatments = ['None', 'Direct Line', 'Within 15km', 'Within 30km', 'Within 50km', 'Mean']
blacks = ['#000000', '#191919', '#323232', '#4c4c4c', '#666666', 'blue']
markers = ['o', 's', 'v', 'D', '^', 'h']

# One entry per subplot: grid position, plotted variable, labels and axis ranges.
# The population-density panel now sets its x-limits explicitly, like the other
# three panels and like the per-treatment-year figures.
full_ticks = [1994,1998,2002,2005,2009,2013,2017]
panels = [
    {'pos': (0, 0), 'var': 'pop_density', 'title': 'Population Density', 'ylabel': 'Population Density',
     'ylim': (140, 320), 'xlim': (1994, 2018), 'xticks': full_ticks},
    {'pos': (0, 1), 'var': 'unemployed', 'title': 'Share of Unemployed', 'ylabel': '% Unemployed',
     'ylim': (2, 6), 'xlim': (1997, 2018), 'xticks': [1998,2002,2005,2009,2013,2017]},
    {'pos': (1, 0), 'var': 'avg_age', 'title': 'Average Age', 'ylabel': 'Average Age',
     'ylim': (40, 46), 'xlim': (2000, 2018), 'xticks': [2002,2005,2009,2013,2017]},
    {'pos': (1, 1), 'var': 'female', 'title': 'Share of Females', 'ylabel': '% Female',
     'ylim': (49.9, 51.1), 'xlim': (1994, 2018), 'xticks': full_ticks},
]

fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10), dpi=300, sharex=False)
# set legend font size (global rc setting, used by every legend created afterwards)
plt.rc('legend',fontsize=6)
# title for entire figure
fig.suptitle('Averages values of control variables \n by ever treated status', fontsize=20)

# Split the table by group once; all four panels reuse the subsets.
# The loop name is `group`, not `treatment`, so the treatment DataFrame loaded
# earlier is not overwritten.
series = [(group, avg_ever_treated[avg_ever_treated['treatment'] == group]) for group in treatments]

for panel in panels:
    ax = axes[panel['pos']]
    ax.set_title(panel['title'], fontsize=12)
    ax.set_ylabel(panel['ylabel'], fontsize=10)
    for (group, plot_data), colour, marker in zip(series, blacks, markers):
        # No yerr is passed, so only the yearly means are drawn (no error bars).
        ax.errorbar(plot_data['year'], plot_data[panel['var'], 'mean'], c=colour, marker=marker,
                    label=group, lw=0.8, ms=4, capsize=4)
    ax.legend(loc='upper left')
    ax.set_ylim(*panel['ylim'])
    ax.set_xlim(*panel['xlim'])
    ax.set_xticks(panel['xticks'])
    ax.tick_params(labelsize = 8)

# Save figure
fig.savefig(f'{path}/figures/controls/ever_treated_controls.png')
# Close this specific figure rather than whichever one happens to be current.
plt.close(fig)

Plot a map of municipalities with controls available

In [None]:
# Load shapefiles
# Load shapefiles (geopandas is already imported as `gpd` in the import cell at
# the top of the notebook, so it is not imported again here).
municipalities = gpd.read_file(f'{path}/data/geodata/municipalities.shp')
states = gpd.read_file(f'{path}/data/geodata/VG250_LAN.shp')
powerlines = gpd.read_file(f'{path}/data/geodata/all_powerlines.shp')
powerlines = powerlines.set_geometry('geometry')
# set year to treatment year: lines labelled 2013 / 2009 are recoded to 2014 / 2010.
# The comparison is against strings, so `year` is handled as text in this cell.
powerlines['year'] = np.where(powerlines['year'] == '2013', '2014', powerlines['year'])
powerlines['year'] = np.where(powerlines['year'] == '2009', '2010', powerlines['year'])

Subset controls to keep only the observations where all controls are available

In [None]:
# Display the column labels of `controls` before choosing the columns to subset on.
controls.columns

In [None]:
# Rows where every one of the four control variables is present, reduced to
# a single row per municipality (AGS).
required_controls = ['pop_density', 'unemployed', 'avg_age', 'female']
controls_sub = (
    controls
    .dropna(subset=required_controls)
    .groupby(['AGS'], as_index=False)
    .first()
)
# (rows, columns) of the reduced table.
controls_sub.shape

In [None]:
# Attach the municipality shapes to the municipalities with complete controls.
# A left join keeps every row of controls_sub; `_merge` flags rows without a shape.
control_sub_map = controls_sub.merge(
    municipalities,
    on='AGS',
    how='left',
    indicator=True,
)
# Count matched versus unmatched municipalities.
control_sub_map['_merge'].value_counts()

In [None]:
# Treatment year -> label shown in the map legend.
legend_dict = {2005: 'DENA 2005', 2010: 'EnLAG 2009', 2014: 'BBPlG 2013'}
# The merged table needs an active geometry column before it can be drawn as a map.
control_sub_map = control_sub_map.set_geometry('geometry')

# setup figure
fig, ax = plt.subplots(figsize=(10, 8), dpi=300)
ax.set_aspect('equal')
ax.set_axis_off()

# Layers from bottom to top: all municipalities, municipalities with complete
# controls (red), state borders, power lines coloured by year.
municipalities.plot(ax=ax, color='lightblue', edgecolor='blue', lw=0.01, zorder=1)
control_sub_map.plot(ax=ax, color='red', edgecolor='blue', lw=0.01, zorder=2)
states.boundary.plot(ax=ax, color='darkblue', lw = 0.1, zorder=3)
powerlines.plot(ax=ax, column='year', cmap='winter', legend=True, lw=2, zorder=4)
ax.set_title('Municipalities with control variables available', fontsize=20)
# relabel legend entries
def replace_legend_items(legend, mapping):
    """Replace legend entry texts according to ``mapping``.

    Parameters
    ----------
    legend : object with a ``texts`` attribute
        Each element of ``legend.texts`` must offer ``get_text()`` and
        ``set_text()``.
    mapping : dict
        Old label -> new label. Keys are compared through ``str(key)``, so
        integer keys match their textual form.

    Each entry is replaced at most once, based on its original text. A new
    label that happens to equal another key is therefore not replaced again.
    Entries without a matching key are left untouched. The legend is modified
    in place and nothing is returned.
    """
    # Build the lookup once; setdefault keeps the first of several keys that
    # share the same textual form.
    lookup = {}
    for key, label in mapping.items():
        lookup.setdefault(str(key), label)

    for txt in legend.texts:
        current = txt.get_text()
        if current in lookup:
            txt.set_text(lookup[current])

# Swap the year values in the legend for the descriptive labels, then write the map to disk.
map_legend = ax.get_legend()
replace_legend_items(map_legend, legend_dict)
fig.savefig(f'{path}/figures/controls/mun_with_controls.png')
plt.close(fig)

Choropleth maps of controls by year

In [116]:
# Reload the geodata for the choropleth maps; here the power-line year is integer.
geodata_dir = f'{path}/data/geodata'
municipalities = gpd.read_file(f'{geodata_dir}/municipalities.shp').set_geometry('geometry')
states = gpd.read_file(f'{geodata_dir}/VG250_LAN.shp')
powerlines = gpd.read_file(f'{geodata_dir}/all_powerlines.shp').set_geometry('geometry')
# set year to treatment year: 2013 -> 2014 and 2009 -> 2010, all other years unchanged
powerlines['year'] = powerlines['year'].astype(int).replace({2013: 2014, 2009: 2010})

In [112]:
# Inner join on AGS (the default `how`): only control rows whose AGS also
# appears in the municipality shapes are kept.
controls_muns = controls.merge(municipalities, on='AGS')

In [None]:
# Summary statistics of the unemployment share. describe must be called:
# without the parentheses the cell only shows the bound-method repr.
controls_muns['unemployed'].describe()

In [120]:
# Treatment year -> panel title.
legend_dict = {2005: 'DENA 2005', 2010: 'EnLAG 2009', 2014: 'BBPlG 2013'}
# Control variable -> figure title.
control_vars = {'pop_density': 'Population Density', 'unemployed': 'Share of unemployed', 'avg_age': 'Average Age', 'female': 'Share of Females'}

# The municipality and power-line subsets depend only on the year, not on the
# variable being mapped, so they are built once per year instead of once per
# (variable, year) pair.
controls_by_year = {
    year: controls_muns[controls_muns['year'] == year].set_geometry('geometry')
    for year in legend_dict
}
powerlines_by_year = {year: powerlines[powerlines['year'] == year] for year in legend_dict}

for var, title in control_vars.items():
    # initialize figure: one panel per treatment year (squeeze=False keeps a 2-D axes array)
    fig, axes = plt.subplots(nrows=1, ncols=len(legend_dict), figsize=(10, 10), dpi=300,
                             constrained_layout=True, squeeze=False)
    # title for entire figure
    fig.suptitle(title, fontsize=20)
    for panel, (year, desc) in zip(axes[0], legend_dict.items()):
        # draw subfig
        panel.set_aspect('equal')
        panel.set_adjustable('datalim', share=True)
        panel.set_axis_off()
        panel.set_title(f'{desc}')
        # Layers: choropleth of the variable, state borders, that year's power lines.
        controls_by_year[year].plot(column=var, ax=panel, legend=True, legend_kwds={'orientation': "horizontal"})
        states.boundary.plot(ax=panel, color='darkblue', lw = 0.1, zorder=2)
        powerlines_by_year[year].plot(ax=panel, color='red', lw=2, zorder=3)
    # Save and close through the figure handle rather than the implicit current figure.
    fig.savefig(f'{path}/figures/controls/map_{var}.png', bbox_inches="tight", pad_inches=0)
    plt.close(fig)