First, import needed modules

In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

Initalize variables

In [41]:
path = os.getcwd()
print(path)

/Users/jan/Dropbox/UP_EPQM/2222/MA/powerlinemonsters


Import datasets

In [42]:
ltw = pd.read_csv(f'{path}/data/ltw_treat.csv', encoding = 'UTF-8-SIG', sep=',', converters={'AGS': str})

View summary statistics

In [44]:
columns = ['Union', 'SPD', 'FDP', 'Linke', 'Grüne', 'Andere']
ltw.describe().to_csv(f'{path}/tables/ltw/ltw_descriptives.csv', sep=';', encoding='utf-8-sig', decimal=',', columns=columns, float_format='%.2f')
ltw[columns].describe()

Unnamed: 0,Union,SPD,FDP,Linke,Grüne,Andere
count,14319.0,14316.0,14259.0,14311.0,14280.0,14319.0
mean,41.472536,19.724475,6.319968,7.406899,8.325754,16.856586
std,11.183494,11.567003,3.828827,9.12107,4.967469,10.5888
min,5.797101,0.0,0.0,0.202429,0.0,0.0
25%,33.668402,10.117269,3.450118,2.198791,4.625162,7.847499
50%,40.861027,17.197551,5.677155,3.601695,7.245246,15.619877
75%,48.653924,28.269766,8.216152,6.122824,10.94292,24.31573
max,95.454545,58.673469,55.555556,47.5,38.003901,71.186441


In [45]:
ltw['year'].value_counts().to_csv(f'{path}/tables/ltw/ltw_obsyears.csv', sep=';', encoding='utf-8-sig', decimal=',', float_format='%.2f')

Calculate average vote shares in all years by treatment type

In [46]:
avg_dfs = {2010: 'treatment_09', 2014: 'treatment_13'} # does not make sense to plot for treatment in 2005 --> no data
for year, treatment in avg_dfs.items():
    avg_dfs[year] = ltw[['Land','Union', 'SPD', 'FDP', 'Linke', 'Grüne', 'Andere', treatment, 'year']].groupby(['Land', treatment, 'year'], as_index = False).mean()
    avg_dfs[year].rename(columns = {f'{treatment}':'treatment'}, inplace = True)
    mean = ltw[['Land', 'Union', 'SPD', 'FDP', 'Linke', 'Grüne', 'Andere', 'year']].groupby(['Land', 'year'], as_index = False).mean()
    mean['treatment'] = 'Mean'
    avg_dfs[year] = pd.concat([avg_dfs[year], mean])
avg_dfs[2010].head(15)

Unnamed: 0,Land,treatment,year,Union,SPD,FDP,Linke,Grüne,Andere
0,BY,Direct Line,2008,48.745979,27.196271,4.437926,3.438301,4.147399,12.117751
1,BY,Direct Line,2013,47.47637,27.294037,2.461825,1.440152,4.526262,16.846833
2,BY,Direct Line,2018,45.750397,17.326162,2.467114,2.871319,8.737696,22.952205
3,BY,,2008,47.301936,14.173849,6.980801,3.713219,7.849922,20.095036
4,BY,,2013,52.799455,15.732923,2.634354,1.695167,6.907168,20.338346
5,BY,,2018,41.535522,7.386217,4.08264,2.400528,13.548669,31.162162
6,BY,Within 15km,2008,47.069254,25.960231,4.841103,4.043531,4.09494,14.108025
7,BY,Within 15km,2013,48.10266,26.072272,1.988766,1.755293,4.461272,17.769202
8,BY,Within 15km,2018,42.495733,16.874959,3.504479,2.569927,9.441417,25.215061
9,BY,Within 30km,2008,48.924059,20.326455,5.532564,4.299898,5.178007,15.853728


For each state, plot the average vote shares over years for treated and untreated municipalities

In [55]:
states = {'BY': 'Bavaria', 'HE': 'Hesse', 'NW': 'North-Rhine Westphalia', 'NI': 'Lower Saxony', 'TH': 'Thuringia'}
treatments = ['None', 'Direct Line', 'Within 15km', 'Within 30km', 'Within 50km', 'Mean']
blacks = ['#000000', '#191919', '#323232', '#4c4c4c', '#666666', 'blue']
reds = ['#cc0000', '#d11919', '#d63232', '#db4c4c', '#e06666', 'blue']
yellows = ['#ffff00', '#ffff19', '#ffff32', '#ffff4c', '#ffff66', 'blue']
purples = ['#800080', '#8c198c', '#993299', '#a64ca6', '#b266b2', 'blue']
greens = ['#008000', '#198c19', '#329932', '#4ca64c' , '#66b266', 'blue']
greys = ['#808080', '#8c8c8c', '#999999', '#a6a6a6', '#b2b2b2', 'blue']
markers = ['o', 's', 'v', 'D', '^', 'h']

for year, df in avg_dfs.items():
    for abb, state in states.items():
        # keep only obs. corresponding to state
        state_data = df[df['Land'] == abb]
        # get years in a list
        years = state_data.year.unique().astype(int).tolist()
        # initialize figure 
        fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(10, 10), dpi=300)
        # set x labels
        plt.setp(axes, xticks=years)
        # set legend font size
        plt.rc('legend',fontsize=6)
        # title for entire figure
        fig.suptitle(f'{state} State Parliament Elections \n Average Vote Shares by treatment status in {year}', fontsize=20)

        for i, treatment in enumerate(treatments):
            plot_data = state_data[state_data['treatment'] == treatment]
            # set linestyle
            if treatment == 'Mean':
                linestyle = '--'
            else:
                linestyle = '-'
            # edit subplots
            axes[0, 0].set_title('Union', fontsize=12)
            axes[0, 0].set_ylabel('% Votes', fontsize=10)
            axes[0, 0].plot(plot_data['year'], plot_data['Union'], color=blacks[i], marker = markers[i], label=treatment, lw=0.8, markersize=4, linestyle=linestyle)
            axes[0, 0].legend(loc='lower left')
            axes[0, 0].axvline(x=year, color='red', linestyle='--')

            axes[0, 1].set_title('SPD', fontsize=12)
            axes[0, 1].set_ylabel('% Votes', fontsize=10)
            axes[0, 1].plot(plot_data['year'], plot_data['SPD'], color=reds[i], marker = markers[i], label=treatment, lw=0.8, markersize=4, linestyle=linestyle)
            axes[0, 1].legend(loc='lower left')
            axes[0, 1].axvline(x=year, color='red', linestyle='--')

            axes[0, 1]

            axes[1, 0].set_title('FDP', fontsize=12)
            axes[1, 0].set_ylabel('% Votes', fontsize=10)
            axes[1, 0].plot(plot_data['year'], plot_data['FDP'], color=yellows[i], marker = markers[i], label=treatment, lw=0.8, markersize=4, linestyle=linestyle)
            axes[1, 0].legend(loc='upper left')
            axes[1, 0].axvline(x=year, color='red', linestyle='--')

            axes[1, 1].set_title('Linke', fontsize=12)
            axes[1, 1].set_ylabel('% Votes', fontsize=10)
            axes[1, 1].plot(plot_data['year'], plot_data['Linke'], color=purples[i], marker = markers[i], label=treatment, lw=0.8, markersize=4, linestyle=linestyle)
            axes[1, 1].legend(loc='upper left')
            axes[1, 1].axvline(x=year, color='red', linestyle='--')

            axes[2, 0].set_title('Grüne', fontsize=12)
            axes[2, 0].set_ylabel('% Votes', fontsize=10)
            axes[2, 0].plot(plot_data['year'], plot_data['Grüne'], color=greens[i], marker = markers[i], label=treatment, lw=0.8, markersize=4, linestyle=linestyle)
            axes[2, 0].legend(loc='upper left')
            axes[2, 0].axvline(x=year, color='red', linestyle='--')

            axes[2, 1].set_title('Andere', fontsize=12)
            axes[2, 1].set_ylabel('% Votes', fontsize=10)
            axes[2, 1].plot(plot_data['year'], plot_data['Andere'], color=greys[i], marker = markers[i], label=treatment, lw=0.8, markersize=4, linestyle=linestyle)
            axes[2, 1].legend(loc='upper left')
            axes[2, 1].axvline(x=year, color='red', linestyle='--')

        # Save figure
        fig.savefig(f'{path}/figures/ltw/ltw_{abb}_{year}_vote_shares.png')
        plt.close()

Plot a map with the partys who received the largest first vote share in each year for each municipality

In [88]:
# Load shapefiles
import geopandas as gpd
municipalities = gpd.read_file(f'{path}/data/geodata/municipalities.shp')
states = gpd.read_file(f'{path}/data/geodata/VG250_LAN.shp')
powerlines = gpd.read_file(f'{path}/data/geodata/all_powerlines.shp')
powerlines = powerlines.set_geometry('geometry')
powerlines['year'] = powerlines['year'].astype(int)
# set year to treatment year
powerlines['year'] = np.where(powerlines['year'] == 2013, 2014, powerlines['year'])
powerlines['year'] = np.where(powerlines['year'] == 2009, 2010, powerlines['year'])

Replace State IDs with Abbreviations 

In [89]:
abb = {'01': 'SH', '02': 'HH', '03': 'NI', '04': 'HB', '05': 'NW', '06': 'HE', '07': 'RP', '08': 'BW', \
         '09': 'BY', '10': 'SL', '11': 'BE', '12': 'BB', '13': 'MV', '14': 'SN', '15': 'ST', '16': 'TH'}
for key, value in abb.items():
    municipalities['SN_L'] = np.where(municipalities['SN_L'] == key, value, municipalities['SN_L'])
    states['SN_L'] = np.where(states['SN_L'] == key, value, states['SN_L'])

In [90]:
# Prepare df with election winners
ltw_win = ltw[['AGS', 'Land', 'year', 'Union', 'SPD', 'FDP', 'Linke', 'Grüne', 'Andere']]
ltw_win['winner'] = ltw_win[['Union', 'SPD', 'FDP', 'Linke', 'Grüne', 'Andere']].idxmax(axis = 1)
ltw_win = ltw_win[['AGS', 'Land', 'year', 'winner']]
ltw_win['winner'].describe()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ltw_win['winner'] = ltw_win[['Union', 'SPD', 'FDP', 'Linke', 'Grüne', 'Andere']].idxmax(axis = 1)


count     14319
unique        6
top       Union
freq      11095
Name: winner, dtype: object

Add colors

In [91]:
# add color column to df
colors = {'Union': '#000000', 'SPD': '#cc0000', 'FDP': '#ffff00', 'Grüne': '#008000', 'Linke': '#800080', 'Andere': '#808080'}
ltw_win['color'] = 0
for party, color in colors.items():
    ltw_win['color'] = np.where(ltw_win['winner'] == party, color, ltw_win['color'])

In [92]:
# Merge win and municipalities
ltw_win_plot = pd.merge(ltw_win, municipalities, on='AGS')
ltw_win_plot = ltw_win_plot.set_geometry('geometry')

In [101]:
from geopandas.tools import overlay
abb = {'BY': 'Bavaria', 'HE': 'Hesse', 'NW': 'North-Rhine Westphalia', 'NI': 'Lower Saxony', 'TH': 'Thuringia'}
legend_dict = {2005: 'DENA 2005', 2010: 'EnLAG 2009', 2014: 'BBPlG 2013'}
# initialize figure 
fig, ax = plt.subplots(nrows=3, ncols=5, figsize=(10, 10), dpi=300, constrained_layout=True)
# title for entire figure
fig.suptitle('State Parliament Election Winners', fontsize=20)
for i, (abb, state) in enumerate(abb.items()):
    # keep only obs. corresponding to state
    state_winners = ltw_win_plot[ltw_win_plot['Land'] == abb]
    municipalities_plot = municipalities[municipalities['SN_L'] == abb]
    states_plot = states[states['SN_L'] == abb]

    # keep only parts of powerlines that intersect state
    powerlines_state = powerlines.overlay(states_plot, how='intersection')
    # get years in a list
    years = state_winners.year.unique().astype(int).tolist()
    for j, year in enumerate(years):
        # subset df for year
        plot_data = state_winners[state_winners['year'] == year]
        # subset powerlines
        plot_pl = powerlines_state[powerlines_state['year'] <= year]
        plot_pl = plot_pl.sort_values('year')
        plot_pl['year'] = plot_pl['year'].astype(str)
        # draw subfig
        ax[j, i].set_aspect('equal')
        ax[j, i].set_adjustable('datalim', share=True)
        ax[j, i].set_axis_off()
        if j == 0:
            ax[j, i].set_title(f'{state}\n{year}')
        else: 
            ax[j, i].set_title(f'{year}')
        plot_data.plot(ax=ax[j, i], color=plot_data['color'], lw=0.01, zorder=1)
        plot_pl.plot(ax=ax[j, i], column='year', cmap='winter', legend=True, lw=2, zorder=2)
        # replace legend
        def replace_legend_items(legend, mapping):
            for txt in legend.texts:
                for k,v in mapping.items():
                    if txt.get_text() == str(k):
                        txt.set_text(v)
        replace_legend_items(ax[j, i].get_legend(), legend_dict)
# remove individual legends and add common 
lines_labels = [ax.get_legend_handles_labels() for ax in fig.axes]
lines, labels = [sum(lol, []) for lol in zip(*lines_labels)]
fig.legend(lines, labels)
[[c.get_legend().remove() for c in r] for r in ax]
plt.savefig(f'{path}/figures/ltw/ltw_winners.png', bbox_inches="tight", pad_inches=0)
plt.close()