First, import needed modules

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import statsmodels.formula.api as smf
from linearmodels.panel import PanelOLS as panel
from stargazer.stargazer import Stargazer
from IPython.core.display import HTML
import os

Initalize variables

In [5]:
path = os.getcwd()
print(path)

/Users/jan/Dropbox/UP_EPQM/2222/MA/powerlinemonsters


Import datasets

In [25]:
btw = pd.read_csv(f'{path}/data/btw_treat.csv', encoding = 'UTF-8-SIG', sep=',')
#data = data.set_index(['AGS', 'year'])
#btw = btw[btw["vote_type"] == 'office']

Split data in Erst- and Zweitstimme

In [26]:
erst = btw[btw['first_vote'] == 1]
zweit = btw[btw['second_vote'] == 1]

In [9]:
erst.columns

Index(['AGS', 'GEN', 'year', 'Land', 'state_id', 'Kreis', 'Wahlkreis',
       'post_2002', 'post_2005', 'post_2009', 'post_2013', 'first_vote',
       'second_vote', 'Wähler', 'Gültig', 'Union', 'SPD', 'FDP', 'Linke',
       'Grüne', 'Andere', 'treatment', 'treated_0', 'treated_10', 'treated_20',
       'treated_30', 'treated_50', 'treated_100'],
      dtype='object')

In [27]:
len(erst['Kreis'].unique())

217

Statsmodels

Define a function for regression models

In [52]:
# call: regression(post, dataset, 'vote_type', 'cluster_level', controls='', fe='')
def regression(post, dataset, vote_type, cluster_level, **kwargs):
    tables = []
    results = {}
    treatment_dummies = ['treated_0', 'treated_10', 'treated_20', 'treated_30', 'treated_50', 'treated_100']
    parties = ['Union',	'SPD', 'FDP', 'Linke', 'Grüne', 'Andere']
    post_dummies = {'post_2002': 'between 2002 and 2005', 'post_2005': 'between 2005 and 2009', 'post_2009': 'between 2009 and 2013', 'post_2013': 'between 2013 and 2017'}
    cluster_levels = {'AGS': 'Municipality', 'Kreis': 'County', 'state_id': 'State'}
    n_clust = len(dataset[cluster_level].unique())
    for treatment in treatment_dummies:
        for party in parties:
            if 'controls' in kwargs:
                controls = kwargs['controls']
                results[party] = smf.ols(f'{party} ~ {treatment}*{post} + {controls}', data=dataset).fit(cov_type='cluster', cov_kwds={'groups': np.array(dataset[cluster_level])}, missing='drop')
            elif 'fe' in kwargs:
                fe = kwargs['fe']
                results[party] = smf.ols(f'{party} ~ {treatment}:{post} + {fe}', data=dataset).fit(cov_type='cluster', cov_kwds={'groups': np.array(dataset[cluster_level])}, missing='drop')
            else:
                results[party] = smf.ols(f'{party} ~ {treatment}*{post}', data=dataset).fit(cov_type='cluster', cov_kwds={'groups': np.array(dataset[cluster_level])}, missing='drop')
        stargazer = Stargazer([results['Union'], results['SPD'], results['FDP'], results['Linke'], results['Grüne'], results['Andere']])
        distance = treatment.strip('treated_')
        #stargazer.title(f'Effect on Primary Vote, Municipalities within {distance} km of a powerline project, treatment between 2013 and 2017')
        stargazer.title(f'Effect on {vote_type} Vote, treatment {post_dummies[post]}. Standard errors adjusted for {n_clust} clusters on the {cluster_levels[cluster_level]} level')
        stargazer.custom_columns(['Union',	'SPD', 'FDP', 'Linke', 'Grüne', 'Andere'], [1, 1, 1, 1, 1, 1])
        stargazer.significant_digits(3)
        if 'fe' not in kwargs:
            stargazer.covariate_order([treatment, post, f'{treatment}:{post}'])
        elif 'fe' in kwargs:
            stargazer.covariate_order([f'{treatment}:{post}'])
        #stargazer.rename_covariates(covar_names)
        stargazer.show_model_numbers(False)
        #stargazer.add_custom_notes(['First note', 'Second note'])
        if 'fe' in kwargs:
            if 'state' in fe:
                stargazer.add_line('State FE', ['Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', ])
            if 'year' in fe:
                stargazer.add_line('Year FE', ['Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', ])
        table = stargazer.render_latex()
        tables.append(table)
    return tables

Define a function to export tables in one .tex dok

In [29]:
def tab_to_tex(name, tables):
    table_parts = []
    with open(f'{path}/tables/btw/{name}.tex', 'w') as f:
        header = r'\documentclass[12pt]{article}' + '\n' + r'\usepackage[a1paper]{geometry}' + '\n' + r'\begin{document}' + '\n \n'
        f.write(header)
        for index, table in enumerate(tables, start=1): 
            table = table.replace('t_', 't')
            table = table.replace('d_', 'd')
            table = table.replace('0:p', '0*p')
            lines = r'\hline \\[-1.8ex]'
            table_parts = table.split(lines)
            if index == 1:
                f.write(table_parts[0] + lines +  table_parts[1] + lines + table_parts[2] + lines)
            elif index < len(tables):
                f.write(table_parts[2] + lines)
            elif index == len(tables):
                f.write(table_parts[2] + lines + table_parts[3] + lines + table_parts[4])
        closer = '\n' + r'\end{document}'
        f.write(closer)

DiD for Treatment between 2013 and 2017

In [53]:
post = 'post_2013'
# primary vote
erst17_ags = regression(post, erst, 'Primary', 'AGS')
tab_to_tex('2017_erst_ags', erst17_ags)

erst17_kreis = regression(post, erst, 'Primary', 'Kreis')
tab_to_tex('2017_erst_kreis', erst17_kreis)

erst17_state = regression(post, erst, 'Primary', 'state_id')
tab_to_tex('2017_erst_state', erst17_state)

In [None]:
# secondary vote 
zweit17_tables = regression(post, zweit, 'Secondary')
tab_to_tex('2017_zweit', zweit17_tables)

Placebo DiD for treatments between 2009 and 2013

In [35]:
# Drop all observations after the treatment
erst_pl = erst[erst['year'] <= 2013]
zweit_pl = zweit[zweit['year'] <= 2013]

In [54]:
post = 'post_2009'
# primary vote
erst09_ags = regression(post, erst_pl, 'Primary', 'AGS')
tab_to_tex('2009_erst_ags', erst09_ags)

erst09_kreis = regression(post, erst_pl, 'Primary', 'Kreis')
tab_to_tex('2009_erst_kreis', erst09_kreis)

erst09_state = regression(post, erst_pl, 'Primary', 'state_id')
tab_to_tex('2009_erst_state', erst09_state)

Fixed Effects Models

In [51]:
post = 'post_2013'
# primary vote
erst17_ags_fe = regression(post, erst, 'Primary', 'AGS', fe='C(state_id) + C(year)')
tab_to_tex('2017_erst_ags_fe', erst17_ags_fe)

erst17_kreis_fe = regression(post, erst, 'Primary', 'Kreis', fe='C(state_id) + C(year)')
tab_to_tex('2017_erst_kreis_fe', erst17_kreis_fe)

erst17_state_fe = regression(post, erst, 'Primary', 'state_id', fe='C(state_id) + C(year)')
tab_to_tex('2017_erst_state_fe', erst17_state_fe)



In [207]:
# call: fe_regression(post, dataset, 'vote_type')
def fe_regression(post, dataset, vote_type):
    tables = []
    results = {}
    treatment_dummies = ['treated_0', 'treated_10', 'treated_20', 'treated_30', 'treated_50', 'treated_100']
    parties = ['Union',	'SPD', 'FDP', 'Linke', 'Grüne', 'Andere']
    post_dummies = {'post_2005': 'between 2002 and 2005', 'post_2009': 'between 2005 and 2009', 'post_2013': 'between 2009 and 2013', 'post_2017': 'between 2013 and 2017'}
    dataset = dataset.set_index(['AGS', 'year'])
    for treatment in treatment_dummies:
        for party in parties:
            results[party] = panel.from_formula(f'{party} ~ {treatment}*{post} + EntityEffects + TimeEffects', data=dataset, drop_absorbed=True).fit(cov_type="clustered", cluster_entity=True, cluster_time=True)
            table = results[party].summary.as_latex()
            tables.append(table)
    dataset = dataset.reset_index()
    return tables

Placebo DiD for treatments between 2009 and 2013 with fixed effects

In [203]:
erst_pl = erst_pl.reset_index()

In [208]:
post = 'post_2013'
erst09_fe_tables = fe_regression(post, erst_pl, 'Primary')
tab_to_tex('2009_erst_fe', erst09_tables)
zweit09_fe_tables = fe_regression(post, zweit_pl, 'Secondary')
tab_to_tex('2009_zweit_fe', zweit09_fe_tables)

Variables have been fully absorbed and have removed from the regression:

post_2013, treated_0

  results[party] = panel.from_formula(f'{party} ~ {treatment}*{post} + EntityEffects + TimeEffects', data=dataset, drop_absorbed=True).fit(cov_type="clustered", cluster_entity=True, cluster_time=True)
Variables have been fully absorbed and have removed from the regression:

post_2013, treated_0

  results[party] = panel.from_formula(f'{party} ~ {treatment}*{post} + EntityEffects + TimeEffects', data=dataset, drop_absorbed=True).fit(cov_type="clustered", cluster_entity=True, cluster_time=True)
Variables have been fully absorbed and have removed from the regression:

post_2013, treated_0

  results[party] = panel.from_formula(f'{party} ~ {treatment}*{post} + EntityEffects + TimeEffects', data=dataset, drop_absorbed=True).fit(cov_type="clustered", cluster_entity=True, cluster_time=True)
Variables have been fully absorbed and have removed from the regression:

post_2013, treated_0

  results[par

IndexError: list index out of range

In [209]:
for table in erst09_fe_tables:
    print(table)

\begin{center}
\begin{tabular}{lclc}
\toprule
\textbf{Dep. Variable:}        &       Union        & \textbf{  R-squared:         }   &    2.435e-05     \\
\textbf{Estimator:}            &      PanelOLS      & \textbf{  R-squared (Between):}  &    -9.572e-05    \\
\textbf{No. Observations:}     &       40720        & \textbf{  R-squared (Within):}   &     -0.0008      \\
\textbf{Date:}                 &  Thu, Aug 18 2022  & \textbf{  R-squared (Overall):}  &     -0.0001      \\
\textbf{Time:}                 &      10:12:22      & \textbf{  Log-likelihood     }   &    -1.128e+05    \\
\textbf{Cov. Estimator:}       &     Clustered      & \textbf{                     }   &                  \\
\textbf{}                      &                    & \textbf{  F-statistic:       }   &      0.7197      \\
\textbf{Entities:}             &       11155        & \textbf{  P-value            }   &      0.3962      \\
\textbf{Avg Obs:}              &       3.6504       & \textbf{  Distribution:     

In [166]:
for table in zweit09_tables: 
    lines = r'\hline \\[-1.8ex]'
    table_parts = table.split(lines)
    for i, part in enumerate(table_parts):
        print(i, part)

0 \begin{table}[!htbp] \centering
  \caption{Effect on Secondary Vote, treatment between 2005 and 2009}
\begin{tabular}{@{\extracolsep{5pt}}lcccccc}
\\[-1.8ex]\hline

1 
\\[-1.8ex] & \multicolumn{1}{c}{Union} & \multicolumn{1}{c}{SPD} & \multicolumn{1}{c}{FDP} & \multicolumn{1}{c}{Linke} & \multicolumn{1}{c}{Grüne} & \multicolumn{1}{c}{Andere}  \\

2 
 C(year)[T.2005.0] & -4.575$^{***}$ & -4.072$^{}$ & 3.151$^{***}$ & 4.252$^{***}$ & -0.067$^{}$ & 1.310$^{}$ \\
  & (nan) & (446.797) & (nan) & (nan) & (128.551) & (158.071) \\
 C(year)[T.2009.0] & -193841156428.372$^{***}$ & 170680760320.902$^{}$ & -58556904386.384$^{***}$ & 2023249103.795$^{***}$ & 118229631782.614$^{}$ & -38535580394.229$^{}$ \\
  & (nan) & (25599664239937408.000) & (nan) & (nan) & (9894754546507084.000) & (9748737480513218.000) \\
 C(year)[T.2013.0] & -193841156418.952$^{***}$ & 170680760323.134$^{}$ & -58556904397.131$^{***}$ & 2023249100.153$^{***}$ & 118229631780.436$^{}$ & -38535580389.311$^{}$ \\
  & (nan) & (280