In [4]:
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
# Global plot styling: seaborn's defaults are applied first, then matplotlib's
# 'fivethirtyeight' style sheet is applied after them.
sns.set()
plt.style.use('fivethirtyeight')

import statsmodels.regression.linear_model as smf

In [3]:
def Visualizing_Data_At_Threshold(data, threshold, running_variable, outcome_variable):
    """Re-centre the running variable on the threshold and scatter it against the outcome.

    Side effect: ``data[running_variable]`` is shifted by ``-threshold`` in the
    object passed in, so the caller's frame is modified and calling this twice
    on the same frame shifts the column twice.

    Parameters
    ----------
    data : object supporting ``data[col]`` and ``data.plot.scatter``
        Presumably a pandas DataFrame -- confirm against the caller.
    threshold : number
        Value subtracted from the running variable so the cutoff sits at 0.
    running_variable : str
        Column plotted on the x axis (and re-centred).
    outcome_variable : str
        Column plotted on the y axis.
    """
    # In-place shift: after this line the cutoff is at 0 in the caller's data.
    data[running_variable] -= threshold

    # Only the first slot of a 3x1 grid is used, as in the original layout.
    figure = plt.figure(figsize=(8, 8))
    top_panel = figure.add_subplot(3, 1, 1)
    data.plot.scatter(x=running_variable, y=outcome_variable, ax=top_panel)
    top_panel.set_title("Running Variable (Centered at 0)")
    

In [5]:
def Estimating_Effect(data, threshold, running_variable, outcome_variable):
    """Fit a linear regression-discontinuity model and return its coefficient table.

    The model is ``outcome ~ running * threshold`` where ``threshold`` is a 0/1
    indicator for ``running > 0``. The coefficient on ``threshold`` is the
    estimated jump in the outcome at the cutoff.

    Parameters
    ----------
    data : pandas.DataFrame
        Input data. It is not modified; the indicator is added to a copy.
    threshold : number
        Unused here; kept so all helpers share one signature. The cutoff is
        taken to be 0, i.e. the running variable must already be centred on
        the threshold (``Visualizing_Data_At_Threshold`` does that in place).
    running_variable : str
        Name of the (centred) running-variable column.
    outcome_variable : str
        Name of the outcome column.

    Returns
    -------
    The coefficient table (``summary().tables[1]``) of the fitted model.

    Raises
    ------
    ValueError
        If either column is literally named ``"threshold"``, which would
        collide with the indicator column created here.
    """
    import keyword

    def _term(column):
        # Plain identifiers can appear verbatim in a patsy formula (and read
        # best in the summary); anything else must be wrapped in Q().
        if column.isidentifier() and not keyword.iskeyword(column):
            return column
        return "Q({!r})".format(column)

    if "threshold" in (running_variable, outcome_variable):
        raise ValueError(
            "columns named 'threshold' collide with the treatment indicator"
        )

    # Table estimating the effect of the treatment on the outcome at the threshold.
    rdd_df = data.assign(threshold=(data[running_variable] > 0).astype(int))
    formula = "{} ~ {} * threshold".format(
        _term(outcome_variable), _term(running_variable)
    )
    # `smf` is statsmodels.regression.linear_model, which exposes the WLS class
    # (not a lowercase `wls` function); from_formula is the formula entry point.
    model = smf.WLS.from_formula(formula, rdd_df).fit()
    return model.summary().tables[1]
    

In [6]:
def Graphing_Running_Variable(data, threshold, running_variable, outcome_variable):
    """Scatter the outcome against the running variable and overlay the RDD fit.

    The regression ``outcome ~ running * threshold`` (with ``threshold`` a 0/1
    indicator for ``running > 0``) is fitted inside this function, so it does
    not depend on a model object created in another cell.

    Parameters
    ----------
    data : pandas.DataFrame
        Input data. It is not modified.
    threshold : number
        Unused here; kept so all helpers share one signature. The cutoff is
        taken to be 0, so the running variable must already be centred.
    running_variable : str
        Name of the (centred) running-variable column, plotted on x.
    outcome_variable : str
        Name of the outcome column, plotted on y.

    Raises
    ------
    ValueError
        If either column is literally named ``"threshold"``.
    """
    if "threshold" in (running_variable, outcome_variable):
        raise ValueError(
            "columns named 'threshold' collide with the treatment indicator"
        )

    rdd_df = data.assign(threshold=(data[running_variable] > 0).astype(int))
    # Q(...) lets patsy accept any column name; coefficient labels are not
    # displayed here, so the quoting has no visible cost.
    formula = "Q({!r}) ~ Q({!r}) * threshold".format(
        outcome_variable, running_variable
    )
    model = smf.WLS.from_formula(formula, rdd_df).fit()

    # Scatter plot of the raw observations.
    ax = data.plot.scatter(x=running_variable, y=outcome_variable, color="C0")
    # Sort by the running variable so the fitted line is drawn left to right
    # instead of zig-zagging through rows in storage order.
    fitted = data.assign(predictions=model.fittedvalues).sort_values(running_variable)
    fitted.plot(x=running_variable, y="predictions", ax=ax, color="C1")
    ax.set_title("Regression Discontinuity")
    

In [7]:
def Triangular_Kernel_Weighting(data, threshold, running_variable, outcome_variable):
    """Plot the triangular kernel weight of every observation.

    Weights are computed around a centre of 0 with a bandwidth of 1 (both
    fixed in the body) and drawn against the running variable on the current
    matplotlib axes. ``threshold`` and ``outcome_variable`` are not used.

    Parameters
    ----------
    data : object supporting ``data[col]``
        Presumably a pandas DataFrame -- confirm against the caller.
    running_variable : str
        Column whose values are weighted and used as the x axis / x label.
    """
    def kernel(R, c, h):
        """Return 1 - |R - c| / h where |R - c| <= h, multiplied by 0 elsewhere."""
        distance = np.abs(R - c)
        within_bandwidth = (distance <= h).astype(float)
        return within_bandwidth * (1 - distance / h)

    # Weight curve for the data as given (no sorting is applied).
    running_values = data[running_variable]
    axes = plt.gca()
    axes.plot(running_values, kernel(running_values, c=0, h=1))
    axes.set_xlabel(running_variable)
    axes.set_ylabel("Weight")
    axes.set_title("Kernel Weight by Running Variable")
    
    

In [8]:
def Estimating_Effects_With_Kernel(data, threshold, running_variable, outcome_variable):
    """Fit the RDD model with triangular kernel weights and return its coefficient table.

    Same model as the unweighted estimate (``outcome ~ running * threshold``,
    with ``threshold`` a 0/1 indicator for ``running > 0``), but observations
    are weighted by a triangular kernel centred at 0 with bandwidth 1, so
    points further than 1 unit from the cutoff get zero weight.

    Parameters
    ----------
    data : pandas.DataFrame
        Input data. It is not modified; the indicator is added to a copy.
    threshold : number
        Unused here; kept so all helpers share one signature. The cutoff is
        taken to be 0, so the running variable must already be centred.
    running_variable : str
        Name of the (centred) running-variable column.
    outcome_variable : str
        Name of the outcome column.

    Returns
    -------
    The coefficient table (``summary().tables[1]``) of the weighted fit.

    Raises
    ------
    ValueError
        If either column is literally named ``"threshold"``, or if no
        observation receives a positive kernel weight.
    """
    import keyword

    def kernel(R, c, h):
        # Triangular kernel: 1 at R == c, falling linearly to 0 at distance h.
        indicator = (np.abs(R - c) <= h).astype(float)
        return indicator * (1 - np.abs(R - c) / h)

    def _term(column):
        # Plain identifiers can appear verbatim in a patsy formula (and read
        # best in the summary); anything else must be wrapped in Q().
        if column.isidentifier() and not keyword.iskeyword(column):
            return column
        return "Q({!r})".format(column)

    if "threshold" in (running_variable, outcome_variable):
        raise ValueError(
            "columns named 'threshold' collide with the treatment indicator"
        )

    # Built locally: neither `rdd_df` nor `kernel` exists at module scope.
    rdd_df = data.assign(threshold=(data[running_variable] > 0).astype(int))
    weights = kernel(rdd_df[running_variable], c=0, h=1)
    if not (weights > 0).any():
        raise ValueError(
            "no observation lies within the kernel bandwidth of the cutoff; "
            "is the running variable centred on the threshold?"
        )

    formula = "{} ~ {} * threshold".format(
        _term(outcome_variable), _term(running_variable)
    )
    # `smf` is statsmodels.regression.linear_model, which exposes the WLS class
    # (not a lowercase `wls` function); from_formula is the formula entry point.
    model = smf.WLS.from_formula(formula, rdd_df, weights=weights).fit()

    return model.summary().tables[1]

In [9]:
def Sheepskin_Effect(data, threshold, running_variable, outcome_variable):
    """Scatter the outcome against the running variable on a 10x5 figure.

    The axis labels are the fixed strings "Running Variable" and
    "Outcome Variable". ``threshold`` is not used.

    Parameters
    ----------
    data : object supporting ``data.plot.scatter``
        Presumably a pandas DataFrame -- confirm against the caller.
    running_variable : str
        Column plotted on the x axis.
    outcome_variable : str
        Column plotted on the y axis.
    """
    axes = data.plot.scatter(
        x=running_variable,
        y=outcome_variable,
        figsize=(10, 5),
    )
    axes.set_xlabel("Running Variable")
    axes.set_ylabel("Outcome Variable")


In [10]:
def McCray_Test(data, threshold, running_variable, outcome_variable):
    """Draw a two-panel McCrary-style density check of the running variable.

    The top panel is a bar chart of how many rows fall on each distinct value
    of the running variable. The bottom panel shows the same counts after a
    fixed value substitution, meant to illustrate a spike at the cutoff.

    The counts are built in a separate table, so ``data`` is not modified.
    ``threshold`` and ``outcome_variable`` are not used.

    Parameters
    ----------
    data : pandas.DataFrame
        Input data, one row per observation.
    running_variable : str
        Column whose distribution is inspected.
    """
    # One row per distinct running-variable value with its frequency `n`,
    # ordered by that value (groupby sorts its keys).
    counts = data.groupby(running_variable).size().reset_index(name="n")

    # NOTE(review): these look like count values specific to one dataset; on
    # other data the substitution is a no-op and both panels will match.
    spike_replacements = {1877: 1977, 1874: 2277}

    fig = plt.figure(figsize=(8, 8))

    ax_observed = fig.add_subplot(2, 1, 1)
    counts.plot.bar(x=running_variable, y="n", ax=ax_observed)
    ax_observed.set_title("McCrary Test")
    ax_observed.set_ylabel("Smoothness at the Threshold")

    ax_spike = fig.add_subplot(2, 1, 2, sharex=ax_observed)
    # Restrict the substitution to the count column so running-variable values
    # that happen to equal 1877 or 1874 are left alone.
    counts.replace({"n": spike_replacements}).plot.bar(
        x=running_variable, y="n", ax=ax_spike
    )
    ax_spike.set_xlabel("Running Variable Relative to Cut off")
    ax_spike.set_ylabel("Spike at the Threshold")