In [1]:
import numpy as np
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

from scipy import stats
import matplotlib.pyplot as plt

In [2]:
data = pd.read_excel("portz data.xlsx")
data["Pol. Affil"] = data["Pol. Affil"] * 100

In [3]:
linreg = stats.linregress(x = data["Pol. Affil"], y = data["HS Ach."])

In [4]:
print("Slope: " + str(linreg.slope))
print("Intercept: " + str(linreg.intercept))
print("R-Squared: " + str(linreg.rvalue ** 2))
print("P-Value: " + str(linreg.pvalue))
print("Std. Error: " + str(linreg.stderr))

t_crit = 2.021 # 95% CI
print("Interval: [" + str(linreg.slope - linreg.stderr * t_crit) + "," + str(linreg.slope + linreg.stderr * t_crit) + "]")

Slope: -0.003790425461224452
Intercept: 33.60027005284658
R-Squared: 6.327049423221099e-05
P-Value: 0.9558218144856352
Std. Error: 0.0680731061463496
Interval: [-0.14136617298299697,0.13378532206054805]


Scatterplot with Regression Line

In [6]:
fig = make_subplots(rows=1, cols=1)
fig.add_trace(go.Scatter(x = data["Pol. Affil"], y = data["HS Ach."], mode = "markers", name = "Scatter Plot"))
fig.add_trace(go.Scatter(x = [min(data["Pol. Affil"]), max(data["Pol. Affil"]) ], y = [min(data["Pol. Affil"] * linreg.slope + linreg.intercept), max(data["Pol. Affil"]) * linreg.slope + linreg.intercept], mode = "lines", name = "Line of Best Fit"))
fig.update_xaxes(title = "Percentage Democrat in House and Senate")
fig.update_yaxes(title = "High School Achievement Score")
fig.show()

Residual Plot

In [8]:
fig = make_subplots(rows=1, cols=1)
fig.add_trace(go.Scatter(x = data["Pol. Affil"], y = data["HS Ach."] - (data["Pol. Affil"] * linreg.slope + linreg.intercept), mode = "markers", name = "Scatter Plot"))
fig.update_xaxes(title = "Percentage Democrat in House and Senate")
fig.update_yaxes(title = "High School Achievement Score Residual")
fig.show()

In [12]:
data

Unnamed: 0,State,HS Ach.,Pol. Affil
0,AL,20,24.285714
1,AK,60,38.75
2,AZ,30,47.5
3,AR,38,17.571429
4,CA,17,78.75
5,CO,30,67.472527
6,CT,52,65.783664
7,DE,40,67.421603
8,DC,40,86.666667
9,FL,36,29.583333


In [11]:
fig = make_subplots(rows=1, cols=2)
fig.add_trace(go.Histogram(x=data["HS Ach."] - (data["Pol. Affil"] * linreg.slope + linreg.intercept), name = "Residuals"), row=1, col=1)

fig.update_xaxes(title="Achievement Score Residuals", row=1, col=1)

fig.update_layout(title_text="Residual Plot")

fig.show()