# Covid Exponential Regression

In [None]:
# Predictor: the 'day' column reshaped into a single-column 2-D array
# (one row per sample). Target: the 'total_cases' column.
x = df_covid['day'].values.reshape(-1, 1)
y = df_covid['total_cases']

# raw samples, before any model is fitted
plt.scatter(x=x, y=y, label='samples');

In [None]:
# Exponential regression through a log-linear fit:
#   log(y_hat) = b_0 + b_1 * x   <=>   y_hat = e^(b_0) * e^(b_1 * x)
# The linear model is trained on log(y); predictions are exponentiated to
# get back to the original scale (see previous approach slide).
reg = LinearRegression()
reg.fit(x, np.log(y))

# coefficients of the fitted line in log space
b_0_pred = reg.intercept_
b_1_pred = reg.coef_[0]

# multiplicative constant of the exponential model, e^(b_0)
betahat0_pred = np.exp(b_0_pred)
title = f'model: y_hat = {betahat0_pred:.3f} e^({b_1_pred:.3f} x)'

# predictions on the original scale; r2 is scored against the untransformed y
y_hat = np.exp(reg.predict(x))
r2_exp = r2_score(y, y_hat)

# samples plus the fitted exponential curve
plt.scatter(x=x, y=y, label='samples')
plt.plot(x, y_hat, color='g', linewidth=3, label=f'exponential (r2 = {r2_exp: .2f})')

# title, legend and figure size
plt.suptitle(title)
plt.legend()
plt.gcf().set_size_inches(7, 5)

# Hank Aaron Polynomial Regression

In [None]:
# Predictor: the 'age' column as a single-column 2-D array. Target: 'HRR'.
x = df_aaron['age'].values.reshape(-1, 1)
y = df_aaron['HRR']

# Cross-validated r2 for every polynomial degree 0..10 (23 splits each).
# Negative scores are floored at 0 so the curve below stays readable.
r2_per_degree = np.array(
    [cross_val_r2(x, y, degree, n_splits=23) for degree in range(11)]
)
r2_per_degree = np.maximum(r2_per_degree, 0)

# r2 as a function of degree (list position == degree)
plt.plot(r2_per_degree)
plt.ylabel(f'$R^2$')
plt.xlabel('degree')

# new figure: fit and plot the polynomial with the highest cv r2
plt.figure()
degree_best = np.argmax(r2_per_degree)
fit_plot_poly(x, y, degree_best)

In [None]:
# Expand x into polynomial features; the degree is fixed at 4
# (degree 4 seems best from the cross validation above).
poly_project = PolynomialFeatures(degree=4)
x_poly = poly_project.fit_transform(x)

# Linear regression on the projected features, without a separate intercept
# (presumably because the projection already supplies a constant column --
# confirm against the PolynomialFeatures settings).
reg = LinearRegression(fit_intercept=False)
reg.fit(x_poly, y)

# residuals: observed minus fitted
y_pred = reg.predict(x_poly)
errors = y - y_pred

# Independence check: residuals against sample position.
plt.scatter(range(len(y)), errors)
plt.xlabel('index')
plt.ylabel('errors');

In [None]:
# Constant-variance check: residuals against the predictor (age).
plt.scatter(x, errors)
plt.xlabel('age')
plt.ylabel('errors');

In [None]:
# Normality check: probability plot of the residuals against a normal
# distribution, drawn through `py`.
# NOTE(review): `py` is defined outside this cell -- presumably a pylab /
# pyplot alias; confirm against the notebook's imports.
stats.probplot(x=errors, dist="norm", plot=py)
py.show()