# Install and Load Packages

In [None]:
pip install statsmodels

In [None]:
pip install numpy

In [None]:
pip install pandas

In [None]:
pip install sklearn

In [None]:
pip install matplotlib

In [None]:
import statsmodels.api as sm
import numpy as np
import pandas as pd
from sklearn import datasets 
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error

# Load Data

In [None]:
# Read the dataset from the CSV file in the working directory and show it.
diabetes = pd.read_csv(filepath_or_buffer='diabetes_SLR.csv')
display(diabetes)

In [None]:
# Append a 'const' column (placed last, since prepend=False) so that the
# regression below can estimate an intercept term.
diabetes = sm.add_constant(data=diabetes, prepend=False)
display(diabetes)

In [None]:
# Training slice: the first 30 rows (by position).
# X keeps the regressor together with the constant column; y stays a
# one-column DataFrame holding the response.
X = diabetes[['Alcaline Deficency', 'const']].iloc[0:30]
y = diabetes[['C Peptide']].iloc[0:30]

# Fit the Model

In [None]:
# Ordinary least squares of the response (y) on the regressor plus constant (X).
res = sm.OLS(endog=y, exog=X).fit()
print(res.summary())

In [None]:
# res.params holds one estimated coefficient per column of X: the slope for
# 'Alcaline Deficency' and the intercept for the added 'const' column.
print("Parameters: ", res.params)
# Coefficient of determination of the fit on the training slice.
print("R2: ", res.rsquared)

In [None]:
# In-sample fitted values: the model's output for the rows it was fitted on
# (the [0:30] training slice).
display(res.fittedvalues)

In [None]:
# In-sample prediction results for the fitted model.
pred_ols = res.get_prediction()

# Build the summary table once and read both bounds from it, instead of
# recomputing the whole frame for each column.
pred_frame = pred_ols.summary_frame()
iv_l = pred_frame["obs_ci_lower"]  # lower bound of the interval for new observations
iv_u = pred_frame["obs_ci_upper"]  # upper bound of the interval for new observations

In [None]:
fig, ax = plt.subplots(figsize=(8, 6))

# Line-style plots connect points in the order given. Order everything by the
# regressor so the fitted line and the interval bounds are drawn left to right
# instead of zig-zagging when the rows are not sorted by x.
x_vals = np.asarray(X['Alcaline Deficency'])
order = np.argsort(x_vals)
x_sorted = x_vals[order]

ax.plot(x_vals, np.asarray(y['C Peptide']), "o", label="data")
ax.plot(x_sorted, np.asarray(res.fittedvalues)[order], "r--.", label="OLS")
ax.plot(x_sorted, np.asarray(iv_u)[order], "r--", label="prediction interval")
ax.plot(x_sorted, np.asarray(iv_l)[order], "r--")

# Label the axes so the figure can be read on its own.
ax.set_xlabel('Alcaline Deficency')
ax.set_ylabel('C Peptide')
ax.legend(loc="best");

# Prediction

In [None]:
# Apply the fitted model to the rows that were not used for fitting
# (everything after the first 30, by position).
prediction = res.predict(exog=diabetes[['Alcaline Deficency', 'const']].iloc[30:])
display(prediction)

# Validation

In [None]:
# True responses for the held-out rows (everything after the first 30).
y_holdout = diabetes['C Peptide'].iloc[30:]

# Mean absolute error between the observed and the predicted response.
print(mean_absolute_error(y_holdout, prediction))

# Plot both series against the same positional axis. The labels and legend
# make it possible to tell the predicted curve from the observed one.
positions = range(len(prediction))
plt.plot(positions, prediction, label="predicted")
plt.plot(positions, y_holdout, label="observed")
plt.xlabel("held-out observation")
plt.ylabel("C Peptide")
plt.legend(loc="best");

**Exercise 1**: Implement a function that uses the mathematical formulations from the slides to calculate the coefficients and the results of the linear regression. Compare your results with those obtained with Statsmodels. Are the results the same?