# 📈 Introduction to Regression 📈

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error, r2_score

## Ordinary Least Squares (OLS)

In [3]:
# Synthetic single-feature regression problem; the fixed seed keeps it reproducible.
X, y = make_regression(
    n_samples=100,
    n_features=1,
    noise=5,
    bias=7,
    random_state=42,
)

In [4]:
# Fit ordinary least squares on the full data set, then predict on the same inputs.
model = LinearRegression().fit(X, y)
y_pred = model.predict(X)

In [None]:
# Sum of squared residuals between the observed targets and the fitted values.
ssr = np.square(y - y_pred).sum()
ssr

In [None]:
# Report the fitted intercept and slope, one per line.
print(
    f"Intercept: {model.intercept_}",
    f"Coefficients: {model.coef_}",
    sep="\n",
)

In [None]:
# Scatter the raw observations and overlay the fitted OLS line.
plt.scatter(X, y, label="Data", alpha=0.7, color="black")
plt.plot(X, y_pred, color="grey", label="Regression (OLS) Line")
plt.title("Linear Regression with positive correlation")
plt.xlabel("Independent variables (IVs)")
# Label typo fixed: was "Depdendent".
plt.ylabel("Dependent variable (DV)")
plt.legend()
plt.show()

## Multiple Linear Regression

In [107]:
# Synthetic four-feature regression problem; rebinds X and y from the earlier section.
X, y = make_regression(
    n_samples=100,
    n_features=4,
    noise=15,
    bias=5,
    random_state=42,
)

In [108]:
# Fit a fresh linear model on all features, then predict on the training inputs.
model = LinearRegression().fit(X, y)
y_pred = model.predict(X)

In [None]:
# Report the fitted intercept and the per-feature coefficients, one per line.
print(
    f"Intercept: {model.intercept_}",
    f"Coefficients: {model.coef_}",
    sep="\n",
)

In [None]:
# In-sample goodness of fit: mean squared error and coefficient of determination.
print(f"MSE: {mean_squared_error(y, y_pred)}")
# Label repaired: the superscript two had been mis-decoded as "Â²".
print(f"R²: {r2_score(y, y_pred)}")

## Multiple Linear Regression with 'statsmodels'

In [111]:
import statsmodels.api as sm

In [112]:
# statsmodels' OLS does not add an intercept term on its own, so extend the
# design matrix with a constant column before fitting.
X_constant = sm.add_constant(X)

In [113]:
# Fit OLS with statsmodels (target first, design matrix second).
# NOTE: this rebinds `model`, which previously held the scikit-learn estimator.
model = sm.OLS(y, X_constant).fit()

In [None]:
# Show the fitted model's summary; as the cell's last expression it is rendered as output.
model.summary()