# Linear Regression

### Loading Libraries

In [None]:
# Numerical Computing
import numpy as np


# Data Manipulation
import pandas as pd

# Data Visualization
import matplotlib.pyplot as plt
from matplotlib .pyplot import subplots

# statsmodels
import statsmodels .api as sm

# ISLP - Custom Libraries
from ISLP import load_data
from ISLP.models import ( ModelSpec as MS,
summarize,
poly)

In [None]:
from statsmodels .stats. outliers_influence \
import variance_inflation_factor as VIF
from statsmodels .stats.anova import anova_lm

#### Inspecting Objects & Namespaces

In [None]:
# List the names currently defined in the top-level namespace.
dir ()

In [None]:
# A small integer array, followed by a listing of the attributes and
# methods available on it.
A = np.array([3, 5, 11])
dir(A)

In [None]:
# Sum of the elements of A, called as a method on the array.
A.sum()

### Simple Linear Regression

In [None]:
# Load the Boston data set through ISLP's loader and show its column labels.
Boston = load_data("Boston")
Boston.columns

In [None]:
# Hand-built design matrix: a column of ones for the intercept next to the
# single predictor lstat.
X = pd.DataFrame(
    {
        'intercept': np.ones(Boston.shape[0]),
        'lstat': Boston['lstat'],
    }
)

# Show the first four rows.
X[:4]

In [None]:
# Response variable.
y = Boston['medv']

# Specify the ordinary least squares model, then fit it.
model = sm.OLS(y, X)
results = model.fit()

In [None]:
# Apply ISLP's summarize helper to the fitted results; as the last expression
# in the cell, its return value is what the notebook displays.
summarize(results)

#### Using Transformations: Fit & Transform

In [None]:
# Two-step use of a ModelSpec: fit it on the data first, then transform the
# same data into a design matrix.
design = MS(['lstat']).fit(Boston)
X = design.transform(Boston)

# Show the first four rows.
X[:4]

In [None]:
# Same specification, with fitting and transforming done in one call.
design = MS(['lstat'])
X = design.fit_transform(Boston)

# Show the first four rows.
X[:4]

In [None]:
# Print the full summary of the fitted model as text. The bare-expression
# form below is left disabled in favour of the explicit print.
# results.summary()

print(results.summary())

In [None]:
# Fitted coefficients of the model.
results.params

In [None]:
# Three new lstat values to predict at.
new_df = pd.DataFrame({'lstat':[5, 10, 15]})

# Reuse the already-fitted `design` so the new rows get the same columns as
# the training design matrix.
newX = design.transform(new_df)
newX

In [None]:
# Build a prediction object for the rows of newX, then show the predicted
# means.
new_predictions = results.get_prediction(newX)
new_predictions.predicted_mean

In [None]:
# Interval bounds for the predictions at alpha = 0.05.
new_predictions.conf_int(alpha=0.05)

In [None]:
# Same call with obs=True, again at alpha = 0.05.
new_predictions.conf_int(obs=True, alpha=0.05)

#### Defining Functions

In [None]:
def abline(ax, b, m):
    """Draw the line y = m * x + b on ax across its current x-limits.

    ax must provide get_xlim() and plot(); b is the intercept and m the slope.
    """
    x_range = ax.get_xlim()
    # Evaluate the line at the left and right ends of the visible x-range.
    y_range = [m * x + b for x in (x_range[0], x_range[1])]
    ax.plot(x_range, y_range)

In [None]:
def abline(ax, b, m, *args, **kwargs):
    """Draw the line y = m * x + b on ax across its current x-limits.

    ax must provide get_xlim() and plot(); b is the intercept and m the slope.
    Any extra positional and keyword arguments are forwarded unchanged to
    ax.plot (for example a format string or linewidth).
    """
    def line(x):
        return m * x + b

    span = ax.get_xlim()
    ax.plot(span, [line(span[0]), line(span[1])], *args, **kwargs)

In [None]:
# Scatter medv against lstat and overlay the fitted regression line.
ax = Boston.plot.scatter('lstat', 'medv')

# Select intercept and slope by position with .iloc. results.params is
# expected to be a Series labelled by design-matrix column name, and plain
# integer keys (params[0]) rely on pandas' deprecated positional fallback.
abline(ax,
       results.params.iloc[0],
       results.params.iloc[1],
       'r--',
       linewidth=3)

plt.grid(True)
plt.show()

In [None]:
# Residuals against fitted values for the current `results`, with a dashed
# horizontal reference line at zero.
ax = subplots(figsize=(8, 8))[1]
ax.set_xlabel('Fitted value')
ax.set_ylabel('Residual')
ax.scatter(results.fittedvalues, results.resid, color='gold')
ax.axhline(0, c='k', ls='--')

plt.grid(True)
plt.show()

In [None]:
# Leverage (hat-matrix diagonal) of every observation in the fitted model.
infl = results.get_influence()

ax = subplots(figsize=(8, 8))[1]
# Take the x positions from the leverage array itself rather than from the
# global X, which other cells reassign to different design matrices.
ax.scatter(np.arange(len(infl.hat_matrix_diag)),
           infl.hat_matrix_diag,
           color='tomato')
ax.set_xlabel('Index')
ax.set_ylabel('Leverage')
plt.grid(True)
plt.show()

# Position of the observation with the largest leverage. Kept as the last
# expression so the notebook displays it; placed mid-cell its value was
# silently discarded.
np.argmax(infl.hat_matrix_diag)

### Multiple Linear Regression

In [None]:
# Two-predictor model: medv on lstat and age.
X = MS(['lstat', 'age']).fit_transform(Boston)
model1 = sm.OLS(y, X)
results1 = model1.fit()

In [None]:
# Apply ISLP's summarize helper to the lstat + age fit.
summarize(results1)

In [None]:
# Every Boston column except the response medv, to be used as predictors.
terms = Boston.columns.drop('medv')
terms

In [None]:
# Regress medv on all remaining columns. Note that this rebinds the globals
# X, model and results.
X = MS(terms).fit_transform(Boston)
model = sm.OLS(y, X)
results = model.fit()

In [None]:
# Apply ISLP's summarize helper to whatever `results` currently refers to.
summarize(results)

In [None]:
# Predictor set without age (and without the response medv).
minus_age = Boston.columns.drop(['medv', 'age'])

# Design matrix and model for that reduced set. This rebinds model1 but
# leaves results1 untouched.
Xma = MS(minus_age).fit_transform(Boston)
model1 = sm.OLS(y, Xma)

In [None]:
# Fit model1 on the spot and pass the result to ISLP's summarize helper; the
# fitted object is not kept in a variable.
summarize(model1.fit())

### Multivariate Goodness of Fit

#### List Comprehension

In [None]:
# Variance inflation factor for every column of X except column 0.
vals = [VIF(X, col) for col in range(1, X.shape[1])]

# Label each value with the matching column name.
vif = pd.DataFrame({'vif': vals}, index=X.columns[1:])
vif

In [None]:
# The same VIF computation written as an explicit loop over column positions,
# this time on the underlying array X.values.
vals = []
for i in range(1, X.values.shape[1]):
    vals.append(VIF(X.values, i))

### Interaction Terms

In [None]:
# lstat, age, and a third term given as the tuple ('lstat', 'age').
X = MS(['lstat', 'age', ('lstat', 'age')]).fit_transform(Boston)
model2 = sm.OLS(y, X)

In [None]:
# Fit model2 on the spot and pass the result to ISLP's summarize helper.
summarize(model2.fit())

### Non-linear Transformations of the Predictors

In [None]:
# Degree-2 polynomial term in lstat (via ISLP's poly helper) plus age.
X = MS([poly('lstat', degree=2), 'age']).fit_transform(Boston)
model3 = sm.OLS(y, X)
results3 = model3.fit()

In [None]:
# Apply ISLP's summarize helper to the polynomial fit.
summarize(results3)

In [None]:
# Compare the lstat + age fit (results1) with the polynomial fit (results3).
anova_lm(results1, results3)

In [None]:
# Residuals against fitted values for the polynomial fit, with a dashed
# horizontal reference line at zero.
ax = subplots(figsize=(8, 8))[1]
ax.set_xlabel('Fitted value')
ax.set_ylabel('Residual')
ax.scatter(results3.fittedvalues, results3.resid, color='royalblue')
ax.axhline(0, c='k', ls='--')

plt.grid(True)
plt.show()

### Qualitative Predictors

In [None]:
import statsmodels.api as sm
from ISLP.models import ModelSpec as MS
from ISLP.models.model_spec import Interaction

# Load the Carseats data set through ISLP's loader and show its column labels.
Carseats = load_data('Carseats')
Carseats.columns

In [None]:
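# NOTE: earlier attempt, kept for reference and left disabled. It was written
# with the names 'Price ' and 'Age ' (trailing spaces), which presumably do
# not match any Carseats column; the names are corrected below.
# allvars = list(Carseats.columns.drop('Sales'))

# y = Carseats['Sales']

# final = allvars + [('Income', 'Advertising'),
#                    ('Price', 'Age')]

# X = MS(final).fit_transform(Carseats)
# model = sm.OLS(y, X)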

In [None]:
# Strip stray whitespace from the column labels before referring to them.
Carseats.columns = Carseats.columns.str.strip()

# Interaction terms built by hand as element-wise products and stored as
# extra columns on the data frame.
Carseats["Income_x_Advertising"] = Carseats["Income"] * Carseats["Advertising"]
Carseats["Price_x_Age"] = Carseats["Price"] * Carseats["Age"]

# Response, and every other column (including the two products) as predictors.
y = Carseats["Sales"]
allvars = list(Carseats.columns.drop("Sales"))

spec = MS(allvars)
X = spec.fit_transform(Carseats)

# Add a constant only if the design matrix lacks one. The column is looked up
# as lowercase "intercept", the name used for it elsewhere in this notebook;
# the previous capitalised "Intercept" never matched, so the fallback always
# ran.
# NOTE(review): confirm ModelSpec emits "intercept" in the installed ISLP version.
if not hasattr(X, "columns") or "intercept" not in X.columns:
    X = sm.add_constant(X)

# Fit by ordinary least squares and print the full summary. `model` holds the
# fitted results here, not the unfitted model.
model = sm.OLS(y, X).fit()
print(model.summary())