# Linear regression scratchpad

File: noob/minitab_particle_board.ipynb

In [None]:
# Temporary notebook to develop linear regression example
import pandas as pd
import statsmodels.api as sm
import numpy as np
import matplotlib.cm as cm
import matplotlib.axes as axes
from scipy import stats

In [None]:
# Color palette used by the plots: the color list of matplotlib's Paired colormap.
c = cm.Paired.colors
# c[0] c[1] ... c[11]
# See "paired" in "qualitative colormaps"
# https://matplotlib.org/tutorials/colors/colormaps.html

In [None]:
def despine(ax: axes.Axes) -> None:
    '''
    Hide the right and top spines of the given axes.

    The bottom spine (x axis) and the left spine (y axis) are not touched,
    so the plot keeps a single x axis and a single y axis.
    '''
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

In [None]:
# Load the particle board data; the relative path is resolved against the
# current working directory.
df = pd.read_csv('particle_board.csv')

In [None]:
# Preview the first rows to confirm the file loaded as expected.
df.head()

In [None]:
# Inspect the dtype pandas inferred for each column.
df.dtypes

In [None]:
# Response variable: stiffness.
y = df['Stiffness']
# Regressor matrix: density plus a 'const' column filled with 1.0, which
# add_constant() adds so that the model gets an intercept term.
x = sm.add_constant(df['Density'])

In [None]:
# Ordinary least squares: y is the response, x holds the regressors
# ('const' and 'Density'). Nothing is estimated until fit() is called.
model = sm.OLS(y, x)

In [None]:
# Estimate the coefficients; 'fitted' is the results object queried by the
# cells that follow.
fitted = model.fit()

In [None]:
# In-sample predictions: evaluate the fitted model on the same x it was fit to.
y_predicted = fitted.predict(x)

In [None]:
# Store the predictions next to the observations so they can be compared
# and plotted from the same data frame.
df['Stiffness predicted'] = y_predicted

In [None]:
# Confirm that the 'Stiffness predicted' column was added.
df.head()

In [None]:
# Display the whole data frame (observed and predicted values).
df

In [None]:
# Full regression report for the fitted model.
fitted.summary()

In [None]:
# Residual standard error: square root of the residual mean square
# (mse_resid), expressed in the units of the response.
np.sqrt(fitted.mse_resid)

In [None]:
# Adjusted R squared, as a fraction rather than a percentage
fitted.rsquared_adj

In [None]:
# p values for the model terms
fitted.pvalues

In [None]:
# Confidence intervals for the regression coefficients
# (conf_int() is called with its default alpha of 0.05, i.e. 95 % intervals)
fitted.conf_int()

In [None]:
# Regression coefficients, one per column of x ('const' and 'Density')
fitted.params

In [None]:
# Covariance matrix of the coefficients before scaling by the residual
# variance; for OLS this should be (X'X)^-1 -- TODO confirm against the
# statsmodels documentation.
fitted.normalized_cov_params

In [None]:
# NOTE(review): the constant 4 looks like a placeholder for a real
# confidence-interval half-width -- TODO confirm / replace.
df['Stiffness predicted plus CI'] = df['Stiffness predicted'] + 4
# Show the new column as a one-column data frame.
df[['Stiffness predicted plus CI']]

In [None]:
# Double brackets keep each selection as a one-column data frame (2-D)
# rather than a Series, so the two can be used as matrices.
B = df[['Stiffness']]
A = df[['Density']]
# Row-vector form of the stiffness column.
C = B.T

In [None]:
# Display the transposed frame: one row, one column per observation.
C

In [None]:
# Matrix product of C (1 x n, Stiffness) and A (n x 1, Density): a single
# value, the sum over all rows of Stiffness * Density.
D = np.matmul(C, A)

In [None]:
# Observed stiffness against density, drawn as point markers only.
ax = df.plot.line(
    x='Density',
    y='Stiffness',
    style='.',
    color=c[1],
    legend=False,
    figsize=(9, 6),
)
ax.set_ylabel('Stiffness', fontweight='bold')
ax.set_xlabel('Density', fontweight='bold')
# Overlay the fitted line; passing ax= reuses the axes created above
# instead of opening a new figure.
df.plot(
    x='Density',
    y='Stiffness predicted',
    color=c[5],
    linewidth=2,
    legend=False,
    ax=ax,
)
# Drop the top and right spines.
despine(ax)