# Scratch pad StatsModels

The purpose of this notebook is to explore single-factor and multiple-factor regression using StatsModels.

In [None]:
import pandas as pd
import matplotlib.cm as cm
import matplotlib.axes as axes
%matplotlib inline

In [None]:
# Colour definitions (chosen to be colour-blind friendly): the twelve
# entries of Matplotlib's qualitative "Paired" colormap, bound to c1..c12.
# Reference: "Paired" under "Qualitative colormaps" at
# https://matplotlib.org/tutorials/colors/colormaps.html
(
    c1, c2, c3, c4, c5, c6,
    c7, c8, c9, c10, c11, c12,
) = cm.Paired.colors

In [None]:
# Helper that hides the right-hand and upper borders of a plot.
def despine(ax: axes.Axes) -> None:
    """Hide the right and top spines of *ax*.

    The spines are hidden by setting their colour to 'none'; *ax* is
    modified in place and nothing is returned.
    """
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')

In [None]:
# Build the sample dataframe: 24 monthly rows, listed from December 2017
# back to January 2016.
# Dependent variable: Stock_Index_Price.
# Independent variables: Interest_Rate and Unemployment_Rate.
stock_market = {
    # Twelve rows for 2017 followed by twelve rows for 2016.
    'Year': [2017] * 12 + [2016] * 12,
    # Months count down from 12 to 1 within each year.
    'Month': list(range(12, 0, -1)) * 2,
    'Interest_Rate': [
        2.75, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.25, 2.25, 2.25, 2, 2,
        2, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75, 1.75,
    ],
    'Unemployment_Rate': [
        5.3, 5.3, 5.3, 5.3, 5.4, 5.6, 5.5, 5.5, 5.5, 5.6, 5.7, 5.9,
        6, 5.9, 5.8, 6.1, 6.2, 6.1, 6.1, 6.1, 5.9, 6.2, 6.2, 6.1,
    ],
    'Stock_Index_Price': [
        1464, 1394, 1357, 1293, 1256, 1254, 1234, 1195, 1159, 1167, 1130, 1075,
        1047, 965, 943, 958, 971, 949, 884, 866, 876, 822, 704, 719,
    ],
}
# Column order follows the key order of the dictionary above.
df = pd.DataFrame(stock_market, columns=list(stock_market))

In [None]:
# Point plots of Y against X, one figure per (x column, y column) pair:
# Stock_Index_Price vs Interest_Rate, then Unemployment_Rate vs
# Interest_Rate.
for xx, yy in [
    ('Interest_Rate', 'Stock_Index_Price'),
    ('Interest_Rate', 'Unemployment_Rate'),
]:
    # A line plot with the '.' style draws markers only, giving a scatter.
    ax = df.plot.line(x=xx, y=yy, style='.', color=c2,
                      legend=False, figsize=(9, 6))
    ax.autoscale(enable=True)
    ax.set_title(f'{yy} vs {xx}', fontweight='bold')
    ax.set_xlabel(f'{xx}', fontweight='bold')
    ax.set_ylabel(f'{yy}', fontweight='bold')
    despine(ax)