In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
%matplotlib inline

# 4.6.1 The stock market data

In [None]:
# Fetch the Smarket data set from the R package ISLR through statsmodels'
# Rdatasets interface; cache=True asks statsmodels to reuse a local copy.
df = sm.datasets.get_rdataset("Smarket", "ISLR", cache=True).data

In [None]:
# Peek at the first rows to check the columns loaded as expected.
df.head()

In [None]:
# Summary statistics; describe() only reports the numeric columns by default.
df.describe()

In [None]:
# Direction is a text column, and DataFrame.corr() no longer drops non-numeric
# columns silently (pandas >= 2.0 raises instead), so restrict to numeric
# columns explicitly. select_dtypes works on old and new pandas alike.
df.select_dtypes(include="number").corr().style.background_gradient(cmap='viridis')

In [None]:
# Label the figure so it stands on its own when the notebook is skimmed;
# the trailing semicolon suppresses the repr of the value returned by set().
df["Volume"].plot(title="Smarket: Volume by observation").set(
    xlabel="Observation (row index)", ylabel="Volume");

# 4.6.2 Logistic Regression

In [None]:
# Encode the response explicitly as 0/1 integers (1 == "Up"). The dtype that
# get_dummies returns depends on the pandas version (uint8 before 2.0, bool
# from 2.0 on), so build the indicator directly and keep the name "Up" that
# the model summary reports as the dependent variable.
y = df["Direction"].eq("Up").astype(int).rename("Up")
# Predictors: every column except the response, Today and Year.
X = df.drop(columns=["Direction", "Today", "Year"])

In [None]:
# statsmodels does not add an intercept on its own, so prepend a constant
# column to the predictors before fitting.
X_design = sm.add_constant(X)
logit_model = sm.Logit(y, X_design)
logit = logit_model.fit()
print(logit.summary())

Access the coefficients and p-values

In [None]:
# Fitted coefficients; the intercept added by add_constant is listed as "const".
logit.params

In [None]:
# p-values for the individual coefficients, in the same order as logit.params.
logit.pvalues

Display the first 10 predicted probabilities from the training data

In [None]:
# predict() needs the same design matrix layout used for fitting, i.e. the
# predictors with the constant column prepended.
X_with_const = sm.add_constant(X)
predict_prob = logit.predict(X_with_const)
predict_prob[:10]

Note to self: don't use `fittedvalues` for logistic regression. It returns the linear predictor (the dot product of the training exogenous variables and the coefficients, i.e. the log-odds), not the predicted probabilities; use `predict` for those.

Turn these predictions into a text series

In [None]:
# Label each observation "Up" when the predicted probability exceeds 0.5 and
# "Down" otherwise, keeping the index of the original frame.
predicted_labels = np.where(predict_prob > 0.5, "Up", "Down")
predict_str = pd.Series(data=predicted_labels, index=df.index, name="Prediction")

In [None]:
# Quick look at the first few predicted labels.
predict_str.head()

Make a confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Rows are the true Direction, columns the predicted label; labels= fixes the
# row/column order to Up, Down instead of the default sorted order.
cm = confusion_matrix(df["Direction"], predict_str, labels=["Up", "Down"])
cm

In [None]:
from sklearn.utils.multiclass import unique_labels
# From https://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
def plot_confusion_matrix(y_true, y_pred,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues,
                          labels=None,
                          ax=None):
    """
    Print and plot the confusion matrix of `y_pred` against `y_true`.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels, aligned with `y_true`.
    normalize : bool, default False
        If True, divide every row by its total so each row shows the
        fraction of that true class assigned to each predicted class.
    title : str, optional
        Plot title. Defaults to a description of the normalization mode.
    cmap : matplotlib colormap, default plt.cm.Blues
        Colormap used for the heat map.
    labels : sequence, optional
        Class labels in the order they should appear on both axes. Defaults
        to the labels found in `y_true` or `y_pred`.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. A new figure and axes are created when omitted.

    Returns
    -------
    matplotlib.axes.Axes
        The axes containing the annotated heat map.
    """
    if not title:
        title = ('Normalized confusion matrix' if normalize
                 else 'Confusion matrix, without normalization')

    # Compute confusion matrix; labels=None lets scikit-learn use the labels
    # that appear in the data, exactly as the tick labels below do.
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    classes = unique_labels(y_true, y_pred) if labels is None else list(labels)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class with no true samples (it only occurs in y_pred, or was
        # requested via `labels`) has an all-zero row. Leave that row at 0
        # rather than dividing by zero, which would fill it with NaN and
        # turn the colour threshold below into NaN as well.
        cm = np.divide(cm, row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    created_figure = ax is None
    if created_figure:
        fig, ax = plt.subplots()
    else:
        fig = ax.figure
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    fig.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; switch to white text on dark cells for contrast.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for (i, j), value in np.ndenumerate(cm):
        ax.text(j, i, format(value, fmt),
                ha="center", va="center",
                color="white" if value > thresh else "black")

    # Only adjust the layout of figures this function created; a caller that
    # passed its own axes is in charge of its figure's layout.
    if created_figure:
        fig.tight_layout()
    return ax




In [None]:
# The trailing semicolon hides the repr of the Axes that the function returns.
plot_confusion_matrix(df["Direction"], predict_str);

Kind of ugly. I'm a little surprised this isn't a built-in. It feels weird that I have to hack something together to show it.