In [1]:
pip install ISLP

Collecting ISLP
  Downloading ISLP-0.3.22-py3-none-any.whl (3.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting numpy<1.25,>=1.7.1 (from ISLP)
  Downloading numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.3/17.3 MB[0m [31m47.8 MB/s[0m eta [36m0:00:00[0m
Collecting lifelines (from ISLP)
  Downloading lifelines-0.28.0-py3-none-any.whl (349 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m349.2/349.2 kB[0m [31m31.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pygam (from ISLP)
  Downloading pygam-0.9.1-py3-none-any.whl (522 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m522.0/522.0 kB[0m [31m40.5 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-lightning (from ISLP)
  Downloading pytorch_lightning-2.2.0.post0-py3-none-any.whl (800 kB)
[2K     [90m

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from ISLP import load_data
from ISLP.models import (ModelSpec as MS, summarize)

from ISLP import confusion_table

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.naive_bayes import GaussianNB as NB
from sklearn.neighbors import KNeighborsClassifier as KNN


# Load dataset

In [2]:
# Fetch the Smarket table through ISLP's dataset loader.
data = load_data('Smarket')

# Preview the leading rows (head() shows five by default).
data.head()

Unnamed: 0,Year,Lag1,Lag2,Lag3,Lag4,Lag5,Volume,Today,Direction
0,2001,0.381,-0.192,-2.624,-1.055,5.01,1.1913,0.959,Up
1,2001,0.959,0.381,-0.192,-2.624,-1.055,1.2965,1.032,Up
2,2001,1.032,0.959,0.381,-0.192,-2.624,1.4112,-0.623,Down
3,2001,-0.623,1.032,0.959,0.381,-0.192,1.276,0.614,Up
4,2001,0.614,-0.623,1.032,0.959,0.381,1.2057,0.213,Up


In [21]:
# Columns that must not be used as predictors: the response itself
# ('Direction') plus Year, Volume and Today. Whatever remains is a predictor.
excluded_columns = ['Direction', 'Year', 'Volume', 'Today']
predictors = data.columns.drop(excluded_columns)

# Build the model matrix from the selected predictors. The resulting frame
# carries an 'intercept' column, which the logistic-regression cell relies on.
design = MS(predictors)
X = design.fit_transform(data)

# Boolean response: True where the market direction is 'Up'.
y = data['Direction'].eq('Up')

In [12]:
# Sanity-check the model matrix: show its first five rows (head()'s default).
X.head()

Unnamed: 0,intercept,Lag1,Lag2,Lag3,Lag4,Lag5,Volume,Today
0,1.0,0.381,-0.192,-2.624,-1.055,5.01,1.1913,0.959
1,1.0,0.959,0.381,-0.192,-2.624,-1.055,1.2965,1.032
2,1.0,1.032,0.959,0.381,-0.192,-2.624,1.4112,-0.623
3,1.0,-0.623,1.032,0.959,0.381,-0.192,1.276,0.614
4,1.0,0.614,-0.623,1.032,0.959,0.381,1.2057,0.213


In [14]:
# Sanity-check the boolean response: show its first five entries (head()'s default).
y.head()

0     True
1     True
2    False
3     True
4     True
Name: Direction, dtype: bool

# Logistic Regression

In [22]:
# Logistic regression expressed as a GLM with a binomial family:
# y (True = 'Up') is regressed on the model matrix X.
binomial_family = sm.families.Binomial()
lr_model = sm.GLM(y, X, family=binomial_family)

# Estimate the coefficients, then display the coefficient table.
result = lr_model.fit()
summarize(result)

Unnamed: 0,coef,std err,z,P>|z|
intercept,0.0742,0.057,1.309,0.191
Lag1,-0.0713,0.05,-1.424,0.155
Lag2,-0.0441,0.05,-0.882,0.378
Lag3,0.0092,0.05,0.185,0.853
Lag4,0.0072,0.05,0.145,0.885
Lag5,0.0093,0.049,0.188,0.851


In [25]:
# predict() is called without new data, so these are the fitted probabilities
# for the same rows the model was trained on.
lr_pred = result.predict()

# Convert probabilities to class labels using a 0.5 cut-off:
# strictly above 0.5 -> 'Up', otherwise 'Down'.
labels = np.where(lr_pred > 0.5, 'Up', 'Down')

# Cross-tabulate predicted labels against the observed direction.
confusion_table(labels, data.Direction)


Truth,Down,Up
Predicted,Unnamed: 1_level_1,Unnamed: 2_level_1
Down,116,98
Up,486,550


In [27]:
# Accuracy of the logistic-regression labels: the fraction of rows whose
# predicted label equals the observed Direction. Computed directly from the
# predictions instead of hand-copied confusion-table counts, so the figure
# stays correct if the data or the model change.
# NOTE(review): measured on the rows the model was fitted on (training accuracy).
np.mean(labels == data.Direction)

0.5328

# Linear Discriminant Analysis (LDA)

In [28]:
# The scikit-learn classifiers below are fitted without the explicit
# 'intercept' column that the GLM cell used, so remove it here.
# errors='ignore' makes the cell idempotent: re-running it after the column
# has already been dropped no longer raises a KeyError.
# NOTE(review): X is rebound in place, so the logistic-regression cell must be
# run before this one (it needs the intercept column).
X = X.drop(columns=['intercept'], errors='ignore')
X.head(5)

Unnamed: 0,Lag1,Lag2,Lag3,Lag4,Lag5
0,0.381,-0.192,-2.624,-1.055,5.01
1,0.959,0.381,-0.192,-2.624,-1.055
2,1.032,0.959,0.381,-0.192,-2.624
3,-0.623,1.032,0.959,0.381,-0.192
4,0.614,-0.623,1.032,0.959,0.381


In [30]:
# Create and fit the LDA classifier in one step (fit() returns the estimator).
lda_model = LDA().fit(X, y)

# Predict on the same rows the model was fitted on.
lda_pred = lda_model.predict(X)

# Cross-tabulate predictions against the true boolean response.
confusion_table(lda_pred, y)

Truth,False,True
Predicted,Unnamed: 1_level_1,Unnamed: 2_level_1
False,113,98
True,489,550


In [31]:
# Accuracy of the LDA predictions: the fraction of rows where the prediction
# matches y. Computed from the predictions rather than from hand-copied
# confusion-table counts, so it cannot drift out of sync with the model.
# NOTE(review): measured on the rows the model was fitted on (training accuracy).
np.mean(lda_pred == y)

0.5304

# Naive Bayes

In [32]:
# Create the Gaussian naive Bayes classifier and train it on (X, y);
# fit() returns the estimator, so both steps are chained.
nb_model = NB().fit(X, y)

# Predict on the same rows the model was trained on.
nb_pred = nb_model.predict(X)

# Cross-tabulate predictions against the true boolean response.
confusion_table(nb_pred, y)

Truth,False,True
Predicted,Unnamed: 1_level_1,Unnamed: 2_level_1
False,129,101
True,473,547


In [33]:
# Accuracy of the naive Bayes predictions: the fraction of rows where the
# prediction matches y. Computed from the predictions rather than from
# hand-copied confusion-table counts, so it cannot drift out of sync.
# NOTE(review): measured on the rows the model was fitted on (training accuracy).
np.mean(nb_pred == y)

0.5408

# K-Nearest Neighbors (KNN)

In [42]:
# Create a 3-nearest-neighbours classifier and train it on (X, y);
# fit() returns the estimator, so both steps are chained.
knn_model = KNN(n_neighbors=3).fit(X, y)

# Predict on the same rows the model was trained on.
# NOTE(review): each row is scored against a model that has already seen it,
# so the resulting accuracy is likely optimistic -- confirm on held-out data.
knn_pred = knn_model.predict(X)

# Show the confusion matrix of predictions versus the true response.
confusion_table(knn_pred, y)

Truth,False,True
Predicted,Unnamed: 1_level_1,Unnamed: 2_level_1
False,441,151
True,161,497


In [35]:
# Accuracy of the KNN predictions: the fraction of rows where the prediction
# matches y. Computed from the predictions rather than from hand-copied
# confusion-table counts, so it cannot drift out of sync with the model.
# NOTE(review): measured on the rows the model was fitted on (training accuracy).
np.mean(knn_pred == y)

0.7504