In [10]:
# import the required libraries

import pandas as pd
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

In [2]:
# load the Weekly data set from the csv file and preview its first 5 rows

weekly_csv = "../Weekly.csv"
df = pd.read_csv(weekly_csv)
df.head(5)

Unnamed: 0,Year,Lag1,Lag2,Lag3,Lag4,Lag5,Volume,Today,Direction
0,1990,0.816,1.572,-3.936,-0.229,-3.484,0.154976,-0.27,Down
1,1990,-0.27,0.816,1.572,-3.936,-0.229,0.148574,-2.576,Down
2,1990,-2.576,-0.27,0.816,1.572,-3.936,0.159837,3.514,Up
3,1990,3.514,-2.576,-0.27,0.816,1.572,0.16163,0.712,Up
4,1990,0.712,3.514,-2.576,-0.27,0.816,0.153728,1.178,Up


In [3]:
# convert the Direction response to binary (0 for Down, 1 for Up)
# print the first 5 rows

# Use an explicit label -> code mapping: pd.factorize numbers labels in order of
# first appearance, so it only gives Down=0 / Up=1 when the first row is "Down".
# Values that are already 0/1 pass through unchanged, so re-running the cell is safe.
direction_codes = {"Down": 0, "Up": 1}
df["Direction"] = df["Direction"].map(lambda label: direction_codes.get(label, label))

# fail loudly on an unexpected label instead of silently mis-encoding it
assert df["Direction"].isin([0, 1]).all(), "unexpected Direction label"

df.head()

Unnamed: 0,Year,Lag1,Lag2,Lag3,Lag4,Lag5,Volume,Today,Direction
0,1990,0.816,1.572,-3.936,-0.229,-3.484,0.154976,-0.27,0
1,1990,-0.27,0.816,1.572,-3.936,-0.229,0.148574,-2.576,0
2,1990,-2.576,-0.27,0.816,1.572,-3.936,0.159837,3.514,1
3,1990,3.514,-2.576,-0.27,0.816,1.572,0.16163,0.712,1
4,1990,0.712,3.514,-2.576,-0.27,0.816,0.153728,1.178,1


In [4]:
# keep only the rows whose Year lies in 1990..2008 (both ends included),
# then pick out the predictor (Lag2) and the response (Direction)

in_period = df["Year"].between(1990, 2008)
df = df[in_period]

X = df.loc[:, ["Lag2"]]      # list selector keeps X two-dimensional (DataFrame)
y = df.loc[:, "Direction"]   # single label gives a one-dimensional Series

In [9]:
# fit a QDA classifier on X, y and report its mean accuracy on the same data

qda = QuadraticDiscriminantAnalysis()
qda.fit(X, y)
model = qda  # fit() hands back the estimator itself, so both names refer to one object

model.score(X, y)

0.5522842639593909

In [6]:
# predict on the same X the model was fit on and cross-tabulate
# actual classes (rows) against predicted classes (columns)
# NOTE(review): this is in-sample evaluation - confirm that no held-out set is intended
y_pred = model.predict(X)

cm = confusion_matrix(y, y_pred)

# unpack the 2x2 matrix row by row: first row is actual 0, second row is actual 1
(tn, fp), (fn, tp) = cm

cm

array([[  0, 441],
       [  0, 544]], dtype=int64)

In [11]:
# scikit-learn scores, computed for the positive class (label 1)
precision = precision_score(y, y_pred)
recall = recall_score(y, y_pred)
fscore = f1_score(y, y_pred)

# specificity: share of actual negatives that were predicted negative
specificity = tn / (fp + tn)

precision, recall, specificity, fscore

(0.5522842639593909, 1.0, 0.0, 0.7115761935905821)

In [8]:
# recall is exactly the same as the LogReg recall
# precision is slightly reduced
# 0 tn were predicted: the model classified every observation as Up (1)