In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA

In [28]:
# Weekly summary table; the preview below shows one row per (Year, Week_Number)
# with mean_return, volatility and a 0/1 label.
df_weekly = pd.read_csv("SAP_weekly_return_volatility.csv")
df_weekly.head(5)

Unnamed: 0,Year,Week_Number,mean_return,volatility,label
0,2019,0,-0.105,3.59465,0
1,2019,1,0.6532,1.153409,1
2,2019,2,0.7326,1.198802,1
3,2019,3,0.0735,1.593999,0
4,2019,4,-0.3746,2.056743,0


In [29]:
# Daily price/return table; per the preview below, each trading day also carries
# its week's mean_return and volatility alongside Week_Number and Year.
df_daily = pd.read_csv("SAP_weekly_return_volatility_detailed.csv")
df_daily.head(5)

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close,Return,Date,Week_Number,Year,Day,Weekday,mean_return,volatility
0,99.559998,98.339996,98.57,99.18,506300.0,94.896118,0.0,2019-01-02,0,2019,2,Wednesday,-0.105,3.59465
1,96.830002,95.449997,96.730003,95.459999,807800.0,91.3368,-3.751,2019-01-03,0,2019,3,Thursday,-0.105,3.59465
2,99.199997,96.910004,97.339996,98.739998,1038100.0,94.475128,3.436,2019-01-04,0,2019,4,Friday,-0.105,3.59465
3,100.190002,98.760002,99.440002,99.709999,861600.0,95.403229,0.982,2019-01-07,1,2019,7,Monday,0.6532,1.153409
4,101.480003,100.43,101.370003,101.269997,548600.0,96.895844,1.565,2019-01-08,1,2019,8,Tuesday,0.6532,1.153409


In [30]:
# One weekly frame per calendar year, each re-indexed from 0.
df_week_2019 = df_weekly.loc[df_weekly["Year"].eq(2019)].reset_index(drop=True)
df_week_2020 = df_weekly.loc[df_weekly["Year"].eq(2020)].reset_index(drop=True)

#### Function for LDA

In [31]:
def lda(x_train, x_test, y_train, y_test):
    """Fit a linear discriminant classifier and score it on a test set.

    Parameters
    ----------
    x_train, y_train : features and labels used to fit the model.
    x_test, y_test : features and true labels used for evaluation.

    Returns
    -------
    list
        ``[accuracy, coef_, predictions]`` where ``accuracy`` is the fraction
        of test labels predicted correctly, ``coef_`` is the fitted model's
        weight array and ``predictions`` are the predicted test labels.
    """
    model = LDA(n_components=1).fit(x_train, y_train)
    y_hat = model.predict(x_test)

    # Mean of the element-wise match mask is the hit rate.
    hit_rate = np.mean(y_test == y_hat)

    return [hit_rate, model.coef_, y_hat]

#### Function for QDA

In [32]:
def qda(x_train1, x_test1, y_train1, y_test1):
    """Fit a quadratic discriminant classifier and score it on a test set.

    Parameters
    ----------
    x_train1, y_train1 : features and labels used to fit the model.
    x_test1, y_test1 : features and true labels used for evaluation.

    Returns
    -------
    list
        ``[accuracy, predictions]`` where ``accuracy`` is the fraction of
        test labels predicted correctly.
    """
    model = QDA().fit(x_train1, y_train1)
    y_hat = model.predict(x_test1)

    # Mean of the element-wise match mask is the hit rate.
    hit_rate = np.mean(y_test1 == y_hat)

    return [hit_rate, y_hat]

### 1. What are the equations of the linear and quadratic classifiers found from year 1 data?

In [33]:
# Hold out half of the 2019 weeks; random_state pins the shuffle so the split
# (and therefore the fitted coefficients) is reproducible.
x_eq_1, x_eq_2, y_eq_1, y_eq_2 = train_test_split(
    df_week_2019[["mean_return", "volatility"]].values,
    df_week_2019["label"].values,
    test_size=0.5,
    random_state=0,
)

lda_ = lda(x_eq_1, x_eq_2, y_eq_1, y_eq_2)
qda_ = qda(x_eq_1, x_eq_2, y_eq_1, y_eq_2)

print(f"Equation for Linear Classifier for year 1: {lda_[1]}")

# The weights alone do not define the classifier: the decision boundary is
# w . x + b = 0. lda() only returns coef_, so the same model is refit on the
# same training half here purely to read its intercept.
lda_eq = LDA(n_components=1).fit(x_eq_1, y_eq_1)
(w_ret, w_vol), b_lda = lda_eq.coef_[0], lda_eq.intercept_[0]
print(
    "Linear decision boundary for year 1: "
    f"{w_ret:.4f} * mean_return {w_vol:+.4f} * volatility {b_lda:+.4f} = 0"
)
# NOTE(review): the quadratic classifier's parameters are still not reported;
# qda() returns only the accuracy and the predictions.

Equation for Linear Classifier for year 1: [[ 5.71345533 -2.95305204]]


### 2. What is the accuracy for year 2 for each classifier? Which classifier is "better"?

In [34]:
# Train on every 2019 week, evaluate on every 2020 week.
x1, y1 = df_week_2019[["mean_return", "volatility"]].values, df_week_2019["label"].values
x2, y2 = df_week_2020[["mean_return", "volatility"]].values, df_week_2020["label"].values

lda_ret, qda_ret = lda(x1, x2, y1, y2), qda(x1, x2, y1, y2)

# NOTE(review): the closing sentence is hard-coded text, not derived from the
# two accuracies; revisit it if the data or models change.
print(f"""
Each classifier accuracy: 
LDA Accuracy {lda_ret[0]:.2f} 
QDA Accuracy {qda_ret[0]:.2f}

By looking at the result LDA had a slight better accuracy than QDA""")


Each classifier accuracy: 
LDA Accuracy 0.85 
QDA Accuracy 0.83

By looking at the result LDA had a slight better accuracy than QDA


### 3. Compute the confusion matrix for year 2 for each classifier

#### Confusion Matrix for LDA

In [35]:
# labels=[0, 1] pins the matrix to 2x2 so the four-way unpack cannot fail when
# one class never appears in y2 or in the predictions.
# ravel() on the 2x2 matrix yields TN, FP, FN, TP in that order.
tn_lda, fp_lda, fn_lda, tp_lda = confusion_matrix(y2, lda_ret[2], labels=[0, 1]).ravel()
pd.DataFrame({"TN": [tn_lda], "FP": [fp_lda], "FN": [fn_lda], "TP": [tp_lda]})

Unnamed: 0,TN,FP,FN,TP
0,29,4,4,16


#### Confusion Matrix for QDA

In [36]:
# labels=[0, 1] pins the matrix to 2x2 so the four-way unpack cannot fail when
# one class never appears in y2 or in the predictions.
# ravel() on the 2x2 matrix yields TN, FP, FN, TP in that order.
tn_qda, fp_qda, fn_qda, tp_qda = confusion_matrix(y2, qda_ret[1], labels=[0, 1]).ravel()
pd.DataFrame({"TN": [tn_qda], "FP": [fp_qda], "FN": [fn_qda], "TP": [tp_qda]})

Unnamed: 0,TN,FP,FN,TP
0,29,4,5,15


### 4. What are the true positive rate (sensitivity or recall) and true negative rate (specificity) for year 2?

In [37]:
# Sensitivity = TP / (TP + FN); specificity = TN / (TN + FP).
lda_tpr, lda_tnr = tp_lda / (fn_lda + tp_lda), tn_lda / (fp_lda + tn_lda)

print(f"LDA TPR:{lda_tpr:.2f} and TNR:{lda_tnr:.2f}")

LDA TPR:0.80 and TNR:0.88


In [38]:
# Sensitivity = TP / (TP + FN); specificity = TN / (TN + FP).
qda_tpr, qda_tnr = tp_qda / (fn_qda + tp_qda), tn_qda / (fp_qda + tn_qda)

print(f"QDA TPR:{qda_tpr:.2f} and TNR:{qda_tnr:.2f}")

QDA TPR:0.75 and TNR:0.88


### 5. Implement trading strategies based on your labels for year 2 (for both linear and quadratic) and compare the performance with the "buy-and-hold" strategy. Which strategy results in a larger amount at the end of the year?

In [39]:
# Attach each model's 2020 predictions to the weekly frame they were computed from.
df_week_2020["predict_lda"] = lda_ret[2]
df_week_2020["predict_qda"] = qda_ret[1]

# 2020 trading days only, as an independent frame re-indexed from 0.
df_daily_2 = df_daily.loc[df_daily["Year"].eq(2020)].copy().reset_index(drop=True)
df_daily_2.head(5)

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close,Return,Date,Week_Number,Year,Day,Weekday,mean_return,volatility
0,137.139999,135.399994,135.419998,136.809998,2967500.0,132.684326,2.105,2020-01-02,0,2020,2,Thursday,0.018,2.951464
1,134.919998,133.300003,133.300003,133.979996,507800.0,129.939667,-2.069,2020-01-03,0,2020,3,Friday,0.018,2.951464
2,134.220001,133.130005,133.160004,134.199997,442900.0,130.153046,0.164,2020-01-06,1,2020,6,Monday,0.4468,0.825564
3,134.350006,133.440002,134.080002,133.619995,621200.0,129.59053,-0.432,2020-01-07,1,2020,7,Tuesday,0.4468,0.825564
4,135.520004,133.759995,133.899994,135.080002,513300.0,131.0065,1.093,2020-01-08,1,2020,8,Wednesday,0.4468,0.825564


In [40]:
# Check that predict_lda / predict_qda now sit next to the true label.
df_week_2020.head(5)

Unnamed: 0,Year,Week_Number,mean_return,volatility,label,predict_lda,predict_qda
0,2020,0,0.018,2.951464,0,0,0
1,2020,1,0.4468,0.825564,1,1,1
2,2020,2,0.4366,1.142404,1,1,1
3,2020,3,-0.15925,0.70663,1,0,0
4,2020,4,-1.217,1.12736,0,0,0


#### Buy and Hold

In [41]:
def buyhold(ret_list, amt=100):
    """Compound a series of percentage returns into a running balance.

    Parameters
    ----------
    ret_list : pandas.Series
        Per-period returns expressed in percent (e.g. ``1.5`` means +1.5%).
    amt : float, default 100
        Starting balance. The default keeps the original behaviour of
        investing 100.

    Returns
    -------
    pandas.Series
        Balance after each period, rounded to 2 decimals; the last element
        is the final balance.
    """
    # Percent -> growth factor, then cumulative product gives total growth.
    growth = (ret_list / 100 + 1).cumprod()

    return round(growth * amt, 2)

In [42]:
# Buy-and-hold across every 2020 trading day; the last element is the year-end balance.
hold = buyhold(df_daily.loc[df_daily["Year"] == 2020, "Return"])
hold.iloc[-1]

98.71

#### My Strategy

In [43]:
def compute_daily_ret(d_frame1, amt):
    """Return the balance after compounding ``amt`` through a frame's daily returns.

    Parameters
    ----------
    d_frame1 : pandas.DataFrame
        Must contain a ``"Return"`` column holding percentage returns.
    amt : float
        Balance at the start of the period.

    Returns
    -------
    float
        Balance after the last row, rounded to 2 decimals. If the frame has
        no rows, nothing was traded and ``amt`` (rounded) is returned instead
        of raising ``IndexError``.
    """
    daily_returns = d_frame1["Return"]
    if len(daily_returns) == 0:
        return round(amt, 2)

    # Only the final compounded value is needed, so round once at the end.
    growth = (daily_returns / 100 + 1).cumprod()
    return round(growth.iloc[-1] * amt, 2)

In [44]:
def weekDaily(d_frame_week, d_frame_daily, lbl):
    """Simulate a label-driven weekly strategy and return the running balance.

    The balance starts at 100. For every week after the first, the balance is
    compounded through that week's daily returns when the week's label is 1,
    and carried forward unchanged otherwise.

    Parameters
    ----------
    d_frame_week : pandas.DataFrame
        One row per week, in chronological order, with ``"Week_Number"`` and
        the label column ``lbl``. Labels are expected to be 0 or 1.
    d_frame_daily : pandas.DataFrame
        Daily rows with ``"Week_Number"`` and ``"Return"`` (percent).
    lbl : str
        Name of the 0/1 label column in ``d_frame_week`` to trade on.

    Returns
    -------
    list
        Balance at the end of each week, the same length as ``d_frame_week``.

    Notes
    -----
    * When both frames have a ``"Year"`` column, daily rows are matched on
      year as well as week number, so a multi-year daily frame does not mix
      same-numbered weeks from different years.
    * Rows are read by position, so the weekly frame does not need a
      0..n-1 index.
    * A week with no daily rows leaves the balance unchanged.
    * NOTE(review): the first week only seeds the starting balance; its label
      is never traded on, even when it is 1.
    """
    labels = d_frame_week[lbl].to_numpy()
    weeks = d_frame_week["Week_Number"].to_numpy()

    match_year = "Year" in d_frame_week.columns and "Year" in d_frame_daily.columns
    years = d_frame_week["Year"].to_numpy() if match_year else None

    total_ret = []
    for i in range(len(labels)):
        if i == 0:
            total_ret.append(100)
            continue

        balance = total_ret[-1]

        # Out of the market this week: carry the balance forward.
        if labels[i] != 1:
            total_ret.append(balance)
            continue

        in_week = d_frame_daily["Week_Number"] == weeks[i]
        if match_year:
            in_week = in_week & (d_frame_daily["Year"] == years[i])
        frame = d_frame_daily[in_week]

        if len(frame) == 0:
            total_ret.append(balance)
        else:
            total_ret.append(compute_daily_ret(frame, balance))

    return total_ret

In [45]:
# Weekly balances when trading on the LDA predictions; the last entry is the year-end balance.
daily_ret_lda = weekDaily(df_week_2020,df_daily_2,"predict_lda")
daily_ret_lda[-1]

284.75

In [46]:
# Weekly balances when trading on the QDA predictions; the last entry is the year-end balance.
daily_ret_qda = weekDaily(df_week_2020,df_daily_2,"predict_qda")
daily_ret_qda[-1]

232.52

In [47]:
# Side-by-side year-end balances; each strategy's series starts from 100.
print(f"""
2020 return using Buy and Hold: $ {hold.values[-1]} and 
2020 return using LDA: $ {daily_ret_lda[-1]}
2020 return using QDA: $ {daily_ret_qda[-1]}
""")


2020 return using Buy and Hold: $ 98.71 and 
2020 return using LDA: $ 284.75
2020 return using QDA: $ 232.52

