In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier as dct

In [2]:
# Weekly SAP table: one row per (Year, Week_Number) with the week's
# mean return, volatility and label.
df_week = pd.read_csv(filepath_or_buffer="SAP_weekly_return_volatility.csv")
df_week.head(n=5)

Unnamed: 0,Year,Week_Number,mean_return,volatility,label
0,2019,0,-0.105,3.59465,0
1,2019,1,0.6532,1.153409,1
2,2019,2,0.7326,1.198802,1
3,2019,3,0.0735,1.593999,0
4,2019,4,-0.3746,2.056743,0


In [3]:
# Daily SAP table: one row per trading day, carrying the day's percentage
# "Return" plus the Year / Week_Number keys that tie it to the weekly table.
df_daily = pd.read_csv(filepath_or_buffer="SAP_weekly_return_volatility_detailed.csv")
df_daily.head(n=5)

Unnamed: 0,High,Low,Open,Close,Volume,Adj Close,Return,Date,Week_Number,Year,Day,Weekday,mean_return,volatility
0,99.559998,98.339996,98.57,99.18,506300.0,94.896118,0.0,2019-01-02,0,2019,2,Wednesday,-0.105,3.59465
1,96.830002,95.449997,96.730003,95.459999,807800.0,91.3368,-3.751,2019-01-03,0,2019,3,Thursday,-0.105,3.59465
2,99.199997,96.910004,97.339996,98.739998,1038100.0,94.475128,3.436,2019-01-04,0,2019,4,Friday,-0.105,3.59465
3,100.190002,98.760002,99.440002,99.709999,861600.0,95.403229,0.982,2019-01-07,1,2019,7,Monday,0.6532,1.153409
4,101.480003,100.43,101.370003,101.269997,548600.0,96.895844,1.565,2019-01-08,1,2019,8,Tuesday,0.6532,1.153409


In [4]:
# One weekly frame per calendar year; rows are renumbered from 0 in each
# frame so later positional lookups line up.
df_week_2019 = df_week.loc[df_week["Year"].eq(2019)].reset_index(drop=True)
df_week_2020 = df_week.loc[df_week["Year"].eq(2020)].reset_index(drop=True)

### 1. Implement a decision tree and compute its accuracy for year 2 (2020)

In [5]:
# Train on the 2019 weeks, evaluate on the 2020 weeks.
x_train = df_week_2019[["mean_return", "volatility"]].values
y_train = df_week_2019["label"].values
x_test = df_week_2020[["mean_return", "volatility"]].values
y_test = df_week_2020["label"].values

# Fix the seed: scikit-learn permutes the candidate features at every split,
# so equally good splits may be picked differently from run to run and the
# reported accuracy would not be reproducible on "Restart & Run All".
d_tree = dct(criterion="entropy", random_state=0)
d_tree.fit(x_train, y_train)

predicted = d_tree.predict(x_test)

# Accuracy = share of 2020 weeks whose predicted label equals the true label.
accuracy = np.mean(y_test == predicted)

print(f"Decision Tree Classifier accuracy: {accuracy:.2f}")

Decision Tree Classifier accuracy: 0.87


### 2. Compute the confusion matrix for year 2

In [6]:
# Pin the class order to [0, 1] so the matrix is always 2x2; without it a
# test set (or prediction vector) containing a single class would produce a
# 1x1 matrix and the four-way unpacking below would fail.
tn, fp, fn, tp = confusion_matrix(y_test, predicted, labels=[0, 1]).ravel()

# Single-row table of the four counts for display.
pd.DataFrame([[tn, fp, fn, tp]], columns=["TN", "FP", "FN", "TP"])

Unnamed: 0,TN,FP,FN,TP
0,31,2,5,15


### 3. What are the true positive rate (TPR) and true negative rate (TNR) for year 2?

In [7]:
# TNR: correctly predicted 0-weeks over all actual 0-weeks.
tnr = tn / (fp + tn)
# TPR: correctly predicted 1-weeks over all actual 1-weeks.
tpr = tp / (fn + tp)

print(f"TPR:{tpr:.2f} and TNR:{tnr:.2f}")

TPR:0.75 and TNR:0.94


### 4. Implement a trading strategy based on your labels for year 2 and compare its performance with the "buy-and-hold" strategy. Which strategy results in a larger amount at the end of the year?

In [8]:
# Store the tree's predictions next to the 2020 weekly rows they belong to.
df_week_2020["predict"] = predicted

# Independent copy of the 2020 daily rows, renumbered from 0.
df_daily_2 = df_daily.loc[df_daily["Year"].eq(2020)].copy().reset_index(drop=True)

#### Buy and Hold

In [9]:
def buyhold(ret_list, amt=100):
    """Grow a starting balance through a sequence of percentage returns.

    Parameters
    ----------
    ret_list : pandas.Series or sequence of numbers
        Per-period returns in percent (``1.5`` means +1.5 %). Plain lists
        are accepted and converted to a Series.
    amt : float, default 100
        Starting balance. The default keeps the original behaviour.

    Returns
    -------
    pandas.Series
        Balance after each period, rounded to 2 decimals. The last element
        is the final balance; an empty input yields an empty Series.
    """
    returns = pd.Series(ret_list, dtype="float64")
    # Percent -> growth factor, compounded period after period.
    growth = (returns / 100 + 1).cumprod()
    return (growth * amt).round(2)

In [10]:
# Balance path of $100 held through every 2020 trading day; the last
# element is the year-end balance.
hold = buyhold(df_daily.loc[df_daily["Year"] == 2020, "Return"])
hold.iloc[-1]

98.71

#### My Strategy

In [11]:
def compute_daily_ret(d_frame1, amt):
    """Return the balance after compounding ``amt`` through a frame's returns.

    Parameters
    ----------
    d_frame1 : pandas.DataFrame
        Rows to compound over, in order; must have a ``"Return"`` column
        holding percentage returns.
    amt : float
        Balance before the first row.

    Returns
    -------
    float
        Balance after the last row, rounded to 2 decimals. If the frame has
        no rows there is nothing to compound and ``amt`` is returned
        unchanged (previously this raised ``IndexError``).
    """
    if len(d_frame1) == 0:
        return amt

    # Percent -> growth factor, compounded row by row, scaled by the balance.
    balance = ((d_frame1["Return"] / 100) + 1).cumprod() * amt
    return balance.round(2).values[-1]

In [12]:
def weekDaily(d_frame_week, d_frame_daily, lbl):
    """Simulate the label-driven strategy week by week, starting from $100.

    For every week after the first, the balance is compounded through that
    week's daily returns when the week's label is 1; otherwise the balance
    is carried over unchanged.

    Parameters
    ----------
    d_frame_week : pandas.DataFrame
        One row per week, in chronological order, with ``"Year"``,
        ``"Week_Number"`` and the label column ``lbl`` (values 0/1).
    d_frame_daily : pandas.DataFrame
        Daily rows with ``"Week_Number"`` and ``"Return"`` (percent). When it
        also has a ``"Year"`` column, rows are matched on year as well, so a
        multi-year daily frame no longer mixes equal week numbers from
        different years.
    lbl : str
        Name of the label column in ``d_frame_week``.

    Returns
    -------
    list
        Balance at the end of each week, one entry per row of
        ``d_frame_week`` (empty list for an empty weekly frame).

    Notes
    -----
    The first week only seeds the balance with 100; its own returns are
    never applied, whatever its label. This is kept from the original
    implementation so existing results do not change.
    """
    # Positional arrays: no reliance on the weekly frame having a 0..n-1 index.
    labels = d_frame_week[lbl].to_numpy()
    weeks = d_frame_week["Week_Number"].to_numpy()
    years = d_frame_week["Year"].to_numpy() if "Year" in d_frame_week.columns else None
    match_year = years is not None and "Year" in d_frame_daily.columns

    total_ret = []
    for i in range(len(d_frame_week)):
        if i == 0:
            total_ret.append(100)
            continue

        amount = total_ret[-1]
        if labels[i] == 1:
            in_week = d_frame_daily["Week_Number"] == weeks[i]
            if match_year:
                in_week = in_week & (d_frame_daily["Year"] == years[i])
            frame = d_frame_daily[in_week]
            # A week without daily rows has nothing to compound: carry over.
            if not frame.empty:
                amount = compute_daily_ret(frame, amount)
        total_ret.append(amount)
    return total_ret

In [13]:
# Week-by-week balance of the strategy driven by the "predict" column;
# the last entry is the year-end balance.
daily_ret = weekDaily(
    d_frame_week=df_week_2020,
    d_frame_daily=df_daily_2,
    lbl="predict",
)
daily_ret[-1]

212.11

In [14]:
# Year-end balances of both strategies. The second line is labelled
# "Decision Tree" (it previously said "LDA") because the predictions that
# drive the strategy come from the decision-tree classifier `d_tree`.
print(f"""
2020 return using Buy and Hold: $ {hold.values[-1]} and 
2020 return using Decision Tree: $ {daily_ret[-1]}
""")


2020 return using Buy and Hold: $ 98.71 and 
2020 return using LDA: $ 212.11

