# In [None]:
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import StandardScaler  
from sklearn.neural_network import MLPClassifier
from sklearn import svm

def train_test_split(df, split_row=200):
    """Split the indicator frame into train/test features and labels.

    The first ``split_row`` rows form the training set and the remaining
    rows the test set.  The split is purely positional; the default of 200
    is presumably the row where the date passes 2021-11-30 (train: date
    <= 2021-11-30, test: date > 2021-11-30) -- confirm against the data.

    Besides returning the four partitions, the function prints, one per
    line: the last OBV in the training features, the first OBV in the test
    features, the last Signal in the training labels and the first Signal
    in the test labels.

    Args:
        df: DataFrame holding the indicator columns MA5 ... OBV and a
            ``Signal`` column.
        split_row: Number of leading rows assigned to the training set.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.  The X frames contain
        the fourteen indicator columns; the y frames contain only
        ``Signal``.

    Raises:
        IndexError: If either partition is empty, since the boundary
            values cannot be looked up.
    """
    feature_cols = [
        'MA5', 'MA10', 'MA20', 'MACD', 'K', 'D', 'J',
        'RSI6', 'RSI12', 'RSI24', 'BIAS5', 'BIAS10', 'BIAS20', 'OBV',
    ]
    features = df[feature_cols]
    labels = df[['Signal']]

    X_train, X_test = features.iloc[:split_row], features.iloc[split_row:]
    y_train, y_test = labels.iloc[:split_row], labels.iloc[split_row:]

    # Positional look-ups match the positional split above, so they work for
    # any index (not only the default RangeIndex) and do not rely on the
    # deprecated integer-position fallback of ``Series[0]``.
    print(X_train['OBV'].iloc[-1])      # last OBV in the training set
    print(X_test['OBV'].iloc[0])        # first OBV in the test set
    print(y_train['Signal'].iloc[-1])   # last Signal in the training set
    print(y_test['Signal'].iloc[0])     # first Signal in the test set

    return X_train, y_train, X_test, y_test

def normalization(X_train, y_train, X_test, y_test):
    """Standardize both feature sets using statistics fitted on X_train.

    A ``StandardScaler`` is fitted on the training features only and then
    applied to the training and the test features.  The labels are passed
    through untouched.  Two lines are printed, each with six decimals: the
    scaled value at row 0 / column 0 of the training set, then the same
    position of the test set (column 0 is MA5 when the features come from
    ``train_test_split``).

    Args:
        X_train: Training features.
        y_train: Training labels (returned unchanged).
        X_test: Test features.
        y_test: Test labels (returned unchanged).

    Returns:
        Tuple ``(X_train_scaled, y_train, X_test_scaled, y_test)`` where the
        scaled entries are whatever ``StandardScaler.transform`` returns.
    """
    standardizer = StandardScaler()
    standardizer.fit(X_train)

    scaled_train = standardizer.transform(X_train)
    scaled_test = standardizer.transform(X_test)

    first_train_value = scaled_train[0][0]
    first_test_value = scaled_test[0][0]
    print(format(first_train_value, ".6f"))
    print(format(first_test_value, ".6f"))

    return scaled_train, y_train, scaled_test, y_test

def SVM(X_train, y_train, X_test, y_test):
    """Fit a default ``svm.SVC`` and report accuracy and predictions.

    Prints three things in order: training accuracy (two decimals), test
    accuracy (two decimals) and the predicted labels for ``X_test``.

    Args:
        X_train: Training features.
        y_train: Training labels; a one-column frame or a 1-D sequence.
        X_test: Test features.
        y_test: Test labels; a one-column frame or a 1-D sequence.

    Returns:
        The array returned by ``SVC.predict(X_test)``.
    """
    # scikit-learn expects a 1-D label vector.  Flattening a one-column
    # frame up front avoids the DataConversionWarning emitted by fit().
    train_labels = np.ravel(y_train)
    test_labels = np.ravel(y_test)

    classifier = svm.SVC()  # default hyper-parameters
    classifier.fit(X_train, train_labels)

    print("{:.2f}".format(classifier.score(X_train, train_labels)))
    print("{:.2f}".format(classifier.score(X_test, test_labels)))

    pred = classifier.predict(X_test)
    # The predictions stay a NumPy array (and are printed as one) because
    # backtest() compares them element-wise against 1.
    print(pred)

    return pred
    
    
def NN(X_train, y_train, X_test, y_test):
    """Fit an ``MLPClassifier`` and report accuracy and predictions.

    The network uses the 'lbfgs' solver, ``alpha=1e-5``, a single hidden
    layer with 8 nodes and ``random_state=1``.  Prints three things in
    order: training accuracy (two decimals), test accuracy (two decimals)
    and the predicted labels for ``X_test``.

    Args:
        X_train: Training features.
        y_train: Training labels; a one-column frame or a 1-D sequence.
        X_test: Test features.
        y_test: Test labels; a one-column frame or a 1-D sequence.

    Returns:
        The array returned by ``MLPClassifier.predict(X_test)``.
    """
    # scikit-learn expects a 1-D label vector.  Flattening a one-column
    # frame up front avoids the DataConversionWarning emitted by fit().
    train_labels = np.ravel(y_train)
    test_labels = np.ravel(y_test)

    network = MLPClassifier(
        solver='lbfgs',
        alpha=1e-5,
        hidden_layer_sizes=(8,),
        random_state=1,
    )
    network.fit(X_train, train_labels)

    print("{:.2f}".format(network.score(X_train, train_labels)))
    print("{:.2f}".format(network.score(X_test, test_labels)))

    pred = network.predict(X_test)
    # The predictions stay a NumPy array (and are printed as one) because
    # backtest() compares them element-wise against 1.
    print(pred)

    return pred

def backtest(close, pred):
    pass
    # drop the close when date<='2021-11-30'
    close = close.loc[200:len(close)]
    #print(close)
    # calculte the daily return
    close['return'] = close.Close.pct_change()

    # store prediction from SVM/NN in 'signal' column
    close['signal'] = pred

    # use the return data to calculate the cumulative return of long-only strategy
    # buy when signal == 1 and sell when signal != 1
    # assuming reinvesting (i.e. cumprod of return)
    position = np.where(pred == 1, 1, 0)
    close['position'] = position
    #print(close)
    cumprod = close['return'].fillna(0) * close['position'].shift().fillna(0)
    #print(cumprod)
    total = 1
    for i in cumprod:
        total *= (1 + i)
    total-=1
    # print the cumulative return, keep 6 decimal places
    print("{:.6f}".format(total))
    
    

def test_0(df):
    """Stage 0: run the train/test split and return its four partitions."""
    partitions = train_test_split(df)
    X_train, y_train, X_test, y_test = partitions
    return (X_train, y_train, X_test, y_test)

def test_1(df):
    """Stage 1: split the data, then normalize it; nothing is returned."""
    raw_partitions = train_test_split(df)
    # Both stages print their own diagnostics; the scaled result is unused.
    normalization(*raw_partitions)

def test_2(df):
    """Stage 2: split, normalize and fit the SVM; nothing is returned."""
    raw_partitions = train_test_split(df)
    scaled_partitions = normalization(*raw_partitions)
    # SVM prints its accuracies and predictions; the result is unused here.
    SVM(*scaled_partitions)
    
def test_3(df):
    """Stage 3: split, normalize and fit the MLP; nothing is returned."""
    raw_partitions = train_test_split(df)
    scaled_partitions = normalization(*raw_partitions)
    # NN prints its accuracies and predictions; the result is unused here.
    NN(*scaled_partitions)
    
def test_4(df, close):
    """Stage 4: run the SVM pipeline and backtest its predictions."""
    raw_partitions = train_test_split(df)
    scaled_partitions = normalization(*raw_partitions)
    svm_predictions = SVM(*scaled_partitions)
    backtest(close, svm_predictions)
    
def test_5(df, close):
    """Stage 5: run the MLP pipeline and backtest its predictions."""
    raw_partitions = train_test_split(df)
    scaled_partitions = normalization(*raw_partitions)
    nn_predictions = NN(*scaled_partitions)
    backtest(close, nn_predictions)

    
"""if __name__ == '__main__':
    test_id = int(input().strip())
    row_num = int(input().strip())
    Data = []
    Price = []
    col_names = list(map(str, input().split(',')))
    for i in range(row_num):
        line=list(map(str, input().split(',')))
        line[0] = pd.to_datetime(datetime.strptime(line[0],'%m-%d')).replace(year=2021)
        for j in range(1,14,1):
            line[j] = float(line[j])
        line[14] = int(float(line[14]))
        line[15] = int(float(line[15]))
        Data.append(line[:-1])
        Price.append([pd.to_datetime(line[0]),float(line[16])])
    df = pd.DataFrame(Data, columns= col_names[:-1])
    close = pd.DataFrame(Price, columns=[col_names[0],col_names[16]])
    
    if test_id == 0:
        test_0(df)
    if test_id == 1:
        test_1(df)
    if test_id == 2:
        test_2(df)
    if test_id == 3:
        test_3(df)
    if test_id == 4:
        test_4(df,close)
    if test_id == 5:
        test_5(df,close)"""