<a href="https://colab.research.google.com/github/atlantiquesun/Stock_ML/blob/main/train_monthly_series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [66]:
import pandas as pd
from sklearn import preprocessing
import datetime
from dateutil.relativedelta import relativedelta
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler
from matplotlib import pyplot as plt

In [68]:
def prepare_train_data_series(last_company_index, train_start_month, train_window, sample_window, lag=1):
  '''
  input:
  start_month: the earliest month used in training
  train_start_month: the first day of the first month in this train set
  train_end_month: the first day of the last month in this train set

  return:
  X_train: (samples, timesteps, features)

  currently no time lag
  '''
  train_end_month = train_start_month + relativedelta(months=train_window-1)
  X_train = []
  y_train = []
  trade_tic = []

  companies = pd.read_csv("/content/drive/MyDrive/StockML /Data/companyInfo/companies_final.csv")

  for i in range(companies.shape[0]):
    if(i > last_company_index):break
    company = companies.at[i, 'githubUser']
    ticker = companies.at[i, 'symbol'].upper()

    raw = pd.read_csv("/content/drive/MyDrive/StockML /Data/processedData/cumulativeData/"+company+".csv")
    raw = raw.drop('Unnamed: 0', axis=1)

    #group GitHub data into months
    raw.date = pd.to_datetime(raw['date'])
    df = raw.groupby(pd.Grouper(key="date", freq="M")).sum()
    df['month_end'] = df.index
    df['month_start'] = df['month_end'] + datetime.timedelta(days=1)
    df['month_start'] = df['month_start'].shift(1)
    df[features_column] = df[features_column].shift(lag) #lag GitHub data
    df = df.dropna()
    scaler = MinMaxScaler()
    df[features_column] = scaler.fit_transform(df[features_column])

    #select the data within the current train window
    df = df.loc[(df.month_start >= train_start_month) & (df.month_start <= train_end_month)] 

    #group stock data into months then select the last datum in a month to calculate monthly return
    stock_data = pd.read_csv("/content/drive/MyDrive/StockML /Data/financialData/"+ticker+".csv")
    stock_data['Date'] = pd.to_datetime(stock_data['Date'])
    stock_data = stock_data.groupby(pd.Grouper(key="Date", freq="M")).nth([-1]) #index are month-ends
    stock_data['Date'] = stock_data.index
    stock_data['last_close'] = stock_data.Close.shift(1)
    stock_data['monthly_return'] = (stock_data.Close / stock_data.last_close)-1
    stock_data = stock_data.dropna()

    #the first (sample_window - 1) month do not need to have stock data, so we can pad stock_df
    pad_index = []
    for t in range(1, sample_window):
      pad_index.append(((list(stock_data["Date"])[0]+relativedelta(days=1)) - relativedelta(months=t))-relativedelta(days=1)) 
    stock_pad = pd.DataFrame(index=pad_index, columns=stock_data.columns)
    stock_pad['Date'] = stock_pad.index
    stock_data = pd.concat([stock_pad, stock_data])

    #select the data within the current train window
    stock_df = stock_data.loc[(stock_data.Date >= train_start_month) & (stock_data.Date < (train_end_month+relativedelta(months=1)))]

    #some months may not have stock data
    df = df.loc[df['month_end'].isin(list(stock_df['Date']))]
    #print("check if two are the same:", df.shape, stock_df.shape, list(df['month_end'])[0], list(stock_df['Date'])[0], list(df['month_end'])[-1], list(stock_df['Date'])[-1])

    df_X = df[features_column]
    df_y = stock_df['monthly_return']
    
    if(df.shape[0] < sample_window):
      continue
    else:
      trade_tic.append(ticker)
      for j in range(df.shape[0]-sample_window+1):
        X_train.append(df_X.values[j:j+sample_window, :])
        y_train.append(df_y.values[j+sample_window-1])
    
  X_train = np.asarray(X_train)
  y_train = np.asarray(y_train)
  
  return (X_train, y_train, trade_tic)


# Model

In [69]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.linear_model import Ridge

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor

from sklearn.model_selection import TimeSeriesSplit, GridSearchCV,RandomizedSearchCV

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

def train_lstm(X_train, y_train, n_features=1):
    
    # Initialising the RNN
    regressor = Sequential()
    # Adding the first LSTM layer and some Dropout regularisation
    regressor.add(LSTM(units = 80, return_sequences = True, input_shape = (X_train.shape[1], n_features)))
    regressor.add(Dropout(0.2))

    # Adding a second LSTM layer and some Dropout regularisation
    regressor.add(LSTM(units = 40, return_sequences = True))
    regressor.add(Dropout(0.2))

    # Adding a third LSTM layer and some Dropout regularisation
    regressor.add(LSTM(units = 20, return_sequences = False))
    regressor.add(Dropout(0.2))

    # Adding a fourth LSTM layer and some Dropout regularisation
    #regressor.add(LSTM(units = 20,return_sequences = False))
    #regressor.add(Dropout(0.2))

    # Adding the output layer
    regressor.add(Dense(units = 1, activation='tanh'))
    
    #scoring_method = 'neg_mean_absolute_error'
    # scoring_method = 'neg_mean_squared_error'
    #scoring_method = 'neg_mean_squared_log_error'
    # Compiling the RNN
    regressor.compile(optimizer = 'adam', loss = 'mean_absolute_error')

    # Fitting the RNN to the Training set
    regressor.fit(X_train, y_train, epochs = 4, batch_size = 64)
    return regressor



def evaluate_model(model, X_test, y_test):
    from sklearn.metrics import mean_squared_error
    #from sklearn.metrics import mean_squared_log_error

    from sklearn.metrics import mean_absolute_error
    from sklearn.metrics import explained_variance_score
    from sklearn.metrics import r2_score
    y_predict = model.predict(X_test)

    mae = mean_absolute_error(y_test, y_predict)
    

    mse = mean_squared_error(y_test, y_predict)
    #msle = mean_squared_log_error(y_test, y_predict)

    explained_variance = explained_variance_score(y_test, y_predict)
    r2 = r2_score(y_test, y_predict)

    return mae



def append_return_table(df_predict, unique_datetime, y_trade_return, trade_tic, current_index):
    tmp_table = pd.DataFrame(columns=trade_tic)
    tmp_table = tmp_table.append(pd.Series(y_trade_return, index=trade_tic), ignore_index=True)
    df_predict.loc[unique_datetime[current_index]] = tmp_table.loc[0]


# Training

In [70]:
last_company_index = 17
start_month=pd.to_datetime("2010-01-01")
train_window = 36
test_window = 9
sample_window = 6
features_column = ['star', 'fork', 'issue', 'commit', 'issueClosed', 'pullRequest', 'pullRequestClosed', 'pullRequestMerged']

train_start_months = []
for i in range(0, 90):
  train_start_months.append(start_month+relativedelta(months=i))
print(train_start_months[-1], train_start_months[-1]+relativedelta(months=train_window-1), train_start_months[-1]+relativedelta(months=train_window))

#prepare trade month_start info
unique_datetime = [x+relativedelta(months=train_window+test_window) for x in train_start_months]
companies = pd.read_csv("/content/drive/MyDrive/StockML /Data/companyInfo/companies_final.csv")
unique_ticker = list(companies["symbol"])


df_predict_lr = pd.DataFrame(columns=unique_ticker, index=unique_datetime)
df_predict_lasso = pd.DataFrame(columns=unique_ticker, index=unique_datetime)
df_predict_ridge = pd.DataFrame(columns=unique_ticker, index=unique_datetime)
df_predict_rf = pd.DataFrame(columns=unique_ticker, index=unique_datetime)
df_predict_svm= pd.DataFrame(columns=unique_ticker, index=unique_datetime)
#df_predict_gbm = pd.DataFrame(columns=unique_ticker, index=unique_datetime)
#df_predict_ada = pd.DataFrame(columns=unique_ticker, index=unique_datetime)
df_predict_lstm = pd.DataFrame(columns=unique_ticker, index=unique_datetime)

df_predict_best = pd.DataFrame(columns=unique_ticker, index=unique_datetime)
df_best_model_name = pd.DataFrame(columns=['model_name'], index=unique_datetime)
evaluation_record = []

for i in range(len(unique_datetime)):
  #e.g. train_start_month = 2010-01-01, train_end_month=2012-12-01
  #test_start_month = 2013-01-01, test_end_month=2013-09-01
  #trade_month = 2013-10-01 (unique_datetime[i]), will use data from 2013-05-01

  test_start_month = train_start_months[i] + relativedelta(months=train_window)
  trade_data_start_month = unique_datetime[i] + relativedelta(months= (1-sample_window))

  X_train, y_train, _ = prepare_train_data_series(last_company_index, train_start_months[i], train_window, sample_window)
  print("train:", X_train.shape, y_train.shape)

  X_test, y_test, _ = prepare_train_data_series(last_company_index, test_start_month, test_window, sample_window)
  print("test:", X_test.shape, y_test.shape)

  X_trade, y_trade, trade_tic = prepare_train_data_series(last_company_index, trade_data_start_month, sample_window, sample_window)
  print("trade:", X_trade.shape, y_trade.shape)

  lstm_model = train_lstm(X_train, y_train, len(features_column))
  lstm_eval = evaluate_model(lstm_model, X_test, y_test)

  y_trade_lstm = lstm_model.predict(X_trade).flatten()
  append_return_table(df_predict_lstm, unique_datetime, y_trade_lstm, trade_tic, current_index=i)

  print("Trade month:", unique_datetime[i] )


2017-06-01 00:00:00 2020-05-01 00:00:00 2020-06-01 00:00:00
train: (188, 6, 8) (188,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


test: (28, 6, 8) (28,)
trade: (7, 6, 8) (7,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2013-10-01 00:00:00
train: (189, 6, 8) (189,)
test: (28, 6, 8) (28,)
trade: (7, 6, 8) (7,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2013-11-01 00:00:00
train: (190, 6, 8) (190,)
test: (28, 6, 8) (28,)
trade: (8, 6, 8) (8,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2013-12-01 00:00:00
train: (191, 6, 8) (191,)
test: (29, 6, 8) (29,)
trade: (8, 6, 8) (8,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2014-01-01 00:00:00
train: (192, 6, 8) (192,)
test: (30, 6, 8) (30,)
trade: (8, 6, 8) (8,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2014-02-01 00:00:00
train: (193, 6, 8) (193,)
test: (31, 6, 8) (31,)
trade: (8, 6, 8) (8,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2014-03-01 00:00:00
train: (194, 6, 8) (194,)
test: (32, 6, 8) (32,)
trade: (8, 6, 8) (8,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2014-04-01 00:00:00
train: (195, 6, 8)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


train: (197, 6, 8) (197,)
test: (33, 6, 8) (33,)
trade: (9, 6, 8) (9,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2014-07-01 00:00:00
train: (198, 6, 8) (198,)
test: (34, 6, 8) (34,)
trade: (9, 6, 8) (9,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2014-08-01 00:00:00
train: (199, 6, 8) (199,)
test: (35, 6, 8) (35,)
trade: (9, 6, 8) (9,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2014-09-01 00:00:00
train: (201, 6, 8) (201,)
test: (36, 6, 8) (36,)
trade: (9, 6, 8) (9,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2014-10-01 00:00:00
train: (203, 6, 8) (203,)
test: (36, 6, 8) (36,)
trade: (10, 6, 8) (10,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2014-11-01 00:00:00
train: (205, 6, 8) (205,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


test: (37, 6, 8) (37,)
trade: (10, 6, 8) (10,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2014-12-01 00:00:00
train: (207, 6, 8) (207,)
test: (38, 6, 8) (38,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


trade: (10, 6, 8) (10,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2015-01-01 00:00:00
train: (209, 6, 8) (209,)
test: (39, 6, 8) (39,)
trade: (10, 6, 8) (10,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2015-02-01 00:00:00
train: (211, 6, 8) (211,)
test: (40, 6, 8) (40,)
trade: (10, 6, 8) (10,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2015-03-01 00:00:00
train: (214, 6, 8) (214,)
test: (40, 6, 8) (40,)
trade: (10, 6, 8) (10,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2015-04-01 00:00:00
train: (217, 6, 8) (217,)
test: (40, 6, 8) (40,)
trade: (10, 6, 8) (10,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2015-05-01 00:00:00
train: (220, 6, 8) (220,)
test: (40, 6, 8) (40,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2015-06-01 00:00:00
train: (223, 6, 8) (223,)
test: (41, 6, 8) (41,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2015-07-01 00:00:00
train: (226, 6, 8) (226,)
t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


train: (258, 6, 8) (258,)
test: (44, 6, 8) (44,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2016-04-01 00:00:00
train: (262, 6, 8) (262,)
test: (44, 6, 8) (44,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2016-05-01 00:00:00
train: (266, 6, 8) (266,)
test: (44, 6, 8) (44,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2016-06-01 00:00:00
train: (270, 6, 8) (270,)
test: (44, 6, 8) (44,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2016-07-01 00:00:00
train: (274, 6, 8) (274,)
test: (44, 6, 8) (44,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2016-08-01 00:00:00


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


train: (278, 6, 8) (278,)
test: (44, 6, 8) (44,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2016-09-01 00:00:00
train: (282, 6, 8) (282,)
test: (44, 6, 8) (44,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2016-10-01 00:00:00
train: (286, 6, 8) (286,)
test: (44, 6, 8) (44,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2016-11-01 00:00:00


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


train: (290, 6, 8) (290,)
test: (44, 6, 8) (44,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2016-12-01 00:00:00
train: (294, 6, 8) (294,)
test: (44, 6, 8) (44,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2017-01-01 00:00:00
train: (298, 6, 8) (298,)
test: (44, 6, 8) (44,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2017-02-01 00:00:00
train: (302, 6, 8) (302,)
test: (44, 6, 8) (44,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2017-03-01 00:00:00
train: (306, 6, 8) (306,)
test: (44, 6, 8) (44,)
trade: (11, 6, 8) (11,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2017-04-01 00:00:00
train: (309, 6, 8) (309,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


test: (44, 6, 8) (44,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2017-05-01 00:00:00
train: (312, 6, 8) (312,)
test: (45, 6, 8) (45,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2017-06-01 00:00:00
train: (315, 6, 8) (315,)
test: (46, 6, 8) (46,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2017-07-01 00:00:00
train: (318, 6, 8) (318,)
test: (47, 6, 8) (47,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2017-08-01 00:00:00
train: (321, 6, 8) (321,)
test: (48, 6, 8) (48,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2017-09-01 00:00:00
train: (324, 6, 8) (324,)
test: (48, 6, 8) (48,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2017-10-01 00:00:00
train: (326, 6, 8) (326,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


test: (48, 6, 8) (48,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2017-11-01 00:00:00
train: (328, 6, 8) (328,)
test: (48, 6, 8) (48,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2017-12-01 00:00:00
train: (330, 6, 8) (330,)
test: (48, 6, 8) (48,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2018-01-01 00:00:00
train: (332, 6, 8) (332,)
test: (48, 6, 8) (48,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2018-02-01 00:00:00
train: (335, 6, 8) (335,)
test: (48, 6, 8) (48,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2018-03-01 00:00:00
train: (337, 6, 8) (337,)
test: (48, 6, 8) (48,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2018-04-01 00:00:00
train: (339, 6, 8) (339,)
test: (48, 6, 8) (48,)
trade: (12, 6, 8) (12,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2018-05-01 00:00:00
trai

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


trade: (14, 6, 8) (14,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2018-11-01 00:00:00
train: (351, 6, 8) (351,)
test: (51, 6, 8) (51,)
trade: (14, 6, 8) (14,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2018-12-01 00:00:00
train: (352, 6, 8) (352,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


test: (53, 6, 8) (53,)
trade: (14, 6, 8) (14,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2019-01-01 00:00:00
train: (353, 6, 8) (353,)
test: (55, 6, 8) (55,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


trade: (14, 6, 8) (14,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2019-02-01 00:00:00
train: (354, 6, 8) (354,)
test: (56, 6, 8) (56,)
trade: (14, 6, 8) (14,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2019-03-01 00:00:00
train: (355, 6, 8) (355,)
test: (56, 6, 8) (56,)
trade: (14, 6, 8) (14,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2019-04-01 00:00:00
train: (356, 6, 8) (356,)
test: (56, 6, 8) (56,)
trade: (15, 6, 8) (15,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2019-05-01 00:00:00
train: (357, 6, 8) (357,)
test: (57, 6, 8) (57,)
trade: (15, 6, 8) (15,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2019-06-01 00:00:00
train: (358, 6, 8) (358,)
test: (58, 6, 8) (58,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2019-07-01 00:00:00
train: (360, 6, 8) (360,)
test: (61, 6, 8) (61,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2019-08-01 00:00:00
train: (363, 6, 8) (363,)
t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


test: (68, 6, 8) (68,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2020-01-01 00:00:00
train: (378, 6, 8) (378,)
test: (68, 6, 8) (68,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2020-02-01 00:00:00


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


train: (382, 6, 8) (382,)
test: (68, 6, 8) (68,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2020-03-01 00:00:00
train: (386, 6, 8) (386,)
test: (68, 6, 8) (68,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2020-04-01 00:00:00
train: (392, 6, 8) (392,)
test: (68, 6, 8) (68,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2020-05-01 00:00:00
train: (398, 6, 8) (398,)
test: (68, 6, 8) (68,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2020-06-01 00:00:00
train: (404, 6, 8) (404,)
test: (68, 6, 8) (68,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2020-07-01 00:00:00
train: (410, 6, 8) (410,)
test: (68, 6, 8) (68,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2020-08-01 00:00:00
train: (416, 6, 8) (416,)
test: (68, 6, 8) (68,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value[:, i].tolist())


test: (68, 6, 8) (68,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2020-11-01 00:00:00
train: (431, 6, 8) (431,)
test: (68, 6, 8) (68,)
trade: (17, 6, 8) (17,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2020-12-01 00:00:00
train: (436, 6, 8) (436,)
test: (68, 6, 8) (68,)
trade: (18, 6, 8) (18,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2021-01-01 00:00:00
train: (441, 6, 8) (441,)
test: (69, 6, 8) (69,)
trade: (18, 6, 8) (18,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2021-02-01 00:00:00
train: (446, 6, 8) (446,)
test: (70, 6, 8) (70,)
trade: (18, 6, 8) (18,)
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trade month: 2021-03-01 00:00:00



# Visualisation

In [39]:
def show_single_stock_accuracy(df_predict, unique_datetime, ticker):
  from matplotlib import pyplot
  prediction = df_predict[ticker]

  stock_data = pd.read_csv("/content/drive/MyDrive/StockML /Data/financialData/"+ticker+".csv")
  stock_data['Date'] = pd.to_datetime(stock_data['Date'])
  stock_data = stock_data.groupby(pd.Grouper(key="Date", freq="M")).nth([-1]) #index are month-ends
  stock_data['Date'] = stock_data.index
  stock_data['last_close'] = stock_data.Close.shift(1)
  stock_data['monthly_return'] = (stock_data.Close / stock_data.last_close)-1
  stock_data = stock_data.loc[(stock_data["Date"] >= unique_datetime[0]) & (stock_data["Date"] < (unique_datetime[-1]+relativedelta(months=1)))]


  actual = list(stock_data["monthly_return"])
  prediction = list(df_predict[ticker].dropna())
  df_predict.to_csv("/content/drive/MyDrive/StockML /Data/prediction_shop.csv")
  
  pred = pd.DataFrame(index=list(range(len(actual))), columns = ["monthly_return"])
  act = pd.DataFrame(index=list(range(len(actual))), columns = ["monthly_return"])

  pred["monthly_return"] = prediction
  act["monthly_return"] = actual
  pred.plot()
  plt.show()
  act.plot()
  plt.show()

In [40]:
show_single_stock_accuracy(df_predict_lstm, unique_datetime, "SHOP")

ValueError: ignored