In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import numpy as np
import torch
import torch.nn as nn
from scipy.stats import norm
from sklearn.preprocessing import StandardScaler
from scipy import stats
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import requests
from io import StringIO

In [3]:
from google.colab import drive

# Mount Google Drive at /content/drive so the dataset paths under
# /content/drive/MyDrive used below resolve. This only works inside Colab.
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Absolute path on the mounted Drive to the raw price CSV
# (the file name suggests ticker ABB, snapshot dated 2021-04-08).
ABBpath = '/content/drive/MyDrive/CPP-CSS/Stock_Market_Predictor/Datasets/raw-data/ABB-2021-04-08.csv'
# No parse_dates argument is passed, so the Date column is not converted to datetime here.
df = pd.read_csv(ABBpath)

In [5]:
# Discard every row that has a missing value, then renumber the index from 0
# so later positional reasoning (rolling windows, shifts) sees no gaps.
# Date is left exactly as read from the CSV; no datetime conversion is applied.
df = df.dropna().reset_index(drop=True)
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2001-04-06,17.0,17.0,16.78125,16.828125,8.432508,162100
1,2001-04-09,16.9,17.299999,16.9,17.299999,8.668963,31300
2,2001-04-10,17.75,17.92,17.700001,17.790001,8.914501,39500
3,2001-04-11,17.5,17.6,17.4,17.6,8.819292,17900
4,2001-04-12,17.5,17.5,17.4,17.5,8.769181,33000


In [6]:
# Feature-engineering settings shared by the cells below.
MOVING_AVGS = [5, 10, 20, 50, 100, 200]  # window lengths for the simple moving averages
DONCHIANS = list(MOVING_AVGS)            # Donchian windows: same lengths, kept as a separate list
TIME_LAGS = list(range(1, 11))           # lags 1..10 applied when building lagged features
FORWARD_LAG = 5                          # number of rows ahead at which the target close is read

# Generating Moving Averages

Simple moving averages of the close price over 5, 10, 20, 50, 100, and 200 days.

In [7]:
# Add one simple-moving-average column of Close for each window in MOVING_AVGS.
# Rows that do not yet have a full window of history come out as NaN.
for period in MOVING_AVGS:
    close_window = df['Close'].rolling(window=period)
    df["SMA_%d" % period] = close_window.mean()

# Bollinger Bands

- 20 periods, 2 standard deviations
- 20 periods, 1 standard deviation
- 10 periods, 1 standard deviation
- 10 periods, 2 standard deviations

In [8]:
# (window, band width in standard deviations) for each Bollinger band pair,
# listed in the order in which the columns are created.
BOLLINGER_BANDS = [(20, 2), (20, 1), (10, 1), (10, 2)]

for window, num_std in BOLLINGER_BANDS:
    # Compute the rolling statistics once per pair instead of once per column.
    rolling_close = df['Close'].rolling(window)
    middle = rolling_close.mean()
    width = num_std * rolling_close.std()
    # Upper band = rolling mean + k * rolling std; lower band = rolling mean - k * rolling std.
    df['BollingerBand_Up_%d_%d' % (window, num_std)] = middle + width
    df['BollingerBand_Down_%d_%d' % (window, num_std)] = middle - width

# Donchian Channels

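Same periods as the moving averages (5, 10, 20, 50, 100, 200 days). The upper channel is the rolling maximum of `High`; the lower channel is the rolling minimum of `Low`.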

In [9]:
# Donchian channel for each window in DONCHIANS:
# the upper bound is the highest High and the lower bound the lowest Low
# over the trailing `period` rows (NaN until the window is full).
for period in DONCHIANS:
    high_window = df['High'].rolling(period)
    low_window = df['Low'].rolling(period)
    df["Donchian_Channel_Up_%d" % (period)] = high_window.max()
    df["Donchian_Channel_Down_%d" % (period)] = low_window.min()

# Time Lags

Build a new dataset that repeats every column of `df` at each of the time lags (1 to 10 days back).
With the lagged values as inputs, we can then define the target variable as the close price X days in the future.

In [10]:
# Build the lagged feature frame: the current Close followed by every column of
# df shifted down by each lag in TIME_LAGS, renamed "<column>_shifted_by_<lag>".
# NOTE(review): df still holds the string Date column, so Date_shifted_by_<lag>
# columns are produced as well — confirm downstream code drops or encodes them.
lagged_frames = [
    df.shift(lag).add_suffix("_shifted_by_%d" % lag)
    for lag in TIME_LAGS
]
# Concatenate once; growing newdata inside the loop re-copies the frame on every pass.
newdata = pd.concat([df['Close'].to_frame(), *lagged_frames], axis=1)

In [11]:
# Preview the lagged frame; the earliest rows are mostly NaN because the
# shifted and rolling-window columns have no history yet.
newdata.head()

Unnamed: 0,Close,Date_shifted_by_1,Open_shifted_by_1,High_shifted_by_1,Low_shifted_by_1,Close_shifted_by_1,Adj Close_shifted_by_1,Volume_shifted_by_1,SMA_5_shifted_by_1,SMA_10_shifted_by_1,SMA_20_shifted_by_1,SMA_50_shifted_by_1,SMA_100_shifted_by_1,SMA_200_shifted_by_1,BollingerBand_Up_20_2_shifted_by_1,BollingerBand_Down_20_2_shifted_by_1,BollingerBand_Up_20_1_shifted_by_1,BollingerBand_Down_20_1_shifted_by_1,BollingerBand_Up_10_1_shifted_by_1,BollingerBand_Down_10_1_shifted_by_1,BollingerBand_Up_10_2_shifted_by_1,BollingerBand_Down_10_2_shifted_by_1,Donchian_Channel_Up_5_shifted_by_1,Donchian_Channel_Down_5_shifted_by_1,Donchian_Channel_Up_10_shifted_by_1,Donchian_Channel_Down_10_shifted_by_1,Donchian_Channel_Up_20_shifted_by_1,Donchian_Channel_Down_20_shifted_by_1,Donchian_Channel_Up_50_shifted_by_1,Donchian_Channel_Down_50_shifted_by_1,Donchian_Channel_Up_100_shifted_by_1,Donchian_Channel_Down_100_shifted_by_1,Donchian_Channel_Up_200_shifted_by_1,Donchian_Channel_Down_200_shifted_by_1,Date_shifted_by_2,Open_shifted_by_2,High_shifted_by_2,Low_shifted_by_2,Close_shifted_by_2,Adj Close_shifted_by_2,...,Donchian_Channel_Down_20_shifted_by_9,Donchian_Channel_Up_50_shifted_by_9,Donchian_Channel_Down_50_shifted_by_9,Donchian_Channel_Up_100_shifted_by_9,Donchian_Channel_Down_100_shifted_by_9,Donchian_Channel_Up_200_shifted_by_9,Donchian_Channel_Down_200_shifted_by_9,Date_shifted_by_10,Open_shifted_by_10,High_shifted_by_10,Low_shifted_by_10,Close_shifted_by_10,Adj 
Close_shifted_by_10,Volume_shifted_by_10,SMA_5_shifted_by_10,SMA_10_shifted_by_10,SMA_20_shifted_by_10,SMA_50_shifted_by_10,SMA_100_shifted_by_10,SMA_200_shifted_by_10,BollingerBand_Up_20_2_shifted_by_10,BollingerBand_Down_20_2_shifted_by_10,BollingerBand_Up_20_1_shifted_by_10,BollingerBand_Down_20_1_shifted_by_10,BollingerBand_Up_10_1_shifted_by_10,BollingerBand_Down_10_1_shifted_by_10,BollingerBand_Up_10_2_shifted_by_10,BollingerBand_Down_10_2_shifted_by_10,Donchian_Channel_Up_5_shifted_by_10,Donchian_Channel_Down_5_shifted_by_10,Donchian_Channel_Up_10_shifted_by_10,Donchian_Channel_Down_10_shifted_by_10,Donchian_Channel_Up_20_shifted_by_10,Donchian_Channel_Down_20_shifted_by_10,Donchian_Channel_Up_50_shifted_by_10,Donchian_Channel_Down_50_shifted_by_10,Donchian_Channel_Up_100_shifted_by_10,Donchian_Channel_Down_100_shifted_by_10,Donchian_Channel_Up_200_shifted_by_10,Donchian_Channel_Down_200_shifted_by_10
0,16.828125,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,17.299999,2001-04-06,17.0,17.0,16.78125,16.828125,8.432508,162100.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,17.790001,2001-04-09,16.9,17.299999,16.9,17.299999,8.668963,31300.0,,,,,,,,,,,,,,,,,,,,,,,,,,,2001-04-06,17.0,17.0,16.78125,16.828125,8.432508,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,17.6,2001-04-10,17.75,17.92,17.700001,17.790001,8.914501,39500.0,,,,,,,,,,,,,,,,,,,,,,,,,,,2001-04-09,16.9,17.299999,16.9,17.299999,8.668963,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,17.5,2001-04-11,17.5,17.6,17.4,17.6,8.819292,17900.0,,,,,,,,,,,,,,,,,,,,,,,,,,,2001-04-10,17.75,17.92,17.700001,17.790001,8.914501,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [12]:
forward_lag = FORWARD_LAG
# Target = close price `forward_lag` rows ahead of each row.
# It is read from df (which shares newdata's index, so the assignment aligns
# row by row) rather than from newdata['Close']; that way the cell can be
# re-run after 'Close' has already been dropped without raising KeyError.
newdata['target'] = df['Close'].shift(-forward_lag)
# The unshifted Close must not remain as a feature; errors='ignore' makes a re-run a no-op.
newdata = newdata.drop(columns='Close', errors='ignore')
# Remove rows that lack a full set of lagged/rolling features (start of the
# series) or a future close (last `forward_lag` rows).
newdata = newdata.dropna()
print(newdata.shape)
newdata.head()

(4818, 331)


Unnamed: 0,Date_shifted_by_1,Open_shifted_by_1,High_shifted_by_1,Low_shifted_by_1,Close_shifted_by_1,Adj Close_shifted_by_1,Volume_shifted_by_1,SMA_5_shifted_by_1,SMA_10_shifted_by_1,SMA_20_shifted_by_1,SMA_50_shifted_by_1,SMA_100_shifted_by_1,SMA_200_shifted_by_1,BollingerBand_Up_20_2_shifted_by_1,BollingerBand_Down_20_2_shifted_by_1,BollingerBand_Up_20_1_shifted_by_1,BollingerBand_Down_20_1_shifted_by_1,BollingerBand_Up_10_1_shifted_by_1,BollingerBand_Down_10_1_shifted_by_1,BollingerBand_Up_10_2_shifted_by_1,BollingerBand_Down_10_2_shifted_by_1,Donchian_Channel_Up_5_shifted_by_1,Donchian_Channel_Down_5_shifted_by_1,Donchian_Channel_Up_10_shifted_by_1,Donchian_Channel_Down_10_shifted_by_1,Donchian_Channel_Up_20_shifted_by_1,Donchian_Channel_Down_20_shifted_by_1,Donchian_Channel_Up_50_shifted_by_1,Donchian_Channel_Down_50_shifted_by_1,Donchian_Channel_Up_100_shifted_by_1,Donchian_Channel_Down_100_shifted_by_1,Donchian_Channel_Up_200_shifted_by_1,Donchian_Channel_Down_200_shifted_by_1,Date_shifted_by_2,Open_shifted_by_2,High_shifted_by_2,Low_shifted_by_2,Close_shifted_by_2,Adj Close_shifted_by_2,Volume_shifted_by_2,...,Donchian_Channel_Up_50_shifted_by_9,Donchian_Channel_Down_50_shifted_by_9,Donchian_Channel_Up_100_shifted_by_9,Donchian_Channel_Down_100_shifted_by_9,Donchian_Channel_Up_200_shifted_by_9,Donchian_Channel_Down_200_shifted_by_9,Date_shifted_by_10,Open_shifted_by_10,High_shifted_by_10,Low_shifted_by_10,Close_shifted_by_10,Adj 
Close_shifted_by_10,Volume_shifted_by_10,SMA_5_shifted_by_10,SMA_10_shifted_by_10,SMA_20_shifted_by_10,SMA_50_shifted_by_10,SMA_100_shifted_by_10,SMA_200_shifted_by_10,BollingerBand_Up_20_2_shifted_by_10,BollingerBand_Down_20_2_shifted_by_10,BollingerBand_Up_20_1_shifted_by_10,BollingerBand_Down_20_1_shifted_by_10,BollingerBand_Up_10_1_shifted_by_10,BollingerBand_Down_10_1_shifted_by_10,BollingerBand_Up_10_2_shifted_by_10,BollingerBand_Down_10_2_shifted_by_10,Donchian_Channel_Up_5_shifted_by_10,Donchian_Channel_Down_5_shifted_by_10,Donchian_Channel_Up_10_shifted_by_10,Donchian_Channel_Down_10_shifted_by_10,Donchian_Channel_Up_20_shifted_by_10,Donchian_Channel_Down_20_shifted_by_10,Donchian_Channel_Up_50_shifted_by_10,Donchian_Channel_Down_50_shifted_by_10,Donchian_Channel_Up_100_shifted_by_10,Donchian_Channel_Down_100_shifted_by_10,Donchian_Channel_Up_200_shifted_by_10,Donchian_Channel_Down_200_shifted_by_10,target
209,2002-02-08,8.22,8.22,8.06,8.07,4.043845,17300.0,8.072,8.606,9.321,9.9168,9.1416,11.86915,11.223396,7.418604,10.272198,8.369802,9.365608,7.846392,10.125216,7.086784,8.4,7.7,10.1,7.7,10.88,7.7,11.48,7.7,11.6,6.1,18.950001,6.1,2002-02-07,8.0,8.4,8.0,8.3,4.159098,22500.0,...,11.6,9.2,11.6,6.1,18.950001,6.1,2002-01-28,10.1,10.1,9.8,9.95,4.985907,100500.0,9.684,9.959,10.187,10.3676,9.2765,12.270741,11.273764,9.100236,10.730382,9.643618,10.332198,9.585802,10.705396,9.212604,10.1,9.2,10.88,9.2,11.22,9.2,11.6,9.2,11.6,6.1,18.950001,6.1,7.25
210,2002-02-11,8.25,8.3,8.15,8.22,4.119011,33700.0,8.06,8.433,9.196,9.8622,9.1435,11.821,11.038954,7.353046,10.117477,8.274523,9.032668,7.833332,9.632335,7.233665,8.4,7.7,9.9,7.7,10.88,7.7,11.48,7.7,11.6,6.1,18.950001,6.1,2002-02-08,8.22,8.22,8.06,8.07,4.043845,17300.0,...,11.6,8.79,11.6,6.1,18.950001,6.1,2002-01-29,9.9,9.9,9.72,9.75,4.885687,7500.0,9.704,9.884,10.203,10.3566,9.264,12.23535,11.25159,9.15441,10.727295,9.678705,10.208592,9.559408,10.533184,9.234816,10.1,9.2,10.37,9.2,11.22,9.2,11.6,9.2,11.6,6.1,18.950001,6.1,7.15
211,2002-02-12,8.25,8.5,8.18,8.5,4.259317,8400.0,8.188,8.308,9.096,9.8204,9.1585,11.7783,10.85622,7.33578,9.97611,8.21589,8.695321,7.920679,9.082643,7.533357,8.5,7.7,9.2,7.7,10.37,7.7,11.48,7.7,11.6,6.1,18.950001,6.1,2002-02-11,8.25,8.3,8.15,8.22,4.119011,33700.0,...,11.6,8.6,11.6,6.1,18.950001,6.1,2002-01-30,8.9,9.2,8.79,9.15,4.585029,17000.0,9.654,9.762,10.19,10.3316,9.248,12.1946,11.285387,9.094613,10.737694,9.642306,10.111914,9.412086,10.461829,9.062171,10.1,8.79,10.25,8.79,11.22,8.79,11.6,8.79,11.6,6.1,18.950001,6.1,6.99
212,2002-02-13,8.15,8.15,7.9,7.95,3.983714,43300.0,8.208,8.188,8.975,9.7674,9.1672,11.7303,10.698809,7.251191,9.836904,8.113096,8.451599,7.924401,8.715198,7.660802,8.5,7.9,8.84,7.7,10.25,7.7,11.48,7.7,11.6,6.1,18.950001,6.1,2002-02-12,8.25,8.5,8.18,8.5,4.259317,8400.0,...,11.6,8.1,11.6,6.1,18.950001,6.1,2002-01-31,8.84,8.84,8.6,8.65,4.334482,23700.0,9.494,9.612,10.128,10.2836,9.231,12.1489,11.417971,8.838029,10.772986,9.483014,10.079019,9.144981,10.546038,8.677962,10.1,8.6,10.25,8.6,11.22,8.6,11.6,8.6,11.6,6.1,18.950001,6.1,6.95
213,2002-02-14,7.75,8.0,7.6,7.9,3.95866,31900.0,8.128,8.113,8.8625,9.7216,9.1772,11.6838,10.556861,7.168139,9.70968,8.01532,8.333759,7.892241,8.554518,7.671482,8.5,7.6,8.5,7.6,10.25,7.6,11.48,7.6,11.6,6.1,18.950001,6.1,2002-02-13,8.15,8.15,7.9,7.95,3.983714,43300.0,...,11.6,8.1,11.6,6.1,18.950001,6.1,2002-02-01,8.2,8.26,8.1,8.2,4.108987,31900.0,9.14,9.407,10.0075,10.2246,9.2133,12.1019,11.536075,8.478925,10.771788,9.243212,9.996671,8.817329,10.586343,8.227657,10.1,8.1,10.1,8.1,11.22,8.1,11.6,8.1,11.6,6.1,18.950001,6.1,6.6


Our data is now 4818 rows x 331 columns: 33 columns at each of the 10 time lags, plus the target.

In [45]:
# Write the lagged features plus target to the Input folder on the mounted Drive.
# index=False leaves the row index out of the CSV.
newdata.to_csv('/content/drive/MyDrive/CPP-CSS/Stock_Market_Predictor/Datasets/Input/ABBInput.csv', index=False)