## References

https://www.kaggle.com/code/mtszkw/technical-indicators-for-trading-stocks
<br>
https://www.kaggle.com/code/thebrownviking20/everything-you-can-do-with-a-time-series
<br>
https://www.diva-portal.org/smash/get/diva2:1775077/FULLTEXT01.pdf
<br>
https://pub.towardsai.net/predicting-stock-prices-using-arima-fourier-transforms-and-technical-indicators-with-deep-43a164859683


## _TODO: Don't forget to fix the time series_
## _TODO: Remove weekends, please_

## Import dependencies

In [None]:
! pip install -r requirements.txt --quiet

In [None]:
import os
import pandas as pd
import gdown
from datetime import datetime, timedelta

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly as py
import plotly.io as pio
import plotly.graph_objs as go
from plotly.subplots import make_subplots

from pmdarima.arima import auto_arima
from statsmodels.tsa.arima.model import ARIMA

import numpy as np                                    # For matrices!
import matplotlib.pyplot as plt                       # To visualize
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression     # For the regression itself
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import svm
from sklearn.metrics import mean_squared_error, f1_score
from sklearn.preprocessing import OneHotEncoder       # To convert discrete strings to vectors!
from sklearn.preprocessing import normalize           # For normalizing
import seaborn as sns                                 # For plots

## Change default chart layout

In [None]:
# # Show charts when running kernel
# init_notebook_mode(connected=True)
# 
# # Change default background color for all visualizations
# layout=go.Layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(250,250,250,0.8)')
# fig = go.Figure(layout=layout)
# templated_fig = pio.to_templated(fig)
# pio.templates['my_template'] = templated_fig.layout.template
# pio.templates.default = 'my_template'

## Download datasets

In [None]:
# # SnP500
# gdown.download("https://docs.google.com/uc?id=1KveL-W2L6YxO-1NKn0n6FeGTKLWgvAL4", "ES_continuous_adjusted_1min_2011_2018.txt", quiet=True)

# # Nasdaq
# gdown.download("https://docs.google.com/uc?id=1NyHRoU2YiaCKIhy7afUTuAmC0haWK1Ny", "NQ_continuous_adjusted_1min_2011_2018.txt", quiet=True)

## Read datasets as CSV

In [None]:
def _read_bars(path):
    """Load one bar-data text file into a DataFrame, decoding it as latin-1."""
    return pd.read_csv(path, encoding='latin-1')


# SnP500 (ES) frames, one per bar size named in the file.
ES_1min_raw, ES_5min_raw, ES_30min_raw, ES_1hour_raw = map(_read_bars, (
    'ES_continuous_adjusted_1min_2011_2018.txt',
    'ES_continuous_adjusted_5min_2011_2018.txt',
    'ES_continuous_adjusted_30min_2011_2018.txt',
    'ES_continuous_adjusted_1hour_2011_2018.txt',
))

# Nasdaq (NQ) frames, same bar sizes.
NQ_1min_raw, NQ_5min_raw, NQ_30min_raw, NQ_1hour_raw = map(_read_bars, (
    'NQ_continuous_adjusted_1min_2011_2018.txt',
    'NQ_continuous_adjusted_5min_2011_2018.txt',
    'NQ_continuous_adjusted_30min_2011_2018.txt',
    'NQ_continuous_adjusted_1hour_2011_2018.txt',
))

# Data Processing

## I. Moving Average

Moving Averages (MA) help to smooth out stock prices on a chart by filtering out short-term price fluctuations. We calculate moving averages over a defined period of time e.g. last 9, 50 or 200 days. There are two (most common) averages used in technical analysis which are:

- Simple Moving Average (SMA) - a simple average calculated over last N days e.g. 50, 100 or 200,
- Exponential Moving Average (EMA) - an average where greater weights are applied to recent prices.

MAs and their crossovers (see *Golden Cross* and *Death Cross*) are often used as trade signals, as they are simple yet powerful.

In [None]:
# Each frame is paired with the number of bars that make up 24 hours at its
# bar size (1440 one-minute bars, 288 five-minute bars, 48 thirty-minute bars,
# 24 one-hour bars). All window lengths below are whole-day multiples of it.
_MA_FRAMES = (
    # SnP500
    (ES_1min_raw, 1440),
    (ES_5min_raw, 288),
    (ES_30min_raw, 48),
    (ES_1hour_raw, 24),
    # Nasdaq
    (NQ_1min_raw, 1440),
    (NQ_5min_raw, 288),
    (NQ_30min_raw, 48),
    (NQ_1hour_raw, 24),
)

# Output column -> simple-moving-average window, in days.
_SMA_WINDOWS = (('SMA_50', 50), ('SMA_100', 100), ('SMA_200', 200))

for _frame, _bars_per_day in _MA_FRAMES:
    _close = _frame['Close']

    # NOTE(review): the positional ewm() argument is `com` (centre of mass),
    # not `span`, and it covers 5 days of bars although the column is named
    # EMA_9 -- confirm that this is the intended smoothing.
    _frame['EMA_9'] = _close.ewm(5 * _bars_per_day).mean().shift()

    # .shift() moves every average down one row, so the value stored on a row
    # is computed from the rows before it only.
    for _column, _days in _SMA_WINDOWS:
        _frame[_column] = _close.rolling(_days * _bars_per_day).mean().shift()

In [None]:
# Plotting MA

def plotMA(ES_1hour_raw):
    """Show the moving averages of a frame together with its close price.

    The argument must expose DateTime, EMA_9, SMA_50, SMA_100, SMA_200 and
    Close as attributes. One line is drawn per moving average, plus a faded
    grey line for the close price.
    """
    frame = ES_1hour_raw
    timestamps = frame.DateTime
    averages = (
        ('EMA_9', 'EMA 9'),
        ('SMA_50', 'SMA 50'),
        ('SMA_100', 'SMA 100'),
        ('SMA_200', 'SMA 200'),
    )

    fig = go.Figure()
    for column, label in averages:
        fig.add_trace(go.Scatter(x=timestamps, y=getattr(frame, column), name=label))

    # The close price is drawn last and faded so it does not hide the averages.
    fig.add_trace(
        go.Scatter(x=timestamps, y=frame.Close, name='Close', line_color='dimgray', opacity=0.3)
    )
    fig.show()

# plotMA(ES_1hour_raw)
# plotMA(NQ_1hour_raw)

## II. RSI

Another commonly used indicator is the Relative Strength Index (RSI), which indicates the magnitude of recent price changes. It can show that a stock is either overbought or oversold. Typically, an RSI value of 70 or above signals that a stock is becoming overbought/overvalued, while a value of 30 or below can mean that it is oversold. The full range of RSI is from 0 to 100.

In [None]:
def RSI(df, n=14):
    """Return the Relative Strength Index of ``df['Close']``.

    Gains and losses are averaged with a plain rolling mean over ``n``
    bar-to-bar changes.

    Args:
        df: frame with a 'Close' column.
        n: number of price changes in each averaging window.

    Returns:
        A Series that starts at the second row of ``df`` (the first row has
        no previous close). Entries are NaN until ``n`` changes are
        available, and where both average gain and average loss are zero.
    """
    # Drop the leading element, which has no predecessor to diff against.
    changes = df['Close'].diff()[1:]

    # Split the changes into non-negative gains and non-negative losses.
    gains = changes.clip(lower=0)
    losses = changes.clip(upper=0).abs()

    avg_gain = gains.rolling(n).mean()
    avg_loss = losses.rolling(n).mean()

    relative_strength = avg_gain / avg_loss
    return 100.0 - (100.0 / (1.0 + relative_strength))

# Add an 'RSI' column to every frame. NaN values in the series returned by
# RSI() are replaced with 0 before the assignment.
for _frame in (
    # SnP500
    ES_1min_raw,
    ES_5min_raw,
    ES_30min_raw,
    ES_1hour_raw,
    # Nasdaq
    NQ_1min_raw,
    NQ_5min_raw,
    NQ_30min_raw,
    NQ_1hour_raw,
):
    _frame['RSI'] = RSI(_frame).fillna(0)

In [None]:
# Plotting RSI

def plotRSI(df):
    """Show the RSI column of ``df`` against DateTime with lines at 70 and 30."""
    # tail(len(df)) selects every row; the count is the knob for plotting
    # only the most recent rows.
    row_count = len(df)
    rsi_trace = go.Scatter(x=df.DateTime.tail(row_count), y=df.RSI.tail(row_count))

    fig = go.Figure(rsi_trace)
    for level in (70, 30):
        fig.add_hline(y=level)
    fig.show()

# plotRSI(NQ_1hour_raw)
# plotRSI(ES_1hour_raw)

## III. MACD

Moving Average Convergence Divergence (MACD) is an indicator that shows the relationship between two exponential moving averages, i.e. the 12-day and 26-day EMAs. We obtain MACD by subtracting the 26-day EMA (also called the slow EMA) from the 12-day EMA (the fast EMA).

In [None]:
def calc_macd(df, timescale):
    """Add EMA_12, EMA_26, MACD and MACD_signal columns to ``df`` in place.

    Args:
        df: frame with a 'Close' column; it is modified, nothing is returned.
        timescale: multiplier applied to the 12 / 26 / 9 EMA spans.
    """
    close = df['Close']

    # NOTE(review): the spans are scaled by `timescale` but min_periods is
    # not -- confirm that the short warm-up is intended.
    fast_ema = close.ewm(span=12 * timescale, min_periods=12).mean()
    slow_ema = close.ewm(span=26 * timescale, min_periods=26).mean()
    macd_line = fast_ema - slow_ema
    signal_line = macd_line.ewm(span=9 * timescale, min_periods=9).mean()

    df["EMA_12"] = fast_ema
    df["EMA_26"] = slow_ema
    df["MACD"] = macd_line
    df["MACD_signal"] = signal_line

# Add the MACD columns to every frame. The second value of each pair is the
# timescale passed to calc_macd for that bar size.
for _frame, _timescale in (
    # SnP500
    (ES_1min_raw, 1440),
    (ES_5min_raw, 288),
    (ES_30min_raw, 48),
    (ES_1hour_raw, 24),
    # Nasdaq
    (NQ_1min_raw, 1440),
    (NQ_5min_raw, 288),
    (NQ_30min_raw, 48),
    (NQ_1hour_raw, 24),
):
    calc_macd(_frame, _timescale)

In [None]:
# Plotting MACD

def plotMACD(df):
    """Show close price and EMAs (top panel) and MACD with its signal (bottom).

    ``df`` must expose DateTime, Close, EMA_12, EMA_26, MACD and MACD_signal
    as attributes.
    """
    timestamps = df.DateTime
    # (subplot row, attribute holding the values, legend label)
    traces = (
        (1, 'Close', 'Close'),
        (1, 'EMA_12', 'EMA 12'),
        (1, 'EMA_26', 'EMA 26'),
        (2, 'MACD', 'MACD'),
        (2, 'MACD_signal', 'Signal line'),
    )

    fig = make_subplots(rows=2, cols=1)
    for row, column, label in traces:
        fig.add_trace(go.Scatter(x=timestamps, y=getattr(df, column), name=label), row=row, col=1)
    fig.show()

# plotMACD(ES_1hour_raw)
# plotMACD(NQ_1hour_raw)

## IV. Stochastic

Another technical tool in this notebook is the stochastic oscillator, which is quite similar to RSI in the sense that its values (also in the range 0-100) can indicate whether a stock is overbought or oversold. It is arguably the most complicated indicator compared to the ones introduced earlier. The stochastic can be calculated as:

$$\%K=\frac{C-L_{14}}{H_{14}-L_{14}}\times 100$$
 
where:  $C$
  is the most recent close price,  $L_{14}$
  and  $H_{14}$
  are the lowest/highest prices traded in last 14 days.

This  $\%K$
  stochastic is often referred to as the "fast stochastic indicator". There is also a "slow stochastic indicator" that can be obtained as:

$$\%D=SMA_3(\%K)$$

In [None]:
def calc_stochastic(df, k=14, d=3):
    """Return a copy of ``df`` with stochastic oscillator columns added.

    The computation runs on a copy, so the frame passed in is left untouched;
    callers must use the returned frame to see the new columns. (Previously
    the function returned nothing, so the computed columns were lost.)

    Args:
        df: frame with 'Low', 'High' and 'Close' columns.
        k: number of rows in the look-back window for the lowest low and the
            highest high.
        d: number of rows averaged to turn ``stoch_k`` into ``stoch_d``.

    Returns:
        A new DataFrame holding every column of ``df`` plus:
        - ``stoch_k``: 100 * (Close - lowest low) / (highest high - lowest low)
        - ``stoch_d``: rolling mean of ``stoch_k`` over ``d`` rows
        The first ``k - 1`` rows of ``stoch_k`` are NaN. A window whose
        highest high equals its lowest low divides by zero and yields NaN or
        inf.
    """
    result = df.copy()
    lowest_low = result['Low'].rolling(window=k).min()
    highest_high = result['High'].rolling(window=k).max()
    result['stoch_k'] = 100 * (result['Close'] - lowest_low) / (highest_high - lowest_low)
    result['stoch_d'] = result['stoch_k'].rolling(window=d).mean()
    return result

# Run the stochastic calculation for every frame.
# NOTE(review): calc_stochastic works on a copy of its argument and nothing
# is captured here, so these calls do not add stoch_k / stoch_d to the frames.
# plot_stochastic reads those columns -- confirm whether the results should
# be stored.
for _frame in (
    # SnP500
    ES_1min_raw,
    ES_5min_raw,
    ES_30min_raw,
    ES_1hour_raw,
    # Nasdaq
    NQ_1min_raw,
    NQ_5min_raw,
    NQ_30min_raw,
    NQ_1hour_raw,
):
    calc_stochastic(_frame)

In [None]:
# Plotting stochastics

def plot_stochastic(df):
    """Show the stoch_k and stoch_d attributes of ``df`` against DateTime."""
    timestamps = df.DateTime
    fig = go.Figure()
    for column, label in (('stoch_k', 'K stochastic'), ('stoch_d', 'D stochastic')):
        fig.add_trace(go.Scatter(x=timestamps, y=getattr(df, column), name=label))
    fig.show()

# plot_stochastic(ES_5min_raw)
# plot_stochastic(ES_1hour_raw)

## V. Differencing

In [None]:
# Add a 'Difference' column to every frame: each close minus the close of the
# row before it (the first row is NaN).
for _frame in (
    # SnP500
    ES_1min_raw,
    ES_5min_raw,
    ES_30min_raw,
    ES_1hour_raw,
    # Nasdaq
    NQ_1min_raw,
    NQ_5min_raw,
    NQ_30min_raw,
    NQ_1hour_raw,
):
    _frame["Difference"] = _frame.Close.diff(1)

## VI. On-Balance Volume

In [None]:
def calc_obv(df):
    """Add an 'OBV' (on-balance volume) column to ``df`` in place.

    Each row contributes +Volume when its close is above the previous close,
    -Volume when it is below, and 0 otherwise (including the first row, which
    has no previous close). OBV is the running total of those contributions.

    Args:
        df: frame with 'Close' and 'Volume' columns; modified, nothing returned.
    """
    close = df['Close']
    previous_close = close.shift(1)
    volume = df['Volume'].to_numpy()

    rose = (close > previous_close).to_numpy()
    fell = (close < previous_close).to_numpy()

    signed_volume = np.select([rose, fell], [volume, -volume], default=0)
    df["OBV"] = signed_volume.cumsum()

# Add the 'OBV' column to every frame.
for _frame in (
    # SnP500
    ES_1min_raw,
    ES_5min_raw,
    ES_30min_raw,
    ES_1hour_raw,
    # Nasdaq
    NQ_1min_raw,
    NQ_5min_raw,
    NQ_30min_raw,
    NQ_1hour_raw,
):
    calc_obv(_frame)

In [None]:
# Plotting OBV

def plot_obv(df):
    """Show the OBV attribute of ``df`` against DateTime as a single line."""
    obv_trace = go.Scatter(x=df.DateTime, y=df.OBV)
    fig = go.Figure()
    fig.add_trace(obv_trace)
    fig.show()

# plot_obv(ES_5min_raw)
# plot_obv(ES_1hour_raw)

# Model Training

## ARIMA Model parameter tuning and training

In [None]:
#model = auto_arima(NQ_1hour_raw['Close'], seasonal=False, trace=True)
#print(model.summary())

In [None]:
# def calc_arima(df):
#     lis = df.Close.values
#     train, test_raw = lis[:int(lis.size*0.8)], lis[int(lis.size*0.8):]
#     print(train.size, test.size)
# 
#     def arima_forecast(train):
#         model = ARIMA(train, order=(1,1,1))
#         model_fit = model.fit()
# 
#         output = model_fit.forecast()
#         yhat = output[0]
#         print(yhat)
#         return yhat
# 
#     predictions = []
#     test = test_raw[:]
# 
#     for i in range(len(test)):
#         pred = arima_forecast(train)
#         predictions.append(pred)
# 
#         train = np.append(train, [ test[0] ])
#         test = test[1:]
# 
#         print(train.size, test.size)
#     
#     return predictions
# 
# calc_arima(NQ_1hour_raw)

In [None]:
# Bare expression: as the last line of a notebook cell this displays the
# 1-minute SnP500 frame, including the indicator columns added above.
ES_1min_raw

## LSTM


In [None]:
! pip install tensorflow

In [None]:
# Separate in Train and Test Dfs.
# The split is positional: the first 80% of the rows train, the rest test.

train_size = int(len(ES_1min_raw) * 0.8)
train_df, test_df = ES_1min_raw.iloc[:train_size], ES_1min_raw.iloc[train_size:]

# Scale the features (every column except DateTime). The scaler is fitted on
# the training rows only and then reused for the test rows.
# NOTE(review): the rolling / ewm indicator columns are NaN on their warm-up
# rows and MinMaxScaler passes NaN through -- confirm whether those rows
# should be dropped or filled before training.
train_features = train_df.drop(['DateTime'], axis=1)
test_features = test_df.drop(['DateTime'], axis=1)

scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_features)
test_scaled = scaler.transform(test_features)

# Convert the scaled data back to a DataFrame. The labels are taken from the
# frames that were actually scaled; `columns[:-1]` of the unscaled frame only
# matches when DateTime happens to be the last column, and otherwise shifts
# every label onto the wrong feature.
train_scaled_df = pd.DataFrame(train_scaled, columns=train_features.columns, index=train_df.index)
test_scaled_df = pd.DataFrame(test_scaled, columns=test_features.columns, index=test_df.index)

# Re-attach the (unscaled) timestamps
train_scaled_df['DateTime'] = train_df['DateTime']
test_scaled_df['DateTime'] = test_df['DateTime']

# Create binary classifiers

# A row is a positive example when the close LABEL_HORIZON rows later is at
# least LABEL_RISE_FACTOR times the current close.
LABEL_HORIZON = 30
LABEL_RISE_FACTOR = 1.0035


def _label_price_rise(raw_df, horizon=LABEL_HORIZON, rise_factor=LABEL_RISE_FACTOR):
    """Return Close/DateTime of ``raw_df`` plus the future price and its label.

    Labels are computed from the unscaled close: after min-max scaling a
    ratio between two values no longer corresponds to a percentage move in
    price. The last ``horizon`` rows have no future price (NaN) and therefore
    compare as False; callers should discard them.
    """
    # .copy() so the new columns are written to an independent frame rather
    # than to a slice of the caller's data.
    labelled = raw_df[['Close', 'DateTime']].copy()
    labelled['PriceAfter30'] = labelled['Close'].shift(periods=-horizon)
    labelled['Classifier'] = labelled['PriceAfter30'] >= rise_factor * labelled['Close']
    return labelled


# The trailing LABEL_HORIZON rows of each split are cut from both X and y
# because their label is unknown rather than negative.
old_df = _label_price_rise(train_df)
arr = old_df['Classifier'].to_numpy()
y_train = np.where(arr, 1, 0)[:-LABEL_HORIZON]
X_train = train_scaled_df.drop(['DateTime'], axis=1).to_numpy()[:-LABEL_HORIZON]

old_df = _label_price_rise(test_df)
arr = old_df['Classifier'].to_numpy()
y_test = np.where(arr, 1, 0)[:-LABEL_HORIZON]
X_test = test_scaled_df.drop(['DateTime'], axis=1).to_numpy()[:-LABEL_HORIZON]

X_test.shape

# Binary Classification with Sonar Dataset: Baseline
from pandas import read_csv
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold


# baseline model
def create_baseline(input_dim=None, hidden_units=15):
    """Build and compile the baseline binary classifier.

    Args:
        input_dim: number of input features. Defaults to the column count of
            the module-level ``X_train`` so the network follows the feature
            set instead of a hard-coded width.
        hidden_units: width of the single hidden ReLU layer.

    Returns:
        A compiled Sequential model with one hidden layer and a sigmoid
        output, using binary cross-entropy loss, the adam optimizer and the
        accuracy metric.
    """
    if input_dim is None:
        input_dim = X_train.shape[1]

    model = Sequential()
    model.add(Dense(hidden_units, input_shape=(input_dim,), activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# evaluate model with standardized dataset
# NOTE(review): StratifiedKFold with shuffle=True mixes earlier and later rows
# of a time-ordered series across folds -- confirm this is acceptable.
estimator = KerasClassifier(model=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True)
results = cross_val_score(estimator, X_train, y_train, cv=kfold)
print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


#arr = old_df[['Close', 'PriceAfter30']].to_numpy()
#arr = arr[:-30]

# Train LSTM


# Hyperparameter optimization (how?)


# Output accuracy report