https://github.com/borisbanushev/predictions

# Part 1. FX price prediction using LSTM

## Step 1.1 - Importing the Libraries

In [None]:
from pandas import read_csv
from pandas import datetime
from pandas import concat
from pandas import DataFrame
from matplotlib import pyplot
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from numpy import concatenate
from math import sqrt
from keras import regularizers
import pandas as pd
from keras.utils.vis_utils import plot_model
from pandas.tools.plotting import lag_plot
from pandas import Series
from pandas.tools.plotting import autocorrelation_plot
import numpy as np
import seaborn as sns

## Step 1.2 - Load the Dataset and check for Autocorrelation

In [None]:
# Seed NumPy's global random number generator.
np.random.seed(1711)


def parser(x):
    """Convert a 'MM-DD-YYYY' string from the CSV into a datetime object."""
    date_format = '%m-%d-%Y'
    return datetime.strptime(x, date_format)


# Read the data set, converting the first column to dates via `parser`.
dataset = read_csv(
    'usdinr_dataset.csv',
    header=0,
    parse_dates=[0],
    date_parser=parser,
)

In [None]:
# Preview the first ten rows of the loaded data set.
print(dataset.head(n=10))

In [None]:
# Summary statistics of the data set (pandas `describe`).
dataset.describe()

In [None]:
# Load the first data column as a date-indexed Series and plot its
# autocorrelation. `Series.from_csv` was deprecated in pandas 0.21 and removed
# in 1.0; `read_csv` with that method's defaults (`index_col=0`,
# `parse_dates=True`) followed by `.iloc[:, 0]` yields the same Series and
# works on both old and new pandas versions.
series = read_csv('usdinr_dataset.csv', header=0, index_col=0, parse_dates=True).iloc[:, 0]
autocorrelation_plot(series)
pyplot.show()

In [None]:
# (rows, columns) of the loaded data set.
dataset.shape

## Step 1.2.1. Check for correlation

It is good to have both positively and negatively correlated assets in the training set.

In [None]:
# Pairwise correlation between the columns of the data set. `corr` is read as
# a global by `diagonal_correlation_matrix`.
corr = dataset.corr()
print(corr)

In [None]:
def diagonal_correlation_matrix():
    """Draw the lower triangle of the global ``corr`` matrix as a heatmap.

    Reads the module-level ``corr`` frame; the upper triangle, including the
    diagonal, is masked out so that each pair of columns is shown only once.
    The figure is created as a side effect; nothing is returned.
    """
    # Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
    # and removed in 1.24, where it raises AttributeError.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Set up the matplotlib figure
    f, ax = pyplot.subplots(figsize=(11, 9))

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3, center=0,
                square=True, linewidths=.5, cbar_kws={"shrink": .5})

In [None]:
# Render the masked correlation heatmap for `corr`.
diagonal_correlation_matrix()

In [None]:

# Pairwise relationships between the columns of the data set.
sns.pairplot(dataset)

## Step 1.3 -  Plot a graph to show the trend in the exchange rates

In [None]:
# Plot the 'USD/INR' column against the 'Date' column.
dataset.plot(y='USD/INR',x='Date', figsize=(15, 7))
pyplot.show()

## Step 1.4 Converting time series data to supervised learning data

In [None]:
# Credit https://machinelearningmastery.com/convert-time-series-supervised-learning-problem-python/

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table.

    Every output row holds the ``n_in`` lagged observations followed by the
    observation at time ``t`` and the ``n_out - 1`` observations after it.

    Args:
        data: Observations as a list, nested list, 1-D or 2-D array, Series
            or DataFrame (anything ``DataFrame(data)`` accepts).
        n_in: Number of lag steps used as input (columns t-n_in ... t-1).
        n_out: Number of output steps (columns t ... t+n_out-1).
        dropnan: If true, drop every row that contains a NaN, which includes
            the rows made incomplete by shifting.

    Returns:
        DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and
        ``var<j>(t+<i>)``.
    """
    df = DataFrame(data)
    # Count variables from the frame itself. The previous type-based check
    # reported one variable for nested lists (column-name length mismatch)
    # and raised IndexError for 1-D arrays and Series.
    n_vars = df.shape[1]
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]

    agg = concat(cols, axis=1)
    agg.columns = names

    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [None]:
# Re-read the CSV with its first column as the index, so that only the value
# columns remain, and cast them to float32.
dataset = read_csv('usdinr_dataset.csv', header=0, index_col=0)
values = dataset.values.astype('float32')

# Rescale every column to the [0, 1] range. `scaler` is kept so that the
# predictions can be mapped back to original units later.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)

# Frame the scaled data as (t-1) inputs and (t) outputs, then discard the
# columns at positions 4 and 5.
# NOTE(review): presumably the data has three value columns, which would leave
# only var1(t) as the target - confirm against the CSV.
reframed = series_to_supervised(scaled, 1, 1)
reframed.drop(
    reframed.columns[[4, 5]],
    axis=1,
    inplace=True,
)

In [None]:
# Preview the supervised-learning frame.
print(reframed.head())

In [None]:
# Hold out the first `n_test` rows for testing and train on the remainder.
# NOTE(review): if the CSV is sorted oldest-first, this tests on data that is
# older than the training data - confirm the file's sort order.
values = reframed.values
n_test = 277
test, train = values[:n_test, :], values[n_test:, :]

# The last column is the target; every preceding column is an input.
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]

# Insert a length-1 middle axis: (samples, features) -> (samples, 1, features).
train_X = train_X[:, np.newaxis, :]
test_X = test_X[:, np.newaxis, :]
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

## Step 1.5 Training the LSTM


Hyperparameters we use:
1. Number of neurons - 500
2. Number of layers - one LSTM and one Dense
3. Regularizer - L1 (Lasso)
4. Learning rate - Adam default (not set explicitly in the code; the 0.01 there is the L1 regularization factor)
5. Loss - MAE
6. Optimizer - Adam
7. Metric - MSE

In [None]:
# One LSTM layer with 500 units and an L1 penalty (factor 0.01) on its input
# kernel, followed by a single-unit dense output layer.
model = Sequential([
    LSTM(
        500,
        input_shape=(train_X.shape[1], train_X.shape[2]),
        kernel_regularizer=regularizers.l1(0.01),
    ),
    Dense(1),
])
# Minimise mean absolute error with Adam; mean squared error is tracked too.
model.compile(loss='mae', optimizer='adam', metrics=['mse'])

## Step 1.6. Fitting (training)

Parameters we use:
1. Number of epochs - 100
2. Batch size - 100
3. Shuffle - No

In [None]:
# Train for 100 epochs in batches of 100 samples without shuffling, so samples
# are consumed in their original order. The held-out test split doubles as the
# validation set, so the validation metrics are not independent of the later
# test evaluation.
history = model.fit(train_X, train_y, epochs=100, batch_size=100, validation_data=(test_X, test_y), verbose=2, shuffle=False)

## Step 1.6. Predict and invert the scaling

In [None]:
def get_y_hat(test_X, test_y, offset=0.15):
    """Predict on ``test_X`` and map predictions and targets back to original units.

    Uses the module-level ``model`` for prediction and the module-level
    ``scaler`` to undo the scaling, and prints the RMSE between the unscaled
    targets and the unscaled predictions.

    Args:
        test_X: Scaled inputs shaped (samples, 1, features).
        test_y: 1-D array of scaled targets, one per sample.
        offset: Constant added to the unscaled predictions after the RMSE has
            been computed. Defaults to 0.15, the value that used to be
            hard-coded.
            NOTE(review): this shifts the returned predictions without being
            reflected in the printed RMSE; it looks like a manual bias
            correction - confirm it is intended, or pass 0.

    Returns:
        Tuple ``(inv_yhat, inv_y)`` of single-column DataFrames holding the
        unscaled predictions (plus ``offset``) and the unscaled targets.
    """
    yhat = model.predict(test_X)
    test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))

    # `inverse_transform` needs rows as wide as those the scaler was fitted on,
    # so the target column is padded with the remaining input columns and only
    # column 0 is kept afterwards.
    inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
    inv_yhat = scaler.inverse_transform(inv_yhat)[:, 0]

    test_y = test_y.reshape((len(test_y), 1))
    inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
    inv_y = scaler.inverse_transform(inv_y)[:, 0]

    rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
    print('Test RMSE: %.3f' % rmse)

    # reshape(-1, 1) replaces the hard-coded 277 rows, which failed for any
    # other test-set size.
    inv_yhat = pd.DataFrame(inv_yhat.reshape((-1, 1)))
    inv_y = pd.DataFrame(inv_y.reshape((-1, 1)))
    inv_yhat += offset

    return inv_yhat, inv_y

## Time to see the result

In [None]:
# Map predictions and targets back to original units, then overlay both series
# on one figure.
inv_yhat, inv_y = get_y_hat(test_X, test_y)
pyplot.figure(figsize=(15,7))
pyplot.plot(inv_yhat,label='Predicted')
pyplot.plot(inv_y,label='Actual')
pyplot.legend()

## Step 1.7. Analyse the error and seasonality

In [None]:
# Residuals: actual minus predicted values, as returned by get_y_hat.
errr = inv_y-inv_yhat

In [None]:
# Plot the residuals across the test samples.
errr.plot(figsize=(15, 5))
pyplot.show()

In [None]:
# Summary statistics of the residuals.
errr.describe()

# Part 2. Feature engineering with XGBoost

In [None]:
import xgboost as xgb
from sklearn.metrics import accuracy_score

## Step 2.1. Load the data - we use Google stock price data

In [None]:
# Bundle the stock's display name with its price history read from CSV.
# NOTE(review): the variable is called CNHI although the data is labelled
# "Google Inc" - presumably a leftover name; confirm before renaming.
# NOTE(review): later cells do arithmetic on the Open and Volume columns; if
# the CSV stores numbers with thousands separators, read_csv would need
# thousands=',' - confirm against the file.
CNHI = {"stock_name":"Google Inc", "data": pd.read_csv("Google_Stock_Price_Train.csv")}

In [None]:
# Preview the first rows of the stock data.
CNHI['data'].head()

## Step 2.2. Create different predictors

In [None]:
# Start the predictor table with the 2-row rolling mean (simple moving
# average) of the Open column.
Predictors = pd.DataFrame({"sma2":CNHI["data"].Open.rolling(window=2).mean()})

In [None]:
# Moving average of the previous row (lag of one step).
Predictors["sma2_1"] = Predictors.sma2.shift(1)

In [None]:
# Inspect the first ten rows; the leading rows hold NaN produced by the
# rolling window and the shift.
Predictors.head(n=10)

In [None]:
# One-step change of the moving average.
Predictors["sma2_increment"] = Predictors.sma2.diff()

In [None]:
# One-step change of the lagged moving average.
Predictors["sma2_1_increment"] = Predictors.sma2_1.diff()  

In [None]:
# One-step changes of the moving average and of its lagged copy.
Predictors["sma2_increment"] = Predictors["sma2"].diff()
Predictors["sma2_1_increment"] = Predictors["sma2_1"].diff()

# Absolute and relative one-step change of the Volume column.
Predictors["vol_increment"] = CNHI["data"]["Volume"].diff()
Predictors["vol_rel_increment"] = CNHI["data"]["Volume"].diff() / CNHI["data"]["Volume"]

# Open column: previous value, one-step change and current value.
Predictors["open_1"] = CNHI["data"]["Open"].shift(1)
Predictors["open_incr"] = CNHI["data"]["Open"] - CNHI["data"]["Open"].shift(1)
Predictors["open"] = CNHI["data"]["Open"]

# Keep only the rows in which every predictor is available.
Predictors = Predictors.dropna()

In [None]:
# Preview the predictor table after incomplete rows were dropped.
Predictors.head()

In [None]:
# Target: change of the moving average to the next row (sma2[t+1] - sma2[t]).
# The last row has no successor, so its NaN is dropped.
target = pd.DataFrame({"value":Predictors.sma2.shift(-1) - Predictors.sma2}).dropna()

## Step 2.3. Create training and test data

In [None]:
# Align predictors and target on their shared index once (the identical merge
# used to be computed twice), then split the joined frame back into the
# feature columns and the target column.
features_and_target = pd.merge(Predictors, target, left_index=True, right_index=True)
X = features_and_target[Predictors.columns]
y = features_and_target[target.columns]
print(X.head())

In [None]:
# Order-preserving split: the first 65 % of the rows are used for training,
# the remaining rows for testing.
train_samples = int(X.shape[0] * 0.65)

X_train, X_test = X.iloc[:train_samples], X.iloc[train_samples:]
y_train, y_test = y.iloc[:train_samples], y.iloc[train_samples:]

In [None]:
def getBinary(val):
    """Map a value to a class label: 1 if it is strictly positive, else 0."""
    return 1 if val > 0 else 0
 

# Binarised test targets (1 = positive change, 0 = otherwise), used as ground
# truth for the accuracy score.
y_test_binary = pd.DataFrame(y_test["value"].apply(getBinary))

## Step 2.4. Build XGB regressor

In [None]:
# XGBoost regressor with 150 estimators and a learning rate of 0.01.
# NOTE(review): the model is later fitted on 0/1 labels and thresholded at 0.5,
# i.e. a regressor is used for a binary target - confirm this is intended.
regressor = xgb.XGBRegressor(gamma=0.0,n_estimators=150,base_score=0.7,colsample_bytree=1,learning_rate=0.01)

## Step 2.5. Train the regressor and check the result

In [None]:
# Fit on the binarised training targets (1 = positive change, 0 = otherwise).
xgbModel = regressor.fit(X_train, y_train["value"].apply(getBinary))

# Predict on the test features and threshold the continuous output at 0.5.
y_predicted = xgbModel.predict(X_test)
y_predicted_binary = [int(yp >= 0.5) for yp in y_predicted]

print('Model accuracy = %.3f' % accuracy_score(y_test_binary, y_predicted_binary))

In [None]:
# Bar chart of the fitted model's feature importances, one bar per feature
# column, with vertical tick labels.
fig = pyplot.figure(figsize=(8, 8))
pyplot.xticks(rotation='vertical')
pyplot.bar(
    list(range(len(xgbModel.feature_importances_))),
    xgbModel.feature_importances_.tolist(),
    tick_label=X_test.columns,
)
pyplot.show()

https://tradingsim.com/blog/simple-moving-average/

# Part 3 - Other ways to predict prices

1. CNN
2. Trend - using simple NN or LR to predict whether the trend will be up or down
3. RL - algo that trains itself how to trade https://hackernoon.com/the-self-learning-quant-d3329fcc9915
4. NLP - for fundamental analysis

# Next step: Backtest 

https://www.quantopian.com