In [25]:
from pandas import read_excel
from pandas import DataFrame
from pandas import concat
from pandas import Series
import numpy
from numpy import append
from numpy import array
from numpy import diff
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
import warnings
warnings.filterwarnings("ignore")

In [26]:
# create a differenced series
def difference(dataset, interval=1):
    """Return the lag-``interval`` differences of ``dataset`` as a pandas Series.

    Element ``k`` of the result is ``dataset[k + interval] - dataset[k]``.
    The result is empty when ``dataset`` has no more than ``interval`` items.
    """
    deltas = [dataset[pos] - dataset[pos - interval]
              for pos in range(interval, len(dataset))]
    return Series(deltas)

In [27]:
# invert differenced forecast
def invert_difference(history, yhat, interval=1):
    """Undo a differencing step for one forecast.

    Adds the value found ``interval`` positions from the end of ``history``
    to the differenced forecast ``yhat`` and returns the sum.
    """
    baseline = history[-interval]
    return yhat + baseline

In [28]:
# Get technical indicators
def get_technical_indicators(data):
    """Build a table of indicator columns from a single series of values.

    ``data`` is wrapped in a DataFrame; the column assignments below expect
    it to hold exactly one column.  Columns produced, in order: ``ma5``,
    ``ma20``, ``25ema``, ``10ema``, ``MACD``, ``20sd``, ``upper_band``,
    ``lower_band``, ``ema``, ``momentum``.  Rows that contain any NaN (the
    warm-up rows of the rolling windows) are dropped before returning.
    """
    values = DataFrame(data)
    indicators = DataFrame()

    # Simple moving averages.
    # NOTE(review): the column is named 'ma20' but its window is 25, while
    # '20sd' below uses a window of 20 -- confirm which window is intended.
    short_ma = values.rolling(window=5).mean()
    long_ma = values.rolling(window=25).mean()
    indicators['ma5'] = short_ma
    indicators['ma20'] = long_ma

    # MACD: the span-10 EMA minus the span-25 EMA.
    indicators['25ema'] = values.ewm(span=25).mean()
    indicators['10ema'] = values.ewm(span=10).mean()
    indicators['MACD'] = indicators['10ema'] - indicators['25ema']

    # Bollinger bands: 'ma20' plus/minus two rolling standard deviations.
    indicators['20sd'] = values.rolling(window=20).std()
    band_offset = indicators['20sd'] * 2
    indicators['upper_band'] = indicators['ma20'] + band_offset
    indicators['lower_band'] = indicators['ma20'] - band_offset

    # Exponential moving average with centre of mass 0.5.
    indicators['ema'] = values.ewm(com=0.5).mean()

    # "Momentum" as defined here is simply the input value minus 1.
    indicators['momentum'] = values - 1

    return indicators.dropna()

In [29]:
def timeseries_to_supervised(data, lag):
    """Frame a series as a supervised-learning table.

    The result holds ``lag`` columns with the input shifted down by
    1, 2, ..., ``lag`` rows, followed by the unshifted input as the final
    column.  The shifted columns contain NaN in their leading rows, where no
    earlier value exists.
    """
    base = DataFrame(data)
    frames = [base.shift(step) for step in range(1, lag + 1)] + [base]
    return DataFrame(concat(frames, axis=1))

In [30]:
# scale train and test data to [-1, 1]
def scale(train, test):
    """Scale ``train`` and ``test`` with a MinMaxScaler fitted on ``train`` only.

    The scaler uses ``feature_range=(-1, 1)``.  Both inputs are reshaped to
    ``(shape[0], shape[1])`` before being transformed.

    Returns ``(scaler, train_scaled, test_scaled)``; the scaler is returned so
    that forecasts can later be mapped back with ``inverse_transform``.
    """
    scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train)

    scaled = []
    for block in (train, test):
        n_rows, n_cols = block.shape[0], block.shape[1]
        scaled.append(scaler.transform(block.reshape(n_rows, n_cols)))

    return scaler, scaled[0], scaled[1]

In [31]:
# inverse scaling for a forecasted value
def invert_scale(scaler, X, yhat):
    """Map one scaled forecast back to the scaler's original units.

    The feature values ``X`` and the forecast ``yhat`` are joined into a
    single row (forecast last), passed through ``scaler.inverse_transform``,
    and the last entry of the inverted row is returned.
    """
    row = numpy.array(list(X) + [yhat])
    row = row.reshape(1, len(row))
    return scaler.inverse_transform(row)[0, -1]

In [32]:
# fit ML model
def model_fit(data, random_state=None):
    """Tune and fit an XGBoost regressor with a randomized hyper-parameter search.

    Parameters
    ----------
    data : 2-D array
        Every column except the last is used as a feature; the last column
        is the regression target.
    random_state : int or None, optional
        Forwarded to ``RandomizedSearchCV`` so the sampled candidates can be
        reproduced.  ``None`` (the default) keeps the search unseeded, as
        before.

    Returns
    -------
    The fitted ``RandomizedSearchCV`` object; it exposes ``predict`` through
    the estimator refitted on the best candidate.
    """
    X, y = data[:, :-1], data[:, -1]

    # Candidate values sampled by the search (20 draws, 5 folds each).
    param_distributions = {
        'max_depth': range(3, 21, 3),
        'min_child_weight': range(1, 5, 1),
        'learning_rate': [0.001, 0.01, 0.1],
        'n_estimators': [250, 500, 750, 1000],
        'colsample_bytree': [i / 10.0 for i in range(3, 10)],
        'colsample_bylevel': [i / 10.0 for i in range(3, 10)],
        'gamma': [i / 10.0 for i in range(0, 5)],
    }

    # refit=True replaces the former refit="AUC": no multi-metric `scoring`
    # is configured, so the string only acted as a truthy flag, and AUC is
    # not a regression metric anyway.
    # NOTE(review): an integer `cv` is not a time-ordered split; since the
    # rows are consecutive time steps, consider TimeSeriesSplit -- confirm
    # against the experiment design before changing it.
    search = RandomizedSearchCV(
        XGBRegressor(),
        param_distributions,
        cv=5,
        n_iter=20,
        refit=True,
        verbose=1,
        n_jobs=-1,
        random_state=random_state,
    )
    return search.fit(X, y)

In [33]:
# make a one-step forecast
def model_predict(model, X):
    """Forecast a single sample.

    ``X`` is reshaped into one row (``1 x n_features``) before being handed
    to ``model.predict``; the first element of the returned predictions is
    the result.
    """
    single_row = X.reshape(1, -1)
    return model.predict(single_row)[0]

In [34]:
# run a repeated experiment
def experiment(series, features, len_test=150):
    """Fit the model on all but the last ``len_test`` rows and forecast the rest.

    Parameters
    ----------
    series : pandas Series or one-column DataFrame
        Price history; only ``series.values`` is used.
    features : int
        Number of lagged differences given to the model as inputs.
    len_test : int, optional
        Number of trailing rows held out and forecast (default 150).

    Returns
    -------
    DataFrame with one row per held-out step, holding the forecast mapped
    back to the original scale of ``series``.

    Raises
    ------
    ValueError
        If ``len_test`` is not positive or leaves no rows to train on.
    """
    # transform data to be stationary
    # Flatten first so a one-column DataFrame and a Series behave alike and
    # the differences are plain scalars rather than one-element arrays.
    raw_values = series.values.reshape(-1)
    diff_values = difference(raw_values)

    # transform data to be supervised learning; the first `features` rows
    # are dropped because their lag columns are NaN
    supervised = timeseries_to_supervised(diff_values, features)
    supervised_values = supervised.values[features:, :]

    # get technical indicators
    df = get_technical_indicators(diff_values)

    # collect all data
    # NOTE(review): concat aligns on index labels.  `df` keeps the labels of
    # the differenced series, while DataFrame(supervised_values) is
    # relabelled from 0 after dropping `features` rows, so each indicator row
    # is paired with the supervised row built `features` steps later.
    # Confirm that this offset is intended.
    data = concat([df, DataFrame(supervised_values)], axis=1).dropna()

    if not 0 < len_test < len(data):
        raise ValueError(
            "len_test must be between 1 and %d, got %r" % (len(data) - 1, len_test)
        )

    # split data into train and test-sets
    train = data[:-len_test]
    test = data[-len_test:]

    # transform the scale of the data (scaler is fitted on train only)
    scaler, train_scaled, test_scaled = scale(array(train), array(test))

    # fit the base model
    mod = model_fit(train_scaled)

    # forecast test dataset
    predictions = list()
    for i in range(len(test)):
        # every column but the last is a model input
        X = test_scaled[i, 0:-1]
        yhat_scaled = model_predict(mod, X)
        # invert scaling
        yhat_diff = invert_scale(scaler, X, yhat_scaled)
        # invert differencing: the target of test row i is
        # raw[t] - raw[t - 1] with t = len(raw) - len_test + i, so the
        # forecast is rebuilt from raw[t - 1], the observation
        # len_test + 1 - i positions from the end.  (Previously every step
        # was added to the single value raw[-len_test].)
        yhat = invert_difference(raw_values, yhat_diff, len_test + 1 - i)
        # store the predictions
        predictions.append(yhat)

    return DataFrame(predictions)

In [35]:
# Folder holding one "<ticker>.xlsx" workbook per asset, and the workbook
# whose first column lists the tickers to process.
# NOTE(review): both locations are absolute, machine-specific paths.
path = "C:\\Users\\fh20175\\Box\\Habbab Fatima\\Experiment set 3\\DataSets\\"
asset_list = read_excel("C:\\Users\\fh20175\\Box\\Habbab Fatima\\Experiment set 6\\asset_list.xlsx")

no_assets = len(asset_list)

# Collects one column of held-out forecasts per ticker.
predictions = DataFrame()

for i in range(no_assets):
    ticker = asset_list.iloc[i, 0]

    # Load the asset's workbook and keep its non-missing closing prices.
    data = read_excel(path + ticker + ".xlsx")
    series = data[["Close"]].dropna()

    # Number of lagged differences used as model inputs.
    no_features = 2

    yhat = experiment(series, no_features)
    predictions[ticker] = yhat

Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Fitting 

In [36]:
# Save the collected forecasts (one column per ticker) to an Excel workbook;
# the path is relative, so the file lands in the current working directory.
predictions.to_excel("150day.xlsx")