In [2]:
import pandas as pd
import time
import numpy as np
import mxnet as mx
import inflect
from mxnet import nd, autograd, gluon
from mxnet.gluon import nn, rnn

import datetime
import seaborn as sns

import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

import math

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

import xgboost as xgb
from sklearn.metrics import accuracy_score

In [3]:
context = mx.cpu(); model_ctx=mx.cpu()
mx.random.seed(1719)

In [4]:
def parser(x):
    return datetime.datetime.strptime(x,'%m/%d/%Y')

In [5]:
dataset_ex_df = pd.read_csv("AmazonStockPriceDataset.csv", header=0, parse_dates=[0], date_parser=parser)

In [6]:
dataset_ex_df[['Date','Open','Close']].head(5)

Unnamed: 0,Date,Open,Close
0,2010-01-04,136.25,133.899994
1,2010-01-05,133.429993,134.690002
2,2010-01-06,134.600006,132.25
3,2010-01-07,132.009995,130.0
4,2010-01-08,130.559998,133.520004


In [7]:
number_of_days = (dataset_ex_df.shape[0])
year = int(number_of_days / 365)
month = (number_of_days - year *365) % 30
totalweeks = int(number_of_days % 365) 
week = int((number_of_days % 365) % 7)
days = (number_of_days % 365) % 7

In [14]:
p = inflect.engine()
print(f'The total number of days in the dataset is {number_of_days} equivalent to {totalweeks} {p.plural("week", totalweeks)} ')
print(f'or {year} {p.plural("year", year)}, {month} {p.plural("month", month)} and {days} days ')     

The total number of days in the dataset is 2265 equivalent to 75 weeks 
or 6 years, 15 months and 5 days 


In [None]:
plt.figure(figsize=(14, 5), dpi=100)
plt.plot(dataset_ex_df['Date'], dataset_ex_df['Close'], label='Amazon stock')
plt.vlines(datetime.date(2016,4,20), 0, 270, linestyles='--', colors='gray', label='Train/Test data cut-off')
plt.xlabel('Date')
plt.ylabel('USD')
plt.title('Figure 2: Amazon stock price')
plt.legend()
plt.show()

1. Moving Average
2. MACD
3. Bollinger bands a volatility indicator ~ Made from Moving Average (MA) line, a upper band and lower band. The upper and lower bands are simply MA adding and subtracting standard deviation.
    -Used to define the prevailing high and low prices in a market to characterize the trading band of a financial instrument or commodity
4. Exponential moving average
5. Momentum ~ V−Vx where:

V=Latest price

Vx=Closing price

x=Number of days ago
    -Used to measurement of the speed or velocity of price changes, or the rate of change in price movement for a particular asset.

In [None]:
def get_technical_indicators(dataset): #function to generate feature technical indicators
    
    # Create 7 and 21 days Moving Average
    dataset['ma7'] = dataset['Close'].rolling(window = 7).mean()
    dataset['ma21'] = dataset['Close'].rolling(window = 21).mean()
    
    #Create MACD
    dataset['26ema'] = dataset['Close'].ewm(span=26).mean()
    dataset['12ema'] = dataset['Close'].ewm(span=12).mean()
    dataset['MACD'] = (dataset['12ema']-dataset['26ema'])
    
    #Create Bollinger Bands
    dataset['20sd'] = dataset['Close'].rolling(window = 20).std()
    dataset['upper_band'] = (dataset['Close'].rolling(window = 20).mean()) + (dataset['20sd']*2)
    dataset['lower_band'] = (dataset['Close'].rolling(window = 20).mean()) - (dataset['20sd']*2)
    
    
    #Create Exponential moving average
    dataset['ema'] = dataset['Close'].ewm(com=0.5).mean()
    
    #Create Momentum
    dataset['momentum'] = (dataset['Close']/100)-1
    
    return dataset

In [None]:
dataset_TI_df = get_technical_indicators(dataset_ex_df)
dataset_TI_df.head()

In [None]:
def plot_technical_indicators(dataset, last_days):
    plt.figure(figsize=(16, 10), dpi=100)
    shape_0 = dataset.shape[0]
    xmacd_ = shape_0-last_days
    
    dataset = dataset.iloc[-last_days:, :]
    x_ = range(3, dataset.shape[0])
    x_ =list(dataset.index)
    
    # Plot first subplot
    plt.subplot(2, 1, 1)
    plt.plot(dataset['ma7'],label='MA 7', color='g',linestyle='--')
    plt.plot(dataset['Close'],label='Closing Price', color='b')
    plt.plot(dataset['ma21'],label='MA 21', color='r',linestyle='--')
    plt.plot(dataset['upper_band'],label='Upper Band', color='c')
    plt.plot(dataset['lower_band'],label='Lower Band', color='c')
    plt.fill_between(x_, dataset['lower_band'], dataset['upper_band'], alpha=0.35)
    plt.title('Technical indicators for Amazon - last {} days.'.format(last_days))
    plt.ylabel('USD')
    plt.legend()

    # Plot second subplot
    plt.subplot(2, 1, 2)
    plt.title('MACD')
    plt.plot(dataset['MACD'],label='MACD', linestyle='-.')
    plt.hlines(15, xmacd_, shape_0, colors='g', linestyles='--')
    plt.hlines(-15, xmacd_, shape_0, colors='g', linestyles='--')
    plt.plot(dataset['momentum'],label='Momentum', color='b',linestyle='-')

    plt.legend()
    plt.show()

In [None]:
plot_technical_indicators(dataset_TI_df, 400)

Arima

In [None]:
from statsmodels.tsa.arima_model import ARIMA
from pandas import DataFrame
from pandas import datetime

data_FT = dataset_ex_df[['Date', 'Close']]
series = data_FT['Close']
model = ARIMA(series, order=(5, 1, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())

In [None]:
from pandas.plotting import autocorrelation_plot
autocorrelation_plot(series)
plt.figure(figsize=(10, 7), dpi=80)
plt.show()

In [None]:
from pandas import read_csv
from pandas import datetime
from pandas import DataFrame
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error

X = series.values
size = int(len(X) * 0.66)
train, test = X[0:size], X[size:len(X)]
history = [x for x in train]
predictions = list()
for t in range(len(test)):
    model = ARIMA(history, order=(5,1,0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    predictions.append(yhat)
    obs = test[t]
    history.append(obs)

dataset_TI_df['ARIMA'] = pd.DataFrame(predictions)

In [None]:
error = mean_squared_error(test, predictions)
print('Test MSE: %.3f' % error)

In [None]:
# Plot the predicted (from ARIMA) and real prices

plt.figure(figsize=(12, 6), dpi=100)
plt.plot(test, color='black', label='Real')
plt.plot(predictions, color='yellow', label='Predicted')
plt.xlabel('Days')
plt.ylabel('USD')
plt.title('Figure 5: ARIMA model on Amazon stock')
plt.legend()
plt.show()

In [None]:
dataset_ex_df.head(5)

In [None]:
print('Total dataset has {} samples, and {} features.'.format(dataset_ex_df.shape[0], dataset_ex_df.shape[1]))

FFeature Finding with XGBoost