In [196]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Flatten
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import yfinance as yf
from keras import backend as K
from metrics import *
import yfinance as yf

In [197]:
#import data (10 year time frame)
# Symbols for each commodity
# Gold:GC=F
# SILVER: SI=F
# CORN:CORN
# Natural_Gas: NG=F
# COFFEE : KC=F
# Crude_Oil: CL=F

df_gold= yf.Ticker("GC=F").history(start='2012-10-10', end='2022-10-10').reset_index()
df_silver= yf.Ticker("SI=F").history(start='2012-10-10', end='2022-10-10').reset_index()
df_corn= yf.Ticker("CORN").history(start='2012-10-10', end='2022-10-10').reset_index()
df_gas= yf.Ticker("NG=F").history(start='2012-10-10', end='2022-10-10').reset_index()
df_oil= yf.Ticker("CL=F").history(start='2012-10-10', end='2022-10-10').reset_index()
df_coffee= yf.Ticker("KC=F").history(start='2012-10-10', end='2022-10-10').reset_index()


In [198]:
df_coffee.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2012-10-10 00:00:00-04:00,166.649994,166.949997,163.300003,163.449997,14508,0.0,0.0
1,2012-10-11 00:00:00-04:00,163.300003,163.699997,160.300003,160.75,16170,0.0,0.0
2,2012-10-12 00:00:00-04:00,161.100006,163.100006,159.699997,161.699997,10944,0.0,0.0
3,2012-10-15 00:00:00-04:00,161.699997,162.75,159.5,160.850006,10389,0.0,0.0
4,2012-10-16 00:00:00-04:00,161.5,163.449997,160.350006,162.800003,9753,0.0,0.0


In [199]:
#Visualizin the data
import plotly.express as px
fig = px.line(df_gold, x='Date', y="Open")
fig.show()

In [200]:
print(df_gold.Date.max())
print(df_gold.Date.min())

2022-10-07 00:00:00-04:00
2012-10-10 00:00:00-04:00


In [201]:
# fig, axes = plt.subplots(6, 2, sharex=True, figsize=(20,32))
# plt.grid(True)
# sns.lineplot(ax=axes[0, 0], data=df_gold, x='Date', y='Open')
# axes[0,0].set_title('gold')
# sns.lineplot(ax=axes[0, 1], data=df_coffee, x='Date', y='Open')
# axes[0,1].set_title('Coffee')
# sns.lineplot(ax=axes[1, 0], data=df_oil, x='Date', y='Open')
# axes[1,0].set_title('Oil')
# sns.lineplot(ax=axes[1, 1], data=df_gas, x='Date', y='Open')
# axes[1,1].set_title('Natural Gas')
# sns.lineplot(ax=axes[2, 0], data=df_corn, x='Date', y='Open')
# axes[2,0].set_title('Corn')
# sns.lineplot(ax=axes[2, 1], data=df_silver, x='Date', y='Open')
# axes[2,1].set_title('Silver')


In [202]:
print(df_gold.shape)
date_train=pd.to_datetime(df_gold['Date'])
date_train

(2513, 8)


0      2012-10-10 00:00:00-04:00
1      2012-10-11 00:00:00-04:00
2      2012-10-12 00:00:00-04:00
3      2012-10-15 00:00:00-04:00
4      2012-10-16 00:00:00-04:00
                  ...           
2508   2022-10-03 00:00:00-04:00
2509   2022-10-04 00:00:00-04:00
2510   2022-10-05 00:00:00-04:00
2511   2022-10-06 00:00:00-04:00
2512   2022-10-07 00:00:00-04:00
Name: Date, Length: 2513, dtype: datetime64[ns, America/New_York]

In [203]:
Scale=StandardScaler()
def data_prep(df, lookback, future, Scale):
    date_train=pd.to_datetime(df['Date'])
    df_train=df[['Open','High','Low','Close','Volume','Dividends','Stock Splits']]
    df_train=df_train.astype(float)

    df_train_scaled=Scale.fit_transform(df_train)

    X, y =[],[]
    for i in range(lookback, len(df_train_scaled)-future+1):
        X.append(df_train_scaled[i-lookback:i, 0:df_train.shape[1]])
        y.append(df_train_scaled[i+future-1:i+future, 0])

    return np.array(X), np.array(y), df_train, date_train

Lstm_x, Lstm_y, df_train, date_train = data_prep(df_gold, 30, 1, Scale)

In [204]:
def Lstm_fallback(X,y):
    model = Sequential()

    model.add(LSTM(64, activation='relu',input_shape=(X.shape[1], X.shape[2]),  return_sequences=True))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())
    model.add(LSTM(32, activation='relu', return_sequences=False))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())
    model.add(Dense(32, activation='relu'))
    model.add(Dense(y.shape[1], activation='relu'))

    opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)
    model.compile(
            loss='mse',
            optimizer=opt,
        )

    es = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=15, restore_best_weights=True)
    model.fit(X, y, epochs=10, verbose=1, callbacks=[es], validation_split=0.1, batch_size=16)
    return model

In [205]:
def Lstm_model1(X, y):
    regressor = Sequential()

    regressor.add(LSTM(units = 50, return_sequences = True, input_shape = (X.shape[1], X.shape[2])))
    regressor.add(Dropout(0.2))
    regressor.add(LSTM(units = 50, return_sequences = True))
    regressor.add(Dropout(0.2))
    regressor.add(LSTM(units = 50, return_sequences = True))
    regressor.add(Dropout(0.2))
    regressor.add(LSTM(units = 50))
    regressor.add(Dropout(0.2))
    regressor.add(Dense(units = 1))

    regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')

    es = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=15, restore_best_weights=True)
    regressor.fit(X, y, epochs = 10, validation_split=0.1, batch_size = 32, verbose=1, callbacks=[es])
    return regressor

In [206]:
def Lstm_model2(X,y):
    model=Sequential()
    model.add(LSTM(20,return_sequences=True,input_shape=(X.shape[1], X.shape[2])))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())
    #model.add(LSTM(15,return_sequences=True))
    #model.add(Dropout(0.2))
    #model.add(BatchNormalization())
    model.add(LSTM(15))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1))
    # K.set_value(model.optimizer.learning_rate, 0.001)
    adam = optimizers.Adam(0.001)
    model.compile(loss='mean_squared_error',optimizer=adam)

    es = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=15, restore_best_weights=True)
    model.fit(X, y,validation_split=0.2,epochs=10,batch_size=32,verbose=1, callbacks=[es])
    return model

In [207]:
def predict_open(model,date_train,Lstm_x,df_train, future, Scale):
    forecasting_dates=pd.date_range(list(date_train)[-1], periods=future, freq='1d').tolist()
    predicted=model.predict(Lstm_x[-future:])
    predicted1=np.repeat(predicted, df_train.shape[1], axis=-1)
    predicted_descaled=Scale.inverse_transform(predicted1)[:,0]
    return predicted_descaled,forecasting_dates

def output_prep(forecasting_dates,predicted_descaled):
    dates=[]
    for i in forecasting_dates:
        dates.append(i.date())
    df_final=pd.DataFrame(columns=['Date','Open'])
    df_final['Date']=pd.to_datetime(dates)
    df_final['Open']=predicted_descaled
    return df_final

In [208]:
def results(df, lookback, future, Scale, x):
    Lstm_x, Lstm_y, df_train, date_train = data_prep(df, lookback, future, Scale)
    model=Lstm_model1(Lstm_x,Lstm_y)
    loss=pd.DataFrame(model.history.history)
    # loss.plot()
    future=30
    predicted_descaled,forecasting_dates=predict_open(model,date_train,Lstm_x,df_train,future, Scale)
    results=output_prep(forecasting_dates,predicted_descaled)
    print(results.head())
    plt.show()
    fig = px.area(results, x="Date", y="Open", title=x)
    fig.update_yaxes(range=[results.Open.min()-10, results.Open.max()+10])
    fig.show()

In [209]:
def results1(df, lookback, future, Scale, x):
    Lstm_x, Lstm_y, df_train, date_train = data_prep(df, lookback, future, Scale)
    model=Lstm_model2(Lstm_x,Lstm_y)
    loss=pd.DataFrame(model.history.history)
    # loss.plot()
    # future=30
    predicted_descaled,forecasting_dates=predict_open(model,date_train,Lstm_x,df_train,future, Scale)
    Lstm_y = np.array(Lstm_y) # convert y_test to a numpy array
    Lstm_y = Lstm_y.reshape(-1, 1) # reshape y_test to a one-dimensional array
    print(f'R2  : {R2(predicted_descaled, Lstm_y):.3F}')
    print(f'RSE : {RSE(predicted_descaled, Lstm_y):.3F}')
    print(f'MAE : {MAE(predicted_descaled, Lstm_y):.3F}')
    print(f'MSE : {MSE(predicted_descaled, Lstm_y):.3F}')
    print(f'RMSE: {RMSE(predicted_descaled, Lstm_y):.3F}')
    print(f'MAPE: {MAPE(predicted_descaled, Lstm_y):.3F}')
    print(f'MSPE: {MSPE(predicted_descaled, Lstm_y):.7F}')
    results=output_prep(forecasting_dates,predicted_descaled)
    print(results.head())
    # plt.show()
    # fig = px.area(results, x="Date", y="Open", title=x)
    # fig.update_yaxes(range=[results.Open.min()-10, results.Open.max()+10])
    # fig.show()


In [210]:

# predictions = model.predict(x_test)
# predictions = scaler.inverse_transform(predictions)
# y_test = np.array(y_test) # convert y_test to a numpy array
# y_test = y_test.reshape(-1, 1) # reshape y_test to a one-dimensional array
# print(f'R2  : {R2(predictions, y_test):.3F}')
# print(f'RSE : {RSE(predictions, y_test):.3F}')
# print(f'MAE : {MAE(predictions, y_test):.3F}')
# print(f'MSE : {MSE(predictions, y_test):.3F}')
# print(f'RMSE: {RMSE(predictions, y_test):.3F}')
# print(f'MAPE: {MAPE(predictions, y_test):.3F}')
# print(f'MSPE: {MSPE(predictions, y_test):.7F}')

In [211]:
results1(df_gold,  2000, 14, Scale, 'Gold')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
R2  : -488738088.817
RSE : 22107.422
MAE : 1628.077
MSE : 2650640.640
RMSE: 1628.079
MAPE: 1144.068
MSPE: 1362971.0317463
        Date         Open
0 2022-10-07  1633.009399
1 2022-10-08  1632.176880
2 2022-10-09  1631.506104
3 2022-10-10  1631.417725
4 2022-10-11  1630.869507
