In [1]:
from pandas import read_csv, DataFrame, concat
from datetime import datetime
import matplotlib.pyplot as pyplot
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM 
import numpy as np 



# Lookup table: English month name -> calendar month number (1-12).
months = {
    'January': 1, 'February': 2, 'March': 3, 'April': 4,
    'May': 5, 'June': 6, 'July': 7, 'August': 8,
    'September': 9, 'October': 10, 'November': 11, 'December': 12,
}


def encode(l):
    """Translate month names into their calendar month numbers.

    Parameters
    ----------
    l : iterable
        Month names spelled exactly like the keys of ``months``
        (e.g. ``'March'``). Matching is exact and case-sensitive.
        Entries must be hashable.

    Returns
    -------
    list of int
        One number per recognised entry, in input order. Entries that are
        not keys of ``months`` are skipped, so the result can be shorter
        than the input.
    """
    # Direct dict lookup instead of scanning every (key, value) pair per entry.
    return [months[m] for m in l if m in months]


def parse(x):
    """Convert a ``month/day/year`` string (e.g. ``'03/15/2020'``) to a datetime.

    Raises ``ValueError`` (from ``datetime.strptime``) when ``x`` does not
    match the ``%m/%d/%Y`` layout.
    """
    date_layout = '%m/%d/%Y'
    parsed = datetime.strptime(x, date_layout)
    return parsed

# Data Loading
# NOTE(review): no `parse_dates` argument is passed, so `date_parser`
# presumably has no effect on this call -- confirm against the pandas
# version in use.
data_set = read_csv('march2020clean.csv', header=0, date_parser = parse)

# Truncate the dataset at the last row that still has a value in
# 'totalSoybeanMealSupply'. `last_valid_index()` returns an index *label*;
# slicing with `.loc` is label-based and inclusive, so that last valid row is
# kept (a positional `.iloc[:label]` slice would silently drop it).
# `.copy()` makes the result an independent frame so the 'Month' assignment
# below does not write into a view of the freshly loaded data.
last_valid = data_set['totalSoybeanMealSupply'].last_valid_index()
data_set = data_set.loc[:last_valid, :].copy()

# Column names listed by the original author (presumably the columns
# available in the CSV):
#   'closePrice','openPrice','highPrice','lowPrice',
#   'totalSoybeanMealSupply','totalSoybeanMealDemand',
#   'soybeanOilSupply','soybeanOilDemand','sunflowerSeedPrice','canolaPrice',
#   'peanutsPrice','flaxseedPrice','soybeanOilPrice','cottonseedOilPrice',
#   'sunflowerseedOilPrice','canolaOilPrice','peanutOilPrice','cornOilPrice',
#   'soybeanMealPrice','cottonseedmealPrice','sunflowerseedMealPrice',
#   'linseedMealPrice'

# Columns kept for modelling; 'closePrice' comes first, which makes it column 0
# of the value matrix built from this frame.
target_col = ['closePrice','canolaOilPrice','soybeanOilPrice','cornOilPrice','soybeanMealPrice','cottonseedmealPrice','sunflowerseedMealPrice','linseedMealPrice', 'Month']
# Replace month names with their numeric encoding.
data_set['Month'] = encode(data_set['Month'])
data_set = data_set.loc[:,target_col]


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Numeric matrix of the selected columns, cast to float32.
values = data_set.values.astype('float32')

# Disabled per-column plotting code kept from the original notebook.
# NOTE(review): `interest` is not defined in the visible cells, so this would
# need a list of column positions before it can be re-enabled.
# pyplot.figure()
# for i,col in enumerate(interest):
#     ax = pyplot.subplot(len(interest), 1, i+1)
#     pyplot.plot([datetime.strptime(x,'%m/%d/%Y').date() for x in values[:,0]],values[:, col])
#     pyplot.title(data_set.columns[col], y=0.7, loc='right')
#     ax.locator_params(axis='y', nbins=4)
#     ax.locator_params(axis='x', nbins=6)
# pyplot.show()

# Standardise every column. `scaler` is kept so predictions can be mapped back
# to the original units later.
# NOTE(review): the scaler is fit on every row of `values`, i.e. before any
# train/test split, so held-out rows contribute to the scaling statistics.
scaler = StandardScaler()
scaled = scaler.fit_transform(values)




In [None]:
# Data Analysis
def series_to_supervised(data, y_col=0, lag=1):
    """Turn a row-ordered series into (features, target) rows.

    For every index ``i`` from ``lag`` up to ``len(data) - 1`` the output row
    is the full row ``data[i - lag]`` followed by the single value
    ``data[i][y_col]``; the last element of each output row is therefore the
    target and everything before it is the input.

    Parameters
    ----------
    data : sequence of 1-D rows (e.g. a 2-D numpy array)
        Rows in time order.
    y_col : int, optional
        Position within a row of the value to use as the target. Default 0.
    lag : int, optional
        How many rows back the feature row lies relative to the target row.
        Default 1, which is the value that used to be hard-coded.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array per usable index; empty when ``len(data) <= lag``.
    """
    return [
        np.concatenate((data[i - lag], [data[i][y_col]]))
        for i in range(lag, len(data))
    ]

# Supervised rows: all columns of the previous step followed by the target
# value of the current step (last column).
data = np.asarray(series_to_supervised(scaled))
print(data)

# Chronological 80/20 split: the first 80% of rows train, the rest test.
train_count = int(len(data) * 0.8)
train_data = data[:train_count]
test_data = data[train_count:]
# Last column is the target, everything before it is the input.
train_x, train_y = train_data[:,:-1], train_data[:,-1]
test_x, test_y = test_data[:,:-1], test_data[:,-1]

# Reshape input to have the form (sample, timestep, features).
# Each supervised row is one sample with a single timestep, so the sample
# count stays on axis 0 and matches the length of train_y / test_y. Putting
# the row count on axis 1 instead would yield a single sample with many
# timesteps, which no longer lines up with the targets.
train_x = train_x.reshape((train_x.shape[0], 1, train_x.shape[1]))
test_x = test_x.reshape((test_x.shape[0], 1, test_x.shape[1]))


In [None]:
# Network definition: one 50-unit LSTM layer feeding a single-output dense layer.
# The LSTM input shape is taken from the last two axes of train_x.
model = Sequential([
    LSTM(50, input_shape=(train_x.shape[1], train_x.shape[2])),
    Dense(1),
])
model.compile(optimizer='adam', loss='mae')

# Training, with the test split passed as validation data so both losses are
# recorded per epoch.
history = model.fit(
    train_x,
    train_y,
    validation_data=(test_x, test_y),
    epochs=50,
    batch_size=9,
)

# Loss curves for the training and validation data.
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
    pyplot.plot(history.history[history_key], label=curve_label)
pyplot.legend()
pyplot.show()

In [None]:
# Forecast for the test inputs.
yhat = model.predict(test_x)

# Collapse test_x from 3-D to 2-D, keeping axis 0 and axis 2 as the new shape.
test_x = test_x.reshape((test_x.shape[0], test_x.shape[2]))

# The scaler was fit on full rows, so a single column cannot be inverted on
# its own: put the column of interest in position 0, pad with the remaining
# input columns, invert, then keep column 0 only.
inv_yhat = scaler.inverse_transform(np.hstack((yhat, test_x[:, 1:])))[:, 0]

# Same procedure for the true targets, after making them a column vector.
test_y = test_y.reshape((len(test_y), 1))
inv_y = scaler.inverse_transform(np.hstack((test_y, test_x[:, 1:])))[:, 0]

# Actual vs. forecast values in original units.
pyplot.figure()
for curve, curve_label in ((inv_y, 'Actual'), (inv_yhat, 'Forecast')):
    pyplot.plot(curve, label=curve_label)
pyplot.legend()
pyplot.show()