In [1]:
from math import sqrt
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Flatten
from keras.layers.embeddings import Embedding
from keras.layers import LSTM
from keras.utils import plot_model

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
coffee = pd.read_csv('working_coffee_csv.csv')

In [3]:
coffee.head()

Unnamed: 0,Date,Title,Price,Price_Change,Direction,Rate_of_Change,CV_Vectors,TFIDF_Vectors,Hash_Vectors
0,2007-01-02,India earns more from higher coffee exports in...,1.1506,0.0,0,0.0,[0 0 0 ... 0 0 0],[0. 0. 0. ... 0. 0. 0.],[0. 0. 0. ... 0. 0. 0.]
1,2007-01-03,Friesland raises stake in Indonesian subsidiar...,1.176,0.0254,0,0.022075,[0 0 0 ... 0 0 0],[0. 0. 0. ... 0. 0. 0.],[0. 0. 0. ... 0. 0. 0.]
2,2007-01-04,Nymex announces start date for soft commodity ...,1.1451,-0.0309,0,-0.026276,[0 0 0 ... 0 0 0],[0. 0. 0. ... 0. 0. 0.],[0. 0. 0. ... 0. 0. 0.]
3,2007-01-05,India's largest coffee chain extends to Pakistan,1.1506,0.0055,0,0.004803,[0 0 0 ... 0 0 0],[0. 0. 0. ... 0. 0. 0.],[0. 0. 0. ... 0. 0. 0.]
4,2007-01-07,Honduran coffee sales Ugandan coffee funds Soy...,1.1506,-0.0,0,0.0,[0 0 0 ... 0 0 0],[0. 0. 0. ... 0. 0. 0.],[0. 0. 0. ... 0. 0. 0.]


In [4]:
coffee['Date'] = pd.to_datetime(coffee['Date'])
coffee = coffee.set_index("Date")



In [5]:
coffee_shift_df = pd.concat([coffee[['Title']].shift(1),coffee[['Rate_of_Change']].shift(1),\
                             coffee[['CV_Vectors']].shift(1),coffee[['TFIDF_Vectors']].shift(1),\
                             coffee[['Hash_Vectors']].shift(1),coffee[['Price']]],axis=1)
coffee_shift_df.dropna(inplace=True)
coffee_shift_df.head()

Unnamed: 0_level_0,Title,Rate_of_Change,CV_Vectors,TFIDF_Vectors,Hash_Vectors,Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2007-01-03,India earns more from higher coffee exports in...,0.0,[0 0 0 ... 0 0 0],[0. 0. 0. ... 0. 0. 0.],[0. 0. 0. ... 0. 0. 0.],1.176
2007-01-04,Friesland raises stake in Indonesian subsidiar...,0.022075,[0 0 0 ... 0 0 0],[0. 0. 0. ... 0. 0. 0.],[0. 0. 0. ... 0. 0. 0.],1.1451
2007-01-05,Nymex announces start date for soft commodity ...,-0.026276,[0 0 0 ... 0 0 0],[0. 0. 0. ... 0. 0. 0.],[0. 0. 0. ... 0. 0. 0.],1.1506
2007-01-07,India's largest coffee chain extends to Pakistan,0.004803,[0 0 0 ... 0 0 0],[0. 0. 0. ... 0. 0. 0.],[0. 0. 0. ... 0. 0. 0.],1.1506
2007-01-09,Honduran coffee sales Ugandan coffee funds Soy...,0.0,[0 0 0 ... 0 0 0],[0. 0. 0. ... 0. 0. 0.],[0. 0. 0. ... 0. 0. 0.],1.129


In [6]:
tokenizer = Tokenizer()

X = coffee_shift_df['Title']
y = coffee_shift_df['Price']

X_token = tokenizer.fit_on_texts(X)
X_matrix = tokenizer.texts_to_matrix(X)
vocab_size = len(tokenizer.word_index) + 1

In [7]:
X_matrix.shape

(1464, 3822)

In [8]:
model = Sequential()

model.add(Embedding(vocab_size, 50))
model.add(LSTM(150))
model.add(Dense(100, activation='relu'))
model.add(Dropout(.20))
model.add(Dense(35, activation='relu'))
model.add(Dense(1))
          
model.compile(loss='mean_absolute_error', optimizer = 'adam')

In [None]:
hist = model.fit(X_matrix, y, epochs=15, validation_split=0.15, batch_size=150)

Train on 1244 samples, validate on 220 samples
Epoch 1/15


In [None]:
hist.history.keys()

In [None]:
plt.figure(figsize=(10,10))
plt.plot(hist.history['loss'], label = 'Training Loss')
plt.plot(hist.history['val_loss'], label = 'Test Loss')
plt.legend()

In [None]:
from keras.utils.vis_utils import plot_model as plot
from IPthon.display import Image

plt(model, to_file='model.png', show_shapes=True)
Image('model.png')
plt.show()

In [2]:
predicted = hist.predict(X_test)
predicted = np.reshape(predicted, (predicted.size,))
var = variance(predicted, y_test)

NameError: name 'hist' is not defined

In [None]:
print('Plotting Results')

from matplotlib.legend_handler import HandlerLine2D

line1 = plt.plot(y_test, marker='d', label='Actual')
line2 = plt.plot(predicted, marker='o', label='Predicted')

plt.legend(handler_map={line1: HandlerLine2D(numpoints=4)})
plt.show()