In [None]:
#imports
# `from __future__` imports must be the first statement of the cell,
# otherwise Python raises a SyntaxError when the cell is compiled.
from __future__ import print_function

import numpy as np
import pandas as pd
import keras
from keras import initializers
from keras.layers import Dense, Embedding
from keras.layers import SimpleRNN
from keras.models import Sequential
from keras.preprocessing import sequence
from sklearn.model_selection import train_test_split

In [None]:
# Import PyDrive and associated libraries.
# This only needs to be done once per notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
# Flow: authenticate the Colab user, hand the application-default
# credentials to PyDrive, then build the `drive` client used below.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Download a file based on its file ID.
#
# A file ID looks like: laggVyWshwcyP6kEI-y_W3P8D26sz
# NOTE: `file_id` is still the template placeholder; it has to be replaced
# with a real Drive file ID before this cell can fetch anything.
file_id = 'REPLACE_WITH_YOUR_FILE_ID'
downloaded = drive.CreateFile({'id': file_id})
# NOTE(review): this prints the *entire* file content as one string, which
# floods the cell output for any non-trivial file — consider printing a slice.
# NOTE(review): the data-loading cell reads from "/content/drive/My Drive/...",
# which looks like a mounted-Drive path; nothing visible in this notebook mounts
# Drive there — confirm how that path is expected to exist.
print('Downloaded content "{}"'.format(downloaded.GetContentString()))

In [None]:
# Load the raw product and review tables and drop the columns that carry
# no information.
# The paths are read as-is from the Colab filesystem; re-running this cell
# re-reads both CSVs, so it is safe to execute more than once.
products = pd.read_csv("/content/drive/My Drive/Kaggle/combined/products.csv")
# Use the `columns=` keyword: passing `axis` positionally (`drop("subhead", 1)`)
# was deprecated in pandas 1.3 and raises a TypeError from pandas 2.0 on.
products = products.drop(columns="subhead")

reviews = pd.read_csv("/content/drive/My Drive/Kaggle/combined/reviews.csv")
# Same keyword form as above so both frames are cleaned consistently.
reviews = reviews.drop(columns=["ingredients", "texture", "likes", "taste"])



In [None]:
# Sanity check: (rows, columns) of `products` after the "subhead" column was dropped.
print(products.shape)

(241, 7)


In [None]:
# Hyper-parameters shared by the cells below.
# Passed as the first argument of the Embedding layer (its vocabulary size).
max_features = 30000
# Length every sequence is padded/truncated to by pad_sequences.
maxlen = 50
# Mini-batch size used for both model_rnn.fit and model_rnn.evaluate.
batch_size = 32

In [None]:
## Split the data into train and test subsets (80/20, shuffled, fixed seed 42).
## NOTE(review): nothing in this cell tokenizes text — train_test_split only
## partitions its inputs, so `products` still holds whatever the CSV contained.
## NOTE(review): only one array (`products`) is passed, so per the scikit-learn
## docs the call should return two objects (train, test). The nested (x, y)
## unpacking below therefore looks like it will fail, and no feature/label
## columns are selected anywhere — confirm which columns are the text input and
## the binary target, encode the text as integer sequences, and pass X and y
## separately.
(x_train, y_train), (x_test, y_test) = train_test_split(products, random_state=42, shuffle=True, test_size=0.2)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

In [None]:
# Bring every sequence in both splits to exactly `maxlen` entries:
# shorter ones are padded, longer ones are truncated.
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

In [None]:
# Peek at one padded training sequence (row 123) as the cell output.
x_train[123]

In [None]:
# building RNN model
# Architecture: Embedding -> SimpleRNN -> Dense(sigmoid) for a binary target.
rnn_hidden_dim = 10       # number of units in the recurrent layer
word_embedding_dim = 50   # size of the vector learned for each token id

model_rnn = Sequential()
# Maps each integer token id (vocabulary of `max_features`) to a dense vector.
model_rnn.add(Embedding(max_features, word_embedding_dim))
# No `input_shape` here: Sequential only honours it on the first layer, so on
# this second layer it was ignored (newer Keras emits a warning), and
# `x_train.shape[1:]` was not a valid (timesteps, features) RNN shape anyway.
# Dropping it also lets this cell run without `x_train` being defined.
# Identity recurrent weights + ReLU + tiny-variance input kernel: the recurrent
# state starts out as a near-copy of the previous step.
model_rnn.add(SimpleRNN(rnn_hidden_dim,
                    kernel_initializer=initializers.RandomNormal(stddev=0.001),
                    recurrent_initializer=initializers.Identity(gain=1.0),
                    activation='relu'))

# Single sigmoid unit: outputs a probability for the positive class.
model_rnn.add(Dense(1, activation='sigmoid'))

In [None]:
# Print the layer-by-layer overview of model_rnn.
model_rnn.summary()

In [None]:
# RMSprop with a small step size.
# The keyword is `learning_rate`: the legacy `lr` alias has been deprecated
# since Keras 2.3 and is rejected outright by current Keras releases.
rmsprop = keras.optimizers.RMSprop(learning_rate=.0001)

# Binary cross-entropy matches the single sigmoid output unit; accuracy is
# tracked as an additional metric during fit/evaluate.
model_rnn.compile(loss='binary_crossentropy',
              optimizer=rmsprop,
              metrics=['accuracy'])

In [None]:
# Train for 10 epochs with mini-batches of `batch_size`.
# NOTE(review): (x_test, y_test) is passed as validation_data here and is also
# the set scored by model_rnn.evaluate in the next cell, so the reported test
# metrics are not from data unseen during model selection — consider carving
# out a separate validation split.
model_rnn.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=10,
          validation_data=(x_test, y_test))

In [None]:
# Score the trained model on the test split.
# The model was compiled with loss='binary_crossentropy' and
# metrics=['accuracy'], so the two values unpacked here are presumably the
# loss (`score`) followed by the accuracy (`acc`) — confirm against the
# installed Keras version.
score, acc = model_rnn.evaluate(x_test, y_test,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)