In [None]:
# Mount Google Drive so the review CSVs and the saved model are reachable
# under /content/drive.
from google.colab import drive

MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Locations of the training and test review CSVs on the mounted Drive.
DATA_DIR = "/content/drive/MyDrive/fallabella/sentiments_train_test_reviews"
TRAIN_PATH = DATA_DIR + "/reviews_training_26000.csv"
TEST_PATH = DATA_DIR + "/reviews_test_4000.csv"

In [None]:
import pickle
import re

import keras
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_hub as hub
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Embedding
from tensorflow.keras.models import Sequential


In [None]:
# Read the training and test reviews from their CSV files.
df_train, df_test = (pd.read_csv(csv_path) for csv_path in (TRAIN_PATH, TEST_PATH))

In [None]:
# Keep only the review text and its label.
# The explicit .copy() makes each frame independent of the frame it was
# sliced from, so the column assignments made in later cells (label encoding,
# lower-casing) cannot raise pandas' SettingWithCopyWarning.
df_train = df_train[['review','sentiment']].copy()
df_test = df_test[['review','sentiment']].copy()

In [None]:
# Preview the first five test rows.
df_test.head(n=5)

Unnamed: 0,review,sentiment
0,I have to confess that I am severely disappoin...,negative
1,I have never understood the appeal of this sho...,negative
2,This is supposed to be based on Wilkie Collins...,negative
3,Of all the British imperialist movies like Fou...,positive
4,I loved this film. Not being a swooning Ed Woo...,positive


In [None]:
# Encode the string labels as integers: positive -> 1, negative -> 0.
label_codes = {"positive":1,"negative":0}
for frame in (df_train, df_test):
    frame['sentiment'] = frame['sentiment'].replace(label_codes)

In [None]:
# Confirm the labels are now numeric by showing the first two test rows.
df_test.head(n=2)

Unnamed: 0,review,sentiment
0,I have to confess that I am severely disappoin...,0
1,I have never understood the appeal of this sho...,0


In [None]:
# Lower-case the training reviews (the test frame is left unchanged here).
lowercased_reviews = df_train['review'].str.lower()
df_train['review'] = lowercased_reviews

In [None]:
def clean_review(text, stop_words):
    """Normalise a single review string.

    Lower-cases `text`, strips `<br />` line-break tags and URLs, replaces
    every character that is not an ASCII letter, digit or whitespace with a
    space, and drops the tokens contained in `stop_words`.

    Args:
        text: Raw review string.
        stop_words: Collection of lower-case words to remove; pass a set so
            each membership test is constant-time.

    Returns:
        The cleaned review as a single space-separated string.
    """
    text = text.lower()
    # The reviews contain literal "<br />" tags; remove them before the
    # punctuation pass so they do not leave stray "br" tokens behind.
    text = re.sub(r'<br\s*/?>', ' ', text)
    # Match both http and https links.
    text = re.sub(r'https?:\S+', ' ', text)
    # A character class is required here: the bare pattern 'A-Za-z0-9' only
    # matches that literal string and therefore removes nothing. Replacing
    # with a space (not '') keeps words on either side of punctuation apart.
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    return ' '.join(word for word in text.split() if word not in stop_words)


# NOTE(review): `stopwords` is not imported in any visible cell; it is
# presumably `nltk.corpus.stopwords` - confirm and add the import (plus the
# corpus download) to the imports cell.
# Build the stop-word set once instead of re-reading the word list per token.
ENGLISH_STOP_WORDS = set(stopwords.words('english'))

df_train['review'] = df_train['review'].apply(
    lambda text: clean_review(text, ENGLISH_STOP_WORDS)
)
# NOTE(review): only the training text is cleaned here; df_test and the raw
# strings passed to predict() in later cells do not go through clean_review.


In [None]:
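# NOTE: len(x) is the number of characters in each review, so the statistic
# printed below is measured in characters even though the message says "words".
# result = [len(x) for x in df_train['review']]
# print("Mean %.2f words (%f)" % (np.mean(result), np.std(result)))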

Mean 1291.70 words (950.707228)


In [None]:
# Show the installed TensorFlow version.
tf.version.VERSION

'2.8.0'

In [None]:
def _to_dataset(frame):
    """Return a tf.data.Dataset of (review, sentiment) pairs built from `frame`."""
    return tf.data.Dataset.from_tensor_slices((frame['review'], frame['sentiment']))


# Training pipeline from df_train, validation pipeline from df_test.
dataset = _to_dataset(df_train)
val_dataset = _to_dataset(df_test)

In [None]:
# Display the training review stored under index label 4.
df_train.loc[4, 'review']

'phil the alien is one of those quirky films where the humour is based around the oddness of everything rather than actual punchlines.<br /><br />at first it was very odd and pretty funny but as the movie progressed i didn\'t find the jokes or oddness funny anymore.<br /><br />its a low budget film (thats never a problem in itself), there were some pretty interesting characters, but eventually i just lost interest.<br /><br />i imagine this film would appeal to a stoner who is currently partaking.<br /><br />for something similar but better try "brother from another planet"'

In [None]:
# TF-Hub text-embedding module used as the first layer of the classifier.
pretrained_model = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"

# input_shape=[] with dtype=tf.string: each example is one scalar string.
# trainable=True: the embedding weights are updated during fit().
hub_layer = hub.KerasLayer(
    pretrained_model,
    trainable=True,
    dtype=tf.string,
    input_shape=[],
)

In [None]:
# Classifier: hub text embedding -> 16-unit ReLU layer -> single sigmoid unit
# that outputs a score between 0 and 1.
layers = tf.keras.layers
model = tf.keras.Sequential([
    hub_layer,
    layers.Dense(16, activation="relu"),
    layers.Dense(1, activation="sigmoid"),
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer_1 (KerasLayer)  (None, 20)                400020    
                                                                 
 dense_2 (Dense)             (None, 16)                336       
                                                                 
 dense_3 (Dense)             (None, 1)                 17        
                                                                 
Total params: 400,373
Trainable params: 400,373
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Binary classification set-up: Adam optimizer, binary cross-entropy loss,
# accuracy reported during training and validation.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Training hyper-parameters, named so they are easy to find and tune.
BATCH_SIZE = 512
EPOCHS = 20

# Shuffle with a buffer as large as the dataset itself: a buffer smaller than
# the dataset only shuffles within a sliding window, so examples far apart in
# the file would never be mixed into the same batch.
train_batches = dataset.shuffle(dataset.cardinality()).batch(BATCH_SIZE)
val_batches = val_dataset.batch(BATCH_SIZE)

# Keep the History object so the per-epoch loss/accuracy can be inspected or
# plotted afterwards; the bare name on the last line preserves the cell output.
history = model.fit(train_batches,
                    epochs=EPOCHS,
                    validation_data=val_batches,
                    verbose=1)
history

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f9981d3d0d0>

In [None]:
# Score the test review stored under index label 4 (predict expects a batch,
# hence the one-element list).
sample_review = df_test.loc[4, 'review']
model.predict([sample_review])

array([[0.9994842]], dtype=float32)

In [None]:
# Show the first five test rows to compare labels against the prediction.
df_test.head(n=5)

Unnamed: 0,review,sentiment
0,I have to confess that I am severely disappoin...,0
1,I have never understood the appeal of this sho...,0
2,This is supposed to be based on Wilkie Collins...,0
3,Of all the British imperialist movies like Fou...,1
4,I loved this film. Not being a swooning Ed Woo...,1


In [None]:
# Display the full text of the test review stored under index label 1.
df_test.loc[1, 'review']

'I have never understood the appeal of this show. The acting is poor (Debra Jo Rupp being a notable exception), the plots of most episodes are trite and uninspiring, the dialogue is weak, the jokes unfunny and it is painful to try and sit through even half an episode. Furthermore the link between this show and the \'70s\' is extremely tenuous beyond the style of dress and the scenery and background used for the show -it seems to be nothing more than a modern sitcom with the same old unfunny, clichéd scripts that modern sitcoms have dressed up as depicting a show from twenty years ago in the hope that it will gain some nostalgic viewers or something like that. Both "Happy Days" and "The Wonder Years" employ the same technique much more effectively and are actually a pleasure to watch in contrast to this horrible, pathetic excuse for a show'

In [None]:
# Serialise the trained model to Drive with pickle.
model_pickle_path = '/content/drive/MyDrive/fallabella/sentiments_train_test_reviews/sentiment_analyser.pkl'
with open(model_pickle_path, mode='wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Read the pickled model back from Drive.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that were produced by a trusted source.
saved_model_path = "/content/drive/MyDrive/fallabella/sentiments_train_test_reviews/sentiment_analyser.pkl"
with open(saved_model_path, mode='rb') as model_file:
    sentiment_model = pickle.load(model_file)


In [None]:
# Smoke-test the reloaded model on a short hand-written review.
sample_text = "moive was not good"
sentiment_model.predict([sample_text])

array([[0.26079163]], dtype=float32)