## Downloading GloVe Embeddings

In [0]:
!wget http://nlp.stanford.edu/data/glove.42B.300d.zip
!apt install unzip
!unzip "glove.42B.300d.zip"

In [0]:
import os

# Show what is in the working directory to confirm the archive was
# downloaded and extracted.
os.listdir(os.curdir)

['.config',
 'glove.42B.300d.zip',
 'gensim_glove_vectors.txt',
 'glove.42B.300d.txt',
 'sample_data']

## Convert GloVe Embeddings to Word2Vec Format

In [0]:
from gensim.models.keyedvectors import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec

# Input GloVe text file and the word2vec-format text file derived from it.
glove_txt_path = "glove.42B.300d.txt"
word2vec_txt_path = "gensim_glove_vectors.txt"

# Rewrite the GloVe file in word2vec text format so that gensim's
# word2vec loader can read it.
glove2word2vec(glove_input_file=glove_txt_path, word2vec_output_file=word2vec_txt_path)

# Load the converted (non-binary) vectors as a KeyedVectors object.
glove_model = KeyedVectors.load_word2vec_format(word2vec_txt_path, binary=False)

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


## Installing nlppreprocess package (required for the text preprocessing below)

In [0]:
# Install nlppreprocess; the import cell below pulls the NLP class from it.
!pip install nlppreprocess

## Download Dataset

In [0]:
!wget !wget https://archive.ics.uci.edu/ml/machine-learning-databases/00462/drugsCom_raw.zip
!unzip 'drugsCom_raw.zip'

## Import Packages

In [0]:
import re

import numpy as np
import pandas as pd
from keras.layers import Dense, LSTM, Bidirectional, Dropout, SpatialDropout1D, Embedding, GlobalAveragePooling1D
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from nlppreprocess import NLP

## Preprocessing Dataset

In [0]:
# Read the tab-separated training split and keep only the two columns the
# model needs: the free-text review and its numeric rating.
raw_train = pd.read_csv('drugsComTrain_raw.tsv', sep='\t', engine='python')
df = raw_train[['review', 'rating']]
df.head()

Unnamed: 0,review,rating
0,"""It has no side effect, I take it in combinati...",9.0
1,"""My son is halfway through his fourth week of ...",8.0
2,"""I used to take another oral contraceptive, wh...",5.0
3,"""This is my first time using any form of birth...",8.0
4,"""Suboxone has completely turned my life around...",9.0


In [0]:
# Collapse the numeric rating into a binary label: 1 when the integer part
# of the rating is above 5, otherwise 0.
# NOTE: this overwrites 'rating' in place, so the cell must be run exactly
# once per load of the data; a second run would map every label to 0.
df['rating'] = (df['rating'].astype(int) > 5).astype(int)
df.head()

Unnamed: 0,review,rating
0,"""It has no side effect, I take it in combinati...",1
1,"""My son is halfway through his fourth week of ...",1
2,"""I used to take another oral contraceptive, wh...",0
3,"""This is my first time using any form of birth...",1
4,"""Suboxone has completely turned my life around...",1


In [0]:
# Turn the 0/1 labels into the categorical (one-column-per-class) matrix
# used as the training target.
binary_labels = df['rating']
Y = to_categorical(binary_labels)

## Encoding and padding training set

In [0]:
# Vocabulary size kept by the tokenizer and fixed length of every sequence.
max_features = 10000
max_len = 40

# Text cleaner from nlppreprocess; it has to be instantiated before use.
nlp = NLP()

# The data frame only has the columns 'review' and 'rating', so the text
# lives in 'review'. Clean it once and reuse the cleaned text for both
# fitting and encoding, so the sequences are built from exactly the text the
# vocabulary was learned on.
reviews_clean = df['review'].apply(nlp.process).tolist()

tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(reviews_clean)
list_tokenized_train = tokenizer.texts_to_sequences(reviews_clean)

# Pad / truncate every review to max_len token ids.
X_t = pad_sequences(list_tokenized_train, maxlen=max_len)

0

## Create Model

In [0]:
# Build an embedding matrix whose row i is the GloVe vector of the word that
# the Keras tokenizer encodes as integer i.
# The layer returned by glove_model.get_keras_embedding() is indexed by
# gensim's own vocabulary order, which does not match the tokenizer's ids, so
# feeding tokenizer sequences into it looks up vectors of unrelated words.
embedding_dim = glove_model.vector_size
embedding_matrix = np.zeros((max_features, embedding_dim), dtype='float32')
for word, token_id in tokenizer.word_index.items():
    # The tokenizer only emits ids below num_words (= max_features); words
    # missing from GloVe keep an all-zero row, as does the padding id 0.
    if token_id < max_features and word in glove_model:
        embedding_matrix[token_id] = glove_model[word]

model = Sequential()
# Frozen pre-trained embeddings (get_keras_embedding() also defaults to a
# non-trainable layer).
model.add(Embedding(max_features, embedding_dim, weights=[embedding_matrix], trainable=False))
model.add(Bidirectional(LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1)))
# Average the per-timestep LSTM outputs into one vector per review.
model.add(GlobalAveragePooling1D())
model.add(Dense(50, activation='relu'))
model.add(Dropout(0.2))
# Two output units, one per column of the categorical target Y.
model.add(Dense(2, activation='sigmoid'))
model.summary()

W0705 19:38:00.665108 139658073241472 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0705 19:38:00.704457 139658073241472 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0705 19:38:00.720929 139658073241472 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0705 19:38:00.721868 139658073241472 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:181: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.



## Training Model

In [0]:
# Configure the optimiser, loss and reported metric, then train on the padded
# sequences and their categorical labels.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(x=X_t, y=Y, batch_size=256, epochs=10)

W0701 13:45:17.526484 139621069813632 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0701 13:45:17.557547 139621069813632 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/nn_impl.py:180: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7efbb2faf668>

## Prediction

In [0]:
# Encode one example sentence the same way as the training data (token ids,
# then padding to max_len) and show the model's two output scores for it.
sample_texts = ['security staff was not kind']
sample_sequences = tokenizer.texts_to_sequences(sample_texts)
sample_padded = pad_sequences(sample_sequences, maxlen=max_len)
model.predict(sample_padded)

array([[0.99811953, 0.00176055]], dtype=float32)