In [0]:
import csv
import numpy as np
import pandas as pd

In [0]:
import numpy as np
np.random.seed(0)
from keras.models import load_model, Model
from keras.layers import  Input, Dropout, LSTM, Activation, RepeatVector, Lambda,Dense
from keras.layers import Conv1D, MaxPooling1D, Softmax
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.initializers import glorot_uniform
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.optimizers import Adam
from keras.utils import to_categorical
np.random.seed(1)


Using TensorFlow backend.


# **Pre-Processing Data**

In [0]:

def read_glove_vecs(glove_file, emb_dim=50):
    """Load a GloVe-style text file and build word/index lookup tables.

    Each non-empty line is expected to be a word followed by its
    whitespace-separated vector components.

    Args:
        glove_file: path to the UTF-8 encoded embeddings file.
        emb_dim: expected vector length; words whose vector has a different
            length are dropped. Defaults to 50.

    Returns:
        A tuple ``(words_to_index, index_to_words, word_to_vec_map, count)``:
        - words_to_index: word -> 1-based index in sorted word order.
        - index_to_words: the inverse mapping.
        - word_to_vec_map: word -> ``np.float64`` vector of length ``emb_dim``.
        - count: number of words dropped because of a wrong vector length.

        Indices are NOT renumbered after dropping, so the largest index can be
        ``len(words_to_index) + count``.
    """
    word_to_vec_map = {}
    with open(glove_file, 'r', encoding='UTF-8') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:
                # Blank line (e.g. trailing newline at end of file): nothing to parse.
                continue
            word_to_vec_map[fields[0]] = np.array(fields[1:], dtype=np.float64)

    # Index 0 is left free; indices start at 1 in sorted word order.
    words_to_index = {}
    index_to_words = {}
    for i, w in enumerate(sorted(word_to_vec_map), start=1):
        words_to_index[w] = i
        index_to_words[i] = w

    # Drop malformed entries from all three tables so they stay consistent.
    count = 0
    for w, i in list(words_to_index.items()):
        if word_to_vec_map[w].shape[0] != emb_dim:
            del word_to_vec_map[w]
            del words_to_index[w]
            del index_to_words[i]
            count += 1
    return words_to_index, index_to_words, word_to_vec_map, count

In [0]:
def sentence_to_embed(sentence,word_to_vec_map):
  """Return the embedding of every whitespace-separated word in `sentence`.

  Words are lower-cased before the lookup. A word that is missing from
  `word_to_vec_map` raises KeyError.
  """
  return [word_to_vec_map[token.lower()] for token in sentence.split()]


In [0]:
def sentence_to_index(X,max_len,word_to_index,unk_index=1999):
  """Convert sentences into a zero-padded matrix of word indices.

  Args:
    X: indexable collection of sentence strings (numpy array, list, ...).
    max_len: number of columns; longer sentences are truncated and shorter
      ones are right-padded with 0.
    word_to_index: mapping from lower-cased word to integer index.
    unk_index: index stored for words missing from `word_to_index`.
      Defaults to 1999, the value previously hard-coded here.

  Returns:
    A float numpy array of shape (len(X), max_len).
  """
  n_sentences = len(X)
  indices = np.zeros((n_sentences, max_len))

  for i in range(n_sentences):
    words = [w.lower() for w in X[i].split()]

    for j, w in enumerate(words[:max_len]):
      # Only a missing word falls back to unk_index; any other error now
      # propagates instead of being hidden by a bare except.
      indices[i, j] = word_to_index.get(w, unk_index)

  return indices


# **Embedding Layer**

In [0]:
def Embedding_layer(word_to_vec_map, word_to_index,count):
    """Build a frozen Keras Embedding layer initialised with pretrained vectors.

    Args:
        word_to_vec_map: mapping word -> 1-D numpy vector; all vectors are
            expected to have the same length.
        word_to_index: mapping word -> row index in the embedding matrix.
        count: number of extra rows to allocate beyond ``len(word_to_index) + 1``.
            read_glove_vecs returns the number of dropped words here, and it
            does not renumber indices after dropping them.

    Returns:
        A built, non-trainable ``Embedding`` layer whose weights are the
        pretrained vectors. Rows with no word assigned stay all-zero.

    Raises:
        ValueError: if `word_to_vec_map` is empty.
    """
    if not word_to_vec_map:
        raise ValueError("word_to_vec_map is empty; cannot infer the embedding size")

    # +1 keeps row 0 free (indices start at 1 in read_glove_vecs).
    vocab_len = len(word_to_index) + 1 + count
    # Take the dimensionality from the data, not from a hard-coded probe word.
    emb_dim = next(iter(word_to_vec_map.values())).shape[0]

    # Row `index` holds the vector of the word mapped to `index`.
    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word].reshape(emb_dim,)

    # trainable=False keeps the pretrained vectors fixed during training.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)

    # The layer must be built before its weights can be set.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer


# **Attention Mechanism**

In [0]:
def attention(s_prev,a,max_document_length):
  """Compute one attention context from encoder outputs and a decoder state.

  Args:
    s_prev: previous decoder hidden state tensor
      (assumed shape (batch, n_s) -- TODO confirm against the caller).
    a: encoder output sequence
      (assumed shape (batch, max_document_length, features) -- TODO confirm).
    max_document_length: number of time steps `s_prev` is repeated over.

  Returns:
    The context tensor produced by ``Dot(axes=1)([a, alpha])``, i.e. `a`
    weighted by the attention weights and summed over the time axis.

  NOTE: every call creates fresh Dense/Softmax layers, so attention weights
  are not shared between calls.
  """
  # Layers are built first and then called on tensors (Keras functional API).
  s_prev = RepeatVector(max_document_length)(s_prev)
  concat = Concatenate(axis=-1)([a, s_prev])
  alpha_scores = Dense(1, activation='relu')(concat)
  # Normalise over the time axis; the last axis has size 1, so a softmax
  # there would make every weight equal to 1.
  alpha = Softmax(axis=1)(alpha_scores)
  context = Dot(axes=1)([a, alpha])
  return context




In [0]:
def Sentiment_Model(input_shape,n_a,n_s,word_to_idex,word_to_vec_map,attention):
  """Build the Conv1D -> MaxPooling1D -> LSTM -> sigmoid sentiment classifier.

  Args:
    input_shape: shape of one input sample of word indices.
    n_a: unused by the current architecture; kept for interface compatibility.
    n_s: unused by the current architecture; kept for interface compatibility.
    word_to_idex: mapping word -> index, forwarded to Embedding_layer.
    word_to_vec_map: mapping word -> pretrained vector.
    attention: unused by the current architecture; kept for compatibility.

  Returns:
    An uncompiled Keras ``Model`` mapping word indices to one sigmoid score.

  NOTE: `count`, `filters`, `kernel_size`, `pool_size` and `lstm_output_size`
  are read from notebook-level variables and must be defined before this
  function is called.
  """
  sentence_index = Input(input_shape, dtype='int32')

  # Use the mapping passed by the caller, not a same-named global.
  embedding_layer = Embedding_layer(word_to_vec_map, word_to_idex, count)
  embedding = embedding_layer(sentence_index)

  # The unused s0/c0 inputs and the Bi-LSTM branch were removed: they never
  # fed the output, so they were not part of the returned model.
  x = Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1)(embedding)
  x = MaxPooling1D(pool_size=pool_size)(x)
  x = LSTM(lstm_output_size)(x)
  x = Dense(1)(x)
  x = Activation('sigmoid')(x)

  model = Model(inputs=sentence_index, outputs=x)

  return model

# **Prediction**

In [0]:
def predict_sentence(model,sentence,word_to_index,max_document_length,thresold=0.5):
  """Predict and print a POSITIVE/NEGATIVE label for each sentence.

  Args:
    model: object with a ``predict`` method returning one score per sentence.
    sentence: array of sentence strings (must expose ``.shape[0]``).
    word_to_index: mapping word -> index used to encode the sentences.
    max_document_length: padded/truncated length of each encoded sentence.
    thresold: scores below this value are NEGATIVE, otherwise POSITIVE.

  Returns:
    A list of labels, one per sentence and in the same order.

  Raises:
    ValueError: if a predicted score is NaN or outside [0, 1]. Previously
      such a score was skipped silently, which shifted the labels against
      the sentences and ended in an IndexError in the print loop.
  """
  index_sentence = sentence_to_index(sentence, max_document_length, word_to_index)
  pred = model.predict(index_sentence)

  sentiment = []
  for x in pred:
    # Each prediction row is expected to hold a single score.
    score = float(np.squeeze(x))
    if not 0 <= score <= 1:
      raise ValueError('predicted score %r is not a probability in [0, 1]' % score)
    sentiment.append('NEGATIVE' if score < thresold else 'POSITIVE')

  for i in range(sentence.shape[0]):
      print('Sentence: '+ sentence[i] + ' is our model predicted as '+ sentiment[i])
  return sentiment


# **Visualization**

In [0]:
import matplotlib.pyplot as plt
def visualize(history):
  """Plot training and validation accuracy and loss from a Keras History.

  Args:
    history: object whose ``.history`` dict holds per-epoch lists under
      'acc'/'val_acc' (or 'accuracy'/'val_accuracy'), 'loss' and 'val_loss'.

  Draws two figures (accuracy, then loss) and shows them.
  """
  metrics = history.history
  # Older Keras reports 'acc'; newer releases use 'accuracy'.
  acc_key = 'acc' if 'acc' in metrics else 'accuracy'
  acc = metrics[acc_key]
  val_acc = metrics['val_' + acc_key]
  loss = metrics['loss']
  val_loss = metrics['val_loss']

  epochs = range(len(acc))

  plt.plot(epochs, acc, 'r', label='Training accuracy')
  plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
  plt.title('Training and validation accuracy')
  # Without this call the labels above are never displayed.
  plt.legend()

  plt.figure()

  plt.plot(epochs, loss, 'r', label='Training Loss')
  plt.plot(epochs, val_loss, 'b', label='Validation Loss')
  plt.title('Training and validation loss')
  plt.legend()

  plt.show()