# Sentimental classfication

In [49]:
import pandas as pd

df = pd.read_csv('IMDB_Dataset.csv')

print(df.columns)

data = {
    "No": [1, 2, 3, 4],
    "review": [
        "Movie was boring",
        "Movie actions were very good",
        "Movie was good",
        "Movie story was very bad"
    ],
    "sentiment": ["Negative", "Positive", "Positive", "Negative"]
}

# Create DataFrame
df = pd.DataFrame(data)

df 

Index(['review', 'sentiment'], dtype='object')


Unnamed: 0,No,review,sentiment
0,1,Movie was boring,Negative
1,2,Movie actions were very good,Positive
2,3,Movie was good,Positive
3,4,Movie story was very bad,Negative


Getting the vocabulary from the unique values from the text input

In [50]:
sent_tokens = df.review.map(lambda x: set(x.split(' ')))
vocab = sent_tokens.explode().unique()
print(f'Voculary: {vocab}')
print(f'Vocabulary size: {vocab.shape[0]}')

Voculary: ['Movie' 'was' 'boring' 'very' 'good' 'actions' 'were' 'bad' 'story']
Vocabulary size: 9


Assiging index values to each unique words in the vocabulary

In [51]:
word_index = {}
for index , words in enumerate(vocab):
    word_index[words] = index

word_index

{'Movie': 0,
 'was': 1,
 'boring': 2,
 'very': 3,
 'good': 4,
 'actions': 5,
 'were': 6,
 'bad': 7,
 'story': 8}

Assigning index values for each words in the sentences , therby converting text into vector representation

In [52]:
sent_indices = sent_tokens.map(lambda x: [word_index[word] for word in x])

print('Sentences string representation') 
print(sent_tokens)

print('Sentences number representation')
print(sent_indices)

Sentences string representation
0                  {Movie, was, boring}
1    {Movie, very, good, actions, were}
2                    {Movie, was, good}
3        {was, Movie, bad, very, story}
Name: review, dtype: object
Sentences number representation
0          [0, 1, 2]
1    [0, 3, 4, 5, 6]
2          [0, 1, 4]
3    [1, 0, 7, 3, 8]
Name: review, dtype: object


Converting postive and negatives to 1 and 0 respectively

In [53]:
target = df.sentiment.map(lambda x: 1 if x == 'Positive' else 0)
target

0    0
1    1
2    1
3    0
Name: sentiment, dtype: int64

# Model development

Weights Initialization

In [54]:
import numpy as np

hidden_size = 3

weights_0_1 = 0.2*np.random.random((len(vocab),hidden_size)) - 0.1
weights_1_2 = 0.2*np.random.random((hidden_size,1)) - 0.1

pd.DataFrame(weights_0_1)

Unnamed: 0,0,1,2
0,0.057619,-0.018253,0.088108
1,0.099659,-0.084616,-0.073206
2,0.077606,0.061302,-0.073075
3,0.045457,-0.061556,0.037043
4,-0.054673,0.035922,-0.013928
5,0.083581,-0.078321,0.099894
6,0.047745,-0.030347,-0.055149
7,0.079233,-0.066672,0.044566
8,-0.066443,0.030508,0.037434


Neural network to compute the sentiments from the reviews

In [55]:
alpha, iteration = 0.1, 10

def sigmoid(x):
    return 1/(1+np.exp(-x))

def neural_network(x,y):

    global weights_0_1, weights_1_2 

    # print(f'Input: len = {len(x)} , data = {x}')

    layer_1 = sigmoid(np.sum(weights_0_1[x],axis=0))
    # print(f'Layer 1 shape: {layer_1.shape}')
    layer_2 = sigmoid(np.dot(layer_1,weights_1_2))
    
    layer_2_delta = layer_2 - y
    # print(f'weights_1_2 shape: {weights_1_2.T.shape}')

    layer_1_delta = layer_2_delta.dot(weights_1_2.T)  
    # print(f'layer_1_delta shape: {layer_1_delta.shape}')

    weights_1_2 -= np.outer(layer_1 , layer_2_delta) * alpha
    weights_0_1[x] -= layer_1_delta * alpha

    return layer_2

correct , total = 0, 0
for i in range(iteration):
    for x,y in zip(sent_indices,target):
        output = neural_network(x,y)

        # print(np.abs(output))
        correct += 1 if np.abs(output) < 0.5 else 0
        total += 1

    print(f'Iteration: {i}, Accuracy: {correct/total}')
    correct , total = 0, 0


Iteration: 0, Accuracy: 0.75
Iteration: 1, Accuracy: 0.75
Iteration: 2, Accuracy: 0.75
Iteration: 3, Accuracy: 0.75
Iteration: 4, Accuracy: 0.75
Iteration: 5, Accuracy: 0.75
Iteration: 6, Accuracy: 0.75
Iteration: 7, Accuracy: 0.75
Iteration: 8, Accuracy: 0.75
Iteration: 9, Accuracy: 0.75


Display emebeding matrix

In [56]:
pd.DataFrame(weights_0_1)

Unnamed: 0,0,1,2
0,0.030734,-0.043743,0.063561
1,0.066739,-0.048122,-0.099999
2,0.058181,0.109333,-0.087069
3,0.018125,-0.085194,0.01055
4,-0.028766,-0.075945,0.004259
5,0.089616,-0.140305,0.102141
6,0.05378,-0.092331,-0.052901
7,0.045867,-0.028326,0.015826
8,-0.09981,0.068853,0.008695
