In [1]:
!pip install nlp

Collecting nlp
  Downloading nlp-0.4.0-py3-none-any.whl (1.7 MB)
[?25l[K     |▏                               | 10 kB 20.7 MB/s eta 0:00:01[K     |▍                               | 20 kB 25.0 MB/s eta 0:00:01[K     |▋                               | 30 kB 13.4 MB/s eta 0:00:01[K     |▉                               | 40 kB 10.0 MB/s eta 0:00:01[K     |█                               | 51 kB 5.4 MB/s eta 0:00:01[K     |█▏                              | 61 kB 5.4 MB/s eta 0:00:01[K     |█▍                              | 71 kB 5.6 MB/s eta 0:00:01[K     |█▋                              | 81 kB 6.3 MB/s eta 0:00:01[K     |█▉                              | 92 kB 4.8 MB/s eta 0:00:01[K     |██                              | 102 kB 5.1 MB/s eta 0:00:01[K     |██▏                             | 112 kB 5.1 MB/s eta 0:00:01[K     |██▍                             | 122 kB 5.1 MB/s eta 0:00:01[K     |██▋                             | 133 kB 5.1 MB/s eta 0:00:01[K     |█

In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import nlp
import random
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import regularizers

In [3]:
# Download the 'emotion' dataset (or reuse the locally cached copy) and pull
# out its three splits by name.
dataset = nlp.load_dataset('emotion')
train, val, test = (dataset[split] for split in ('train', 'validation', 'test'))

Downloading:   0%|          | 0.00/3.41k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/6.01k [00:00<?, ?B/s]

Using custom data configuration default


Downloading and preparing dataset emotion/default (download: 1.97 MiB, generated: 2.09 MiB, post-processed: Unknown sizetotal: 4.06 MiB) to /root/.cache/huggingface/datasets/emotion/default/0.0.0/84e07cd366f4451464584cdbd4958f512bcaddb1e921341e07298ce8a9ce42f4...


Downloading:   0%|          | 0.00/1.66M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/204k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/207k [00:00<?, ?B/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

0 examples [00:00, ? examples/s]

Dataset emotion downloaded and prepared to /root/.cache/huggingface/datasets/emotion/default/0.0.0/84e07cd366f4451464584cdbd4958f512bcaddb1e921341e07298ce8a9ce42f4. Subsequent calls will reuse this data.


In [4]:
def get_tweet(data):
    """Split an iterable of examples into parallel text and label lists.

    Args:
        data: Iterable of mappings, each with a 'text' and a 'label' key.

    Returns:
        A ``(tweets, labels)`` tuple of two lists, in the order the examples
        were yielded by ``data``.

    Raises:
        KeyError: If a dict example lacks the 'text' or 'label' key.
    """
    tweets, labels = [], []
    # Single pass: the input is read only once, so one-shot iterators work and
    # each example is materialised once instead of twice.
    for example in data:
        tweets.append(example['text'])
        labels.append(example['label'])
    return tweets, labels
# Texts and label names of the training split.
tweets, labels = get_tweet(train)

In [5]:
# Peek at the first training example as a (text, label) pair.
tweets[0], labels[0]

('i didnt feel humiliated', 'sadness')

In [6]:
# Word-level tokenizer configured with num_words=10000 and '<UNK>' as the
# out-of-vocabulary token. The word index is fit on the training tweets only.
tokenizer = Tokenizer(
    num_words=10000,
    oov_token='<UNK>',
)
tokenizer.fit_on_texts(tweets)

In [7]:
# Fixed sequence length every tweet is padded or truncated to.
maxlen = 50


def get_sequences(tokenizer, tweets):
    """Turn raw texts into fixed-length integer sequences.

    Args:
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences``.
        tweets: List of text strings to encode.

    Returns:
        The result of ``pad_sequences``: every sequence padded with trailing
        zeros, or cut at the end, to the module-level ``maxlen``.
    """
    return pad_sequences(
        tokenizer.texts_to_sequences(tweets),
        maxlen=maxlen,
        padding='post',
        truncating='post',
    )

In [8]:
# Sort the distinct label names so the class <-> index mapping is the same on
# every run. Iterating a bare set of strings can change order between
# interpreter sessions, which would silently relabel the model's outputs.
classes = sorted(set(labels))
class_to_index = {name: idx for idx, name in enumerate(classes)}
index_to_class = {idx: name for name, idx in class_to_index.items()}


def names_to_ids(labels):
    """Convert label names to a NumPy array of integer class ids.

    Args:
        labels: Iterable of label names present in ``class_to_index``.

    Returns:
        ``np.ndarray`` with one class id per input label, in input order.

    Raises:
        KeyError: If a label was not seen when the mapping was built, rather
            than silently placing ``None`` in the array.
    """
    return np.array([class_to_index[name] for name in labels])


train_labels = names_to_ids(labels)

In [10]:
# Emotion classifier: token embedding -> two stacked bidirectional LSTMs ->
# 6-way softmax. Layers are added in the order the data flows through them.
model = tf.keras.models.Sequential()
# Vocabulary size 10000 mirrors the tokenizer's num_words; 16-d embeddings.
model.add(tf.keras.layers.Embedding(10000, 16, input_length=maxlen))
# return_sequences=True so the next recurrent layer receives one output per
# time step; the input kernel carries a small L2 penalty.
model.add(
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(
            20,
            kernel_regularizer=regularizers.l2(1e-5),
            return_sequences=True,
        )
    )
)
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20)))
model.add(tf.keras.layers.Dense(6, activation='softmax'))


In [11]:
# The sparse loss is used because the targets are integer class ids (see
# names_to_ids), not one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [12]:
# Encode the training tweets as fixed-length integer sequences for the model.
padded_train_seq = get_sequences(tokenizer, tweets)

In [13]:
# Prepare the validation split with the same tokenizer and label mapping that
# were used for the training data. `val_labels` starts out as label names and
# is then rebound to the integer ids the loss expects.
val_tweets, val_labels = get_tweet(val)
val_seq = get_sequences(tokenizer, val_tweets)
val_labels = names_to_ids(val_labels)

# Train for at most 20 epochs, stopping once val_accuracy has failed to improve
# for 2 consecutive epochs. restore_best_weights=True rolls the model back to
# the best epoch on early stop; without it the model keeps the weights of the
# last (non-improving) epoch, and those are what later cells would evaluate.
h = model.fit(
    padded_train_seq, train_labels,
    validation_data=(val_seq, val_labels),
    epochs=20,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_accuracy',
            patience=2,
            restore_best_weights=True,
        )
    ]
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20


In [14]:
# Encode the held-out test split exactly like the training data, then score the
# model on it. `test_labels` is first the label names, then rebound to ids.
test_tweets, test_labels=get_tweet(test)
test_seq = get_sequences(tokenizer, test_tweets)
test_labels=names_to_ids(test_labels)
# Displays [loss, accuracy], matching the loss and metric given to compile().
model.evaluate(test_seq, test_labels)



[0.4643223285675049, 0.8709999918937683]

In [15]:
# Spot-check: pick one random test example and print its true label next to the
# model's prediction.
i = random.randrange(len(test_labels))
print('Sentence:', test_tweets[i])
print('Emotion:', index_to_class[test_labels[i]])
# Slice instead of index so the single example keeps its leading batch axis.
p = model.predict(test_seq[i:i + 1])[0]
print(test_seq[i])
# Highest-probability class id, as a plain int key for the lookup table.
pred_class = index_to_class[int(np.argmax(p))]
print('Predicted Emotion: ', pred_class)

Sentence: i feels so lame
Emotion: sadness
[  2 123  15 717   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
Predicted Emotion:  sadness


In [16]:
import pandas as pd

In [None]:
# Load the reviews CSV straight from GitHub (network access required) and
# preview the first rows. The text lives in the 'Reviews' column.
df = pd.read_csv("https://raw.githubusercontent.com/ayyucedemirbas/Amazon_Reviews_Sentiment_Analysis/main/reviews.csv")
df.head()

Unnamed: 0,Reviews,Unnamed: 1
0,Bob Woodward (with Robert Costa) gives readers...,
1,Bob Woodward is making big money with his lies...,
2,This is a great book Bob and Robert did a wond...,
3,"Unlike the other two volumes in this series, W...",
4,The content of the book is a complete liberal ...,


In [18]:
# Second review set (women's clothes reviews), also fetched from GitHub.
# NOTE: its text column is lowercase 'reviews', unlike 'Reviews' in `df`.
df1=pd.read_csv("https://raw.githubusercontent.com/ayyucedemirbas/Amazon_Reviews_Sentiment_Analysis/main/women_clothes_reviews.csv")

In [None]:
# Show the full text of the review with index label 0.
df['Reviews'][0]

'Bob Woodward (with Robert Costa) gives readers his third and final book about the Trump Presidency. As with all of these books, there are many surprising, sometimes shocking, revelations based on interviews with DC insiders. This book begins with an eye opening flashforward, describing how General Milley (Chairman of the Joint Chiefs of Staff) on Jan 8, 2021 tried to assure Chinese officials that Trump was not going to launch an attack against them. Nancy Pelosi and others had talked to Milley about their concerns that Trump was dangerously unstable mentally. The first third of the book, however, focuses on the last year of Trumps Presidency, as well as Joe Bidens campaign for the Democratic nomination and later his race against Trump. I found this part of the book rather superficial, but Woodward does at times provide yet more disturbing evidence of Trumps erratic behavior. For example, Trump tries to bully the FDA Director to rush approval of a COVID vaccine regardless of the safety

In [19]:
# Show one raw clothes review; the text keeps its surrounding newlines and spaces.
df1['reviews'][59]

'\n\n  I ordered xxl but it is little loose so u can pic one size smaller\n\n'

In [None]:
# Apply the tweet-trained emotion model to a single review from `df`.
# NOTE(review): these steps are repeated verbatim for `df1` in a later cell;
# a shared helper taking the sentence as a parameter would remove the copy.
sentence = df['Reviews'][11]
sequence = tokenizer.texts_to_sequences([sentence])
paddedSequence = pad_sequences(sequence, maxlen=maxlen, padding='post', truncating='post')
# Keep the batch axis by slicing; predict returns one probability row per input.
p = model.predict(paddedSequence[:1])[0]
pred_class = index_to_class[int(np.argmax(p))]
print('Sentence:', sentence)
print('Predicted Emotion: ', pred_class)

Sentence: They fire
Predicted Emotion:  fear


In [21]:
# Apply the tweet-trained emotion model to a single clothes review from `df1`:
# tokenize, pad to `maxlen`, predict, and map the top class id back to its name.
sentence = df1['reviews'][86]
sequence = tokenizer.texts_to_sequences([sentence])
paddedSequence = pad_sequences(sequence, maxlen=maxlen, padding='post', truncating='post')
# Slice rather than index so the lone example stays a batch of one.
p = model.predict(paddedSequence[:1])[0]
pred_class = index_to_class[int(np.argmax(p))]
print('Sentence:', sentence)
print('Predicted Emotion: ', pred_class)

Sentence: 

  Material quality is good. Fitting is perfect


Predicted Emotion:  joy
