# Importing the dependencies

In [35]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd

# Load the cleaned dataset

In [36]:
data = pd.read_csv('./datasets/cleaned_twitter_training.csv')

In [37]:
data.head()

Unnamed: 0.1,Unnamed: 0,context,sentiment,text,cleaned_text
0,0,Borderlands,1,im getting on borderlands and i will murder yo...,get borderland murder
1,1,Borderlands,1,I am coming to the borders and I will kill you...,come border kill
2,2,Borderlands,1,im getting on borderlands and i will kill you ...,get borderland kill
3,3,Borderlands,1,im coming on borderlands and i will murder you...,come borderland murder
4,6,Borderlands,1,So I spent a few hours making something for fu...,spent hour make someth fun know huge borderlan...


In [38]:
data = data.drop(['Unnamed: 0'], axis=1)

In [39]:
texts = data['text'].values.tolist()
labels = data['sentiment'].values.tolist()

In [40]:
texts

['im getting on borderlands and i will murder you all ,',
 'I am coming to the borders and I will kill you all,',
 'im getting on borderlands and i will kill you all,',
 'im coming on borderlands and i will murder you all,',
 "So I spent a few hours making something for fun. . . If you don't know I am a HUGE @Borderlands fan and Maya is one of my favorite characters. So I decided to make myself a wallpaper for my PC. . Here is the original image versus the creation I made :) Enjoy! pic.twitter.com/mLsI5wf9Jg",
 "So I spent a couple of hours doing something for fun... If you don't know that I'm a huge @ Borderlands fan and Maya is one of my favorite characters, I decided to make a wallpaper for my PC.. Here's the original picture compared to the creation I made:) Have fun! pic.twitter.com / mLsI5wf9Jg",
 "So I spent a few hours doing something for fun... If you don't know I'm a HUGE @ Borderlands fan and Maya is one of my favorite characters.",
 "So I spent a few hours making something 

In [41]:
labels

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,


# Constructing the model

In [42]:
# Hyperparameters
VOCAB_SIZE = 1000
EMBEDDING_DIM = 16
MAX_LENGTH = 20
TRUNCATING_TYPE='post'
PADDING_TYPE='post'
OOV_TOKEN = "<OOV>"

# Tokenize and pad sequences
tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token=OOV_TOKEN)
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index

sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=MAX_LENGTH, padding=PADDING_TYPE, truncating=TRUNCATING_TYPE)

In [43]:
# Define neural networks
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(VOCAB_SIZE, EMBEDDING_DIM, input_length=MAX_LENGTH),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(6, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [44]:
labels = np.array(labels)

# Train the model
num_epochs = 10
model.fit(padded_sequences, labels, epochs=num_epochs, verbose=2)

Epoch 1/10
1067/1067 - 2s - loss: 0.4791 - accuracy: 0.7660 - 2s/epoch - 2ms/step
Epoch 2/10
1067/1067 - 2s - loss: 0.3293 - accuracy: 0.8574 - 2s/epoch - 2ms/step
Epoch 3/10
1067/1067 - 1s - loss: 0.2660 - accuracy: 0.8874 - 1s/epoch - 1ms/step
Epoch 4/10
1067/1067 - 1s - loss: 0.2214 - accuracy: 0.9101 - 1s/epoch - 1ms/step
Epoch 5/10
1067/1067 - 2s - loss: 0.1877 - accuracy: 0.9257 - 2s/epoch - 1ms/step
Epoch 6/10
1067/1067 - 2s - loss: 0.1621 - accuracy: 0.9361 - 2s/epoch - 1ms/step
Epoch 7/10
1067/1067 - 1s - loss: 0.1412 - accuracy: 0.9456 - 1s/epoch - 1ms/step
Epoch 8/10
1067/1067 - 1s - loss: 0.1263 - accuracy: 0.9508 - 1s/epoch - 1ms/step
Epoch 9/10
1067/1067 - 1s - loss: 0.1113 - accuracy: 0.9569 - 1s/epoch - 1ms/step
Epoch 10/10
1067/1067 - 2s - loss: 0.1014 - accuracy: 0.9608 - 2s/epoch - 1ms/step


<keras.callbacks.History at 0x2cee02532e0>

In [45]:
def classify_text(text: str):
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=MAX_LENGTH, padding=PADDING_TYPE, truncating=TRUNCATING_TYPE)
    
    prediction = model.predict(padded_sequence)
    
    return "Positive" if prediction >= .5 else "Negative"


In [46]:
test_text = "This product is amazing!"
print(classify_text(test_text))

Positive
