# **Aim:** To predict emotions from a text using Neural Networks

## EDA

### Importing Libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from warnings import filterwarnings
import seaborn as sns
from nltk.corpus import stopwords
stop = stopwords.words('english')
from collections import Counter
from sklearn.metrics import accuracy_score,r2_score
import re
import string
from wordcloud import WordCloud,STOPWORDS
import tensorflow as tf
from tensorflow import keras
from nltk.stem.snowball import SnowballStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import preprocessing
import pickle
filterwarnings("ignore")

2022-04-24 19:08:33.723764: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-04-24 19:08:33.723788: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


### Initializing Dataset

In [2]:
# Load the training data from "training.csv" (path is relative to the working directory).
df = pd.read_csv("training.csv")

In [3]:
# Drop the "Unnamed: 0" column (presumably an index that was written into the CSV).
# The frame is reassigned instead of mutated with inplace=True, and
# errors="ignore" lets the cell be re-run without raising KeyError once the
# column is already gone.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

In [4]:
# Helper that strips punctuation, hashtags and @-mentions from a tweet
def clean_tweet(i):
    """Return the tweet text without punctuation, hashtags or mentions.

    Every punctuation character except '@' and '#' is replaced by a space.
    The text is then split on whitespace and any word that starts with '@'
    or '#' is discarded. The remaining words are joined with single spaces.

    Parameters
    ----------
    i : str
        Raw tweet text.

    Returns
    -------
    str
        The cleaned tweet; an empty string when no word survives.
    """
    entity_prefixes = ('@', '#')

    # Blank out punctuation, but keep the entity markers so they can be
    # recognised at the start of a word below.
    for mark in string.punctuation:
        if mark in entity_prefixes:
            continue
        i = i.replace(mark, ' ')

    # split() never yields empty tokens, so indexing token[0] is safe.
    kept = [token for token in i.split() if token[0] not in entity_prefixes]
    return ' '.join(kept)

In [5]:
# Strip punctuation, hashtags and @-mentions from every tweet.
# The whole column is assigned at once rather than writing df["Tweet"][i] in a
# loop: that chained assignment triggers SettingWithCopyWarning, does not write
# back to df under pandas copy-on-write, and only works when the index labels
# are exactly 0..len(df)-1.
df["Tweet"] = df["Tweet"].apply(clean_tweet)

In [6]:
# Remove literal "$" characters.
# regex=False states the literal match explicitly, so the result does not depend
# on the pandas version's default for `regex` (as a regular expression "$" means
# end-of-string, not a dollar sign).
df["Tweet"] = df["Tweet"].str.replace("$", "", regex=False)

In [7]:
# Replace each run of non-alphanumeric characters with one space, then lowercase the tweet
df['Tweet'] = df['Tweet'].map(
    lambda tweet: re.sub('[^a-zA-Z0-9]+', ' ', tweet).lower()
)

In [8]:
# Removing emojis: cast every column to str, then drop any character that
# cannot be encoded as ASCII (encode with errors ignored, decode back).
df = df.astype(str)
df = df.apply(lambda column: column.str.encode('ascii', 'ignore').str.decode('ascii'))

In [9]:
# Drop English stop words from every tweet.
# Membership is checked against a set built once from the `stop` list.
stop_set = set(stop)
df["Tweet"] = df["Tweet"].apply(
    lambda tweet: ' '.join(token for token in tweet.split() if token not in stop_set)
)

In [10]:
# Remove every run of digits from the tweets.
# The pattern is a raw string and regex=True is passed explicitly: from pandas
# 2.0 on, str.replace defaults to regex=False, which would look for the literal
# text "\d+" and leave all digits in place.
df['Tweet'] = df['Tweet'].str.replace(r'\d+', '', regex=True)

## Initializing Dataset For Neural Network

### Preparing Dataset For The Model (Data Preprocessing)

In [11]:
# Stem the tweets with PorterStemmer and build the corpus used for vectorisation.
# NOTE(review): these imports would normally live in the notebook's import cell.
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Normalise once more and split every tweet into a list of lowercase tokens.
# After this line df['Tweet'] holds lists rather than strings, so this cell
# cannot be re-run on its own output (re.sub would receive a list).
df['Tweet'] = df['Tweet'].map(lambda text: re.sub('[^a-zA-Z0-9]+', ' ', text)).apply(lambda x: x.lower().split())

ps = PorterStemmer()

# Load the stop-word list a single time; doing it inside the lambda would reload
# the list and rebuild the set for every token of every tweet.
english_stopwords = set(stopwords.words('english'))

# One space-joined string of stemmed, non-stop-word tokens per tweet.
corpus = df["Tweet"].apply(
    lambda tokens: ' '.join(
        ps.stem(token) for token in tokens if token not in english_stopwords
    )
)

In [12]:
# Stemming every token of every tweet with the English SnowballStemmer.
stemmer = SnowballStemmer("english")
df["Tweet"] = df["Tweet"].apply(lambda tokens: list(map(stemmer.stem, tokens)))

In [13]:
# Vectorizing the corpus: fit a CountVectorizer on the Porter-stemmed corpus and
# convert the result into a dense array of token counts (one row per tweet).
# NOTE(review): .toarray() materialises the full dense matrix, so memory use
# grows with rows x vocabulary size.
cv = CountVectorizer()
words_vectorized = cv.fit_transform(corpus).toarray()

In [14]:
# Encoding the emotions: fit a LabelEncoder on the "Emotion" column and keep the
# resulting integer codes as the training targets. `le` is kept so the codes can
# be mapped back to label names later.
le = preprocessing.LabelEncoder()
emotions =le.fit_transform(df["Emotion"])

In [15]:
# Split the dataset 17690:3000, i.e. the first 17690 rows for training and the
# following 3000 rows for testing.
# The test slice has to start where the training slice ends: slicing [:3000]
# would pick rows that are already inside the training set, so the reported
# test accuracy would be measured on data the model was trained on.
# NOTE(review): this assumes the frame has at least 20690 rows and that row
# order carries no class structure (no shuffling is done here) - confirm.
TRAIN_SIZE = 17690
TEST_SIZE = 3000
test_end = TRAIN_SIZE + TEST_SIZE

(X_train, y_train) = words_vectorized[:TRAIN_SIZE], emotions[:TRAIN_SIZE]
(X_test, y_test) = words_vectorized[TRAIN_SIZE:test_end], emotions[TRAIN_SIZE:test_end]

In [16]:
# Report how many rows ended up in each part of the split
for split_name, split_data in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print(f"Length of {split_name}:", len(split_data))

Length of X_train: 17690
Length of X_test: 3000
Length of y_train: 17690
Length of y_test: 3000


### Using Keras.Sequential Neural Network

In [17]:
# Designing the architecture of the neural network: one hidden ReLU layer of
# 10000 units followed by a softmax layer with one unit per emotion class.
# The input width and the number of classes are read from the data instead of
# being hard-coded, so the model still matches the features when the vocabulary
# size or the label set changes.
n_features = X_train.shape[1]
n_classes = len(le.classes_)

model = keras.Sequential([
    keras.layers.Dense(10000, input_shape=(n_features,), activation='relu'),
    keras.layers.Dense(n_classes, activation='softmax'),
])

2022-04-24 19:09:31.088300: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-04-24 19:09:31.088415: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-04-24 19:09:31.088463: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (Jammy): /proc/driver/nvidia/version does not exist
2022-04-24 19:09:31.088979: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [18]:
# Compiling the neural network: Adam optimizer, sparse categorical cross-entropy
# (the targets are integer class codes, not one-hot vectors) and accuracy as the
# reported metric.
model.compile(optimizer = 'adam',
              loss = 'sparse_categorical_crossentropy',
              metrics = ['accuracy'])

In [None]:
# Training the neural network for 128 epochs on the training split.
# No validation data is passed, so only training metrics are reported per epoch.
model.fit(X_train, y_train, epochs = 128)

2022-04-24 19:09:31.960246: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 1751451520 exceeds 10% of free system memory.


Epoch 1/128

In [None]:
# Predicting class probabilities for the test rows (one softmax vector per row)
pred = model.predict(X_test)

In [None]:
# Making a list with the index of the highest probability for each prediction row
y_pred = list(np.argmax(pred, axis=1))

In [None]:
# Testing accuracy: fraction of test rows whose predicted class equals the true class.
# NOTE(review): the number is only meaningful if X_test / y_test share no rows
# with the training data - verify against the split cell.
accuracy_score(y_test,y_pred)

In [None]:
# Saving the trained neural network under "EmotionNeuralNetwork2" (relative to
# the working directory).
# NOTE(review): the path has no file extension; which on-disk format that
# selects (or whether it is accepted) depends on the installed Keras version - confirm.
model.save("EmotionNeuralNetwork2")