# **Library**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from keras.callbacks import EarlyStopping

In [2]:
# Read train.txt: semicolon-separated, no header row, so the two columns
# are named explicitly.
df = pd.read_csv(
    'train.txt',
    sep=';',
    header=None,
    names=['Text', 'Emotion'],
)

In [3]:
# Preview the first five rows to confirm the columns were parsed as expected.
df.head(n=5)

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [4]:
# Character count of each sentence, computed in one vectorized pass.
# The previous version filled the column with the row count and then wrote
# each value through df['length'][i], i.e. chained assignment, which pandas
# may apply to a temporary copy instead of df (and which is slow row by row).
df['length'] = df['Text'].str.len()

In [None]:
# Show the last five rows, including the newly added 'length' column.
df.tail(n=5)

# **Visualize Data**

In [None]:
# Bar chart of how many samples each emotion has, smallest class first.
plt.style.use('ggplot')
(
    df['Emotion']
    .value_counts()
    .sort_values(ascending=True)
    .plot.bar()
)
plt.show()

# **Prepare Training Data**

In [None]:
import spacy
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

In [None]:
# Only token.is_stop, token.is_punct and token.lemma_ are read from the parsed
# documents, so the dependency parser and named-entity recognizer are switched
# off to avoid running them on every sentence.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

def text_processing(text, vocab_size, max_len):
    """Turn one raw sentence into a fixed-length vector of hashed word indices.

    The text is run through the spaCy pipeline ``nlp``; stop words and
    punctuation tokens are dropped and the lemmas of the remaining tokens are
    joined with single spaces. That string is encoded with Keras ``one_hot``
    and padded at the front via ``pad_sequences``.

    Args:
        text: Sentence to encode.
        vocab_size: Passed to ``one_hot`` as ``n``.
        max_len: Passed to ``pad_sequences`` as ``maxlen``.

    Returns:
        The single padded sequence (row 0 of the ``pad_sequences`` result).
    """
    # NOTE(review): one_hot is documented as hashing words with Python's
    # built-in hash by default, which may not be stable across interpreter
    # runs — confirm before reusing a trained model in a new process.
    lemmas = [
        tok.lemma_
        for tok in nlp(text)
        if not (tok.is_stop or tok.is_punct)
    ]
    encoded = one_hot(input_text=' '.join(lemmas), n=vocab_size)
    # pad_sequences returns a batch; take row 0 so callers get one sequence.
    padded_batch = pad_sequences(sequences=[encoded], maxlen=max_len, padding='pre')
    return padded_batch[0]

In [None]:
# Size of the one_hot index space and the fixed sequence length per sentence.
vocab_size = 11000
max_len = 300

# Encode every sentence and stack the per-row vectors into one numpy array.
X_train = np.stack(
    [text_processing(sentence, vocab_size, max_len) for sentence in df['Text']]
)

# Map emotion names to integer ids, then expand the ids into one-hot rows.
label = LabelEncoder()
integer_encoded = label.fit_transform(df['Emotion'])
y_train = to_categorical(integer_encoded)

# Sanity check: expect (num_samples, max_len) then (num_samples, num_classes).
print(X_train.shape, y_train.shape, sep='\n')

# **Building Neural Network**

In [None]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM
from tensorflow.keras.callbacks import EarlyStopping



In [None]:
# Dropout is used below but is not among the layers imported by the notebook
# (Dense, Embedding, LSTM), so it is imported here to keep this cell runnable
# on a fresh kernel instead of failing with a NameError.
from tensorflow.keras.layers import Dropout

# Sequence classifier: embedding -> dropout -> LSTM -> dropout -> softmax.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128, input_length=max_len),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),
    # One output unit per class known to the fitted LabelEncoder.
    Dense(len(label.classes_), activation='softmax'),
])


# **Compile**

In [None]:
# Targets are one-hot rows, hence categorical cross-entropy; accuracy is
# reported alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# **Fit Model**

In [None]:
# Early stopping watches val_loss with a patience of 2 epochs and restores
# the best weights seen when it triggers.
callback = EarlyStopping(
    monitor="val_loss",
    patience=2,
    restore_best_weights=True,
)

# Train for up to 20 epochs, holding out 20% of the arrays for validation.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=20,
    callbacks=[callback],
)

In [None]:
import pickle

# NOTE(review): the original call here, pickle.dump(open('')), had no object
# to serialize and an empty path, so it could never run. The artifacts and
# file names below are assumptions about the intent — adjust as needed.

# Persist the fitted LabelEncoder so predicted class ids can be mapped back to
# emotion names later; the context manager guarantees the file is closed.
with open('label_encoder.pkl', 'wb') as encoder_file:
    pickle.dump(label, encoder_file)

# The Keras model is stored with its own serializer rather than pickle.
model.save('emotion_model.keras')

In [None]:

# Encode the sentence with the same text_processing function that produced
# X_train. This cell previously redefined text_processing to return a plain
# string, which shadowed the training version and broke any re-run of the
# feature-building cell; reusing the original keeps both paths identical.
sentence = "i am feeling grouchy"
encoded_sentence = text_processing(sentence, vocab_size, max_len)

# The model expects a batch, so add a leading axis of size 1.
padded_sequence = encoded_sentence[np.newaxis, :]

# Run the model once and derive both the label and its probability from the
# same output instead of calling predict twice.
probabilities = model.predict(padded_sequence)
result = label.inverse_transform(np.argmax(probabilities, axis=-1))[0]
proba = np.max(probabilities)

print(f"{result} : {proba}\n\n")
