<a href="https://colab.research.google.com/github/athulyas1206/emotion-classifier-in-text/blob/main/Emotion_detection_in_text.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Importing Dataset**

In [None]:
import pandas as pd

In [None]:
def _read_split(path):
  """Read one `sentence;emotion` split file (no header row) into a DataFrame."""
  return pd.read_csv(path, sep=";", header=None, names=["sentence", "emotion"])


# The three splits share one on-disk layout, so they go through the same reader.
train_df = _read_split("/content/train.txt")
test_df = _read_split("/content/test.txt")
val_df = _read_split("/content/val.txt")

Concatenate the training and validation (val) sets

In [None]:
# Append the validation rows to the training rows and renumber the index from 0.
# NOTE: this rebinds train_df, so running the cell a second time appends
# val_df again; re-run the loading cell first if you need to repeat it.
train_df = pd.concat((train_df, val_df), axis=0, ignore_index=True)

In [None]:
# Print how many training sentences carry each emotion label.
count = train_df["emotion"].value_counts()
print(count)

In [None]:
# Missing values per column in the training frame.
train_df.isna().sum()

In [None]:
# Missing values per column in the test frame.
test_df.isna().sum()

# **Data Preprocessing**

In [None]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [None]:
import nltk

# Download only the resources the preprocessing step uses instead of the whole
# NLTK collection ('all'), which is very large and slow to fetch:
#   - 'punkt' / 'punkt_tab': tokenizer models behind word_tokenize
#     (which of the two is required depends on the installed NLTK version)
#   - 'stopwords': the English stop-word list
#   - 'wordnet' / 'omw-1.4': data used by WordNetLemmatizer
nltk.download(['punkt', 'punkt_tab', 'stopwords', 'wordnet', 'omw-1.4'])

In [None]:
from functools import lru_cache


@lru_cache(maxsize=1)
def _english_stopwords():
  """Return the NLTK English stop-word list as a frozenset, loaded only once."""
  return frozenset(stopwords.words('english'))


def preprocess_text(text):
  """Normalise a sentence for the classifier.

  The text is lower-cased and tokenized, English stop-words are dropped, and
  each remaining token is lemmatized.

  Args:
    text: The raw sentence (a string).

  Returns:
    The surviving lemmatized tokens joined by single spaces.
  """
  # Look the stop-words up in a cached set: calling stopwords.words('english')
  # inside the comprehension re-evaluated it for every single token and then
  # did a linear scan of the resulting list.
  stop_words = _english_stopwords()
  lemmatizer = WordNetLemmatizer()
  kept_tokens = [tok for tok in word_tokenize(text.lower()) if tok not in stop_words]
  return ' '.join(lemmatizer.lemmatize(tok) for tok in kept_tokens)

In [None]:
# Add a 'cleaned_sentence' column to both frames using the same preprocessing.
for frame in (train_df, test_df):
  frame['cleaned_sentence'] = frame['sentence'].apply(preprocess_text)

# **Model Training**

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB

In [None]:
# TF-IDF over unigrams and bigrams feeding a class-weighted logistic regression.
pipe_lr_model = Pipeline([
    ('tv', TfidfVectorizer(ngram_range=(1, 2), norm='l2')),
    ('lr', LogisticRegression(
        C=1,
        class_weight='balanced',
        solver='lbfgs',
        max_iter=1000,
        random_state=42,
    )),
])
# Fit on the preprocessed sentences; the fitted pipeline is the cell's output.
pipe_lr_model.fit(train_df['cleaned_sentence'], y=train_df['emotion'])

# **Model Evaluation**

In [None]:
# Score the fitted pipeline on the held-out test split.
lr_model_score = pipe_lr_model.score(
    test_df['cleaned_sentence'],
    y=test_df['emotion'],
)
print('score of Logistic Regression model is ', lr_model_score)

Confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
import matplotlib.pyplot as plt

In [None]:
# Predict the test split and tabulate true vs. predicted labels, keeping the
# rows/columns in the same order as the pipeline's classes_.
y_preds = pipe_lr_model.predict(test_df['cleaned_sentence'])
cm = confusion_matrix(
    y_true=test_df['emotion'],
    y_pred=y_preds,
    labels=pipe_lr_model.classes_,
)

# Draw the matrix and title the axes the display created.
disp = ConfusionMatrixDisplay(cm, display_labels=pipe_lr_model.classes_)
disp.plot(cmap=plt.cm.Blues)
disp.ax_.set_title("Confusion Matrix - Emotion Classifier")
plt.show()

# **Sample Prediction**

In [None]:
# The pipeline was fitted on preprocessed sentences (train_df['cleaned_sentence']),
# so raw text must go through preprocess_text before prediction; otherwise
# stop-words split up the bigrams the vectorizer learned.
s = ['i love this place']
prediction = pipe_lr_model.predict([preprocess_text(text) for text in s])
print(prediction[0])
print(pipe_lr_model.predict([preprocess_text("I miss my best friend")]))
print(pipe_lr_model.predict([preprocess_text("I saw a dead body")]))

# **Saving the model**

In [None]:
import joblib
# Persist the fitted pipeline (vectorizer + classifier) to model.pkl.
joblib.dump(value=pipe_lr_model, filename="model.pkl")

# **UI Using Gradio**

In [None]:
import gradio as gr

In [None]:
# model.pkl is the pipeline saved earlier in this notebook. joblib files are
# pickles: never load one that comes from an untrusted source.
model = joblib.load("model.pkl")


def predict_emotion(text):
  """Predict the emotion label for one piece of user-entered text.

  The input goes through preprocess_text first, because the pipeline was
  fitted on preprocessed sentences rather than raw text.

  Args:
    text: The raw text from the UI.

  Returns:
    The predicted emotion label, or an empty string when the input is
    missing or contains only whitespace.
  """
  # Blank input carries no features; return nothing rather than a label that
  # would only reflect the model's bias term.
  if text is None or not text.strip():
    return ''
  cleaned_text = preprocess_text(text)
  return model.predict([cleaned_text])[0]

# Minimal Gradio front end: a two-line text box in, the predicted label out.
interface = gr.Interface(
    predict_emotion,
    gr.Textbox(lines=2, placeholder='enter the text...'),
    'text',
    title='emotion classifier',
)

interface.launch()