# Text Emotion Detection

In [None]:
# Import Libraries for Project
import pandas as pd
import numpy as np
import seaborn as sns

In [None]:
# Read the raw emotion dataset (a CSV file) into a DataFrame
dataset_path = '/content/emotion_dataset_raw.csv'
df = pd.read_csv(dataset_path)

In [None]:
# Preview the first five rows of the dataset
df.head(n=5)

In [None]:
# Count how many rows carry each distinct value of the 'Emotion' column
df['Emotion'].value_counts()

In [None]:
# Bar chart of the class distribution: one bar per value of 'Emotion'
color_palette = sns.color_palette("pastel")
sns.set_palette(color_palette)  # make the pastel palette the default
sns.countplot(
    data=df,
    x='Emotion',
    hue='Emotion',  # colour each bar by its own emotion label
    legend=False,
)

In [None]:
!pip install neattext



In [None]:
# Data preprocessing
import neattext.functions as nfx

# Run every entry of 'Text' through nfx.remove_userhandles and keep the
# result in a new 'Clean_Text' column (the original 'Text' column is untouched).
df['Clean_Text'] = df['Text'].map(nfx.remove_userhandles)


In [None]:
# List the names available in the neattext.functions module (imported as nfx)
dir(nfx)

In [None]:
# Split the data into the input variable and the target variable
# x : features - the 'Clean_Text' column, used as the model input during training.
# y : labels - the 'Emotion' column, i.e. the output/target variable.
x, y = df['Clean_Text'], df['Emotion']

In [None]:
# Split the data into a train set and a test set
# The model learns from the train set; the test set (30% of the data) is held
# back to evaluate the model's performance afterwards.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,  # fixed seed so the split is the same on every run
)

In [None]:
# Training the model
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Create a pipeline with CountVectorizer and LogisticRegression

In [None]:
# Pipeline: CountVectorizer features feeding a LogisticRegression classifier.
# fit() returns the pipeline itself, so pipe_lr is bound to the fitted pipeline.
pipe_lr = Pipeline(
    steps=[
        ('cv', CountVectorizer()),
        ('lr', LogisticRegression()),
    ]
).fit(x_train, y_train)

# Score on the held-out test split (shown as the cell output)
pipe_lr.score(x_test, y_test)


In [None]:
# Pipeline: CountVectorizer features feeding a RandomForestClassifier.
# random_state is pinned to the same seed as the train/test split because the
# forest is built with randomness; left unseeded, the score printed below
# changes from run to run.
pipe_rf = Pipeline(steps=[
    ('cv', CountVectorizer()),
    ('rf', RandomForestClassifier(random_state=42)),
])
pipe_rf.fit(x_train, y_train)

# Score on the held-out test split (shown as the cell output)
pipe_rf.score(x_test, y_test)

In [None]:
# Saving the model
# Persist the fitted logistic-regression pipeline (pipe_lr) to text_emotion.pkl
import joblib

# The with-block closes the file even if joblib.dump raises, so the handle is
# never leaked and a partially written file is not left open.
with open("text_emotion.pkl", "wb") as pipeline_file:
    joblib.dump(pipe_lr, pipeline_file)