### Step 1 :- Loading relevant libraries

* * *

In [None]:
#!pip install transformers nltk pandas numpy matplotlib seaborn transformers torch

import nltk # using stopwords
import pandas as pd # to create DataFrame
import seaborn as sns # using visualisation
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer # convert text into vectors
from sklearn.model_selection import train_test_split #to split data into training and testing
from sklearn.linear_model import LogisticRegression # to apply logist regression
from sklearn.naive_bayes import GaussianNB # to apply NaiveBayes
from sklearn.metrics import accuracy_score # to check accuracy score
from sklearn.neighbors import KNeighborsClassifier # to apply KNN
from sklearn.tree import DecisionTreeClassifier # to apply Decision Tree
from sklearn.svm import SVC #to apply SVC
from sklearn.ensemble import AdaBoostClassifier,RandomForestClassifier,VotingClassifier 
#to apply Voting classifier, AdaBoost classifier, Randomforest classifier
from joblib import dump #save model

#filter and ignore warnings.
import warnings
warnings.filterwarnings('ignore')

#set max row and columns to None (no limit)
pd.set_option('display.max_columns', None)
pd.set_option("display.max_rows", None)

### Step 2 :-  Load the dataset

In [None]:
new_df = pd.read_csv('/kaggle/input/emotion-classification-dataset/Data.csv')
new_df

In [None]:
# Shuffle the rows. The seed is fixed so that the row order -- and therefore the
# train/test split made later with random_state=352 -- is the same on every run.
new_df = new_df.sample(frac=1, random_state=352).reset_index(drop=True)
new_df

### Step 3 :- Getting Data Insights

In [None]:
new_df['sentiment'].value_counts()

In [None]:
print(f"Number of row in our dataframe is {new_df.shape[0]} and column is {new_df.shape[1]}.")

In [None]:
print("Below is Info of our Dataframe\n")
new_df.info()

In [None]:
print("Below is Description of our Dataframe\n")
new_df.describe()

In [None]:
print(f"There are total {new_df.isnull().sum().sum()} null values in our Dataset.")

In [None]:
# List every column together with its 1-based position.
for i, col in enumerate(new_df.columns, start=1):
    print(f"Our {i} column name is {col}.")

In [None]:
print(f"Number of Dimensions of our Dataframe is {new_df.ndim}.")

In [None]:
print(f"SIze of our Dataframe is {new_df.size}.") #row x column 

In [None]:
print("Below is memory usage of each column in bytes\n",new_df.memory_usage(deep=True))

### Step 4 :-  Data preprocess

In [None]:
#create a function which convert text into lowercase and then removing stopwords from column
def preprocess_data(df, stop_words=None):
    """Lowercase the ``content`` column and strip stop words from it.

    The ``content`` column of ``df`` is overwritten in place and the same
    DataFrame object is returned, so existing callers that rely on either
    the return value or the in-place update keep working.

    Args:
        df: DataFrame with a ``content`` column holding the raw text.
        stop_words: Optional iterable of lowercase words to remove. When
            omitted, NLTK's English stop-word list is used.

    Returns:
        The DataFrame that was passed in, with ``content`` cleaned.
    """
    if stop_words is None:
        try:
            stop_words = nltk.corpus.stopwords.words('english')
        except LookupError:
            # The corpus is not installed on this machine yet; fetch it once.
            nltk.download('stopwords', quiet=True)
            stop_words = nltk.corpus.stopwords.words('english')
    # A set gives O(1) membership tests inside the per-word filter below.
    stop_words = set(stop_words)

    # Missing values become empty strings so that .split() never sees a float NaN.
    lowered = df['content'].fillna('').astype(str).str.lower()
    df['content'] = lowered.apply(
        lambda text: ' '.join(word for word in text.split() if word not in stop_words)
    )
    return df

In [None]:
new_df = preprocess_data(new_df) #applying function

In [None]:
new_df.head()

In [None]:
vectorizer = TfidfVectorizer() #initializing TfidfVectorizer into vectorizer name

In [None]:
x = vectorizer.fit_transform(new_df['content']) #fit into content column

In [None]:
x.toarray()

In [None]:
feature_names = vectorizer.get_feature_names_out()
feature_names

In [None]:
# Integer code assigned to each emotion label.
mood_map = {'happy': 0, 'sad': 1, 'angry': 2}

# Encode the labels, failing early on any label that is missing from mood_map
# (Series.map would otherwise turn it into NaN and silently corrupt y).
encoded = new_df['sentiment'].map(mood_map)
unmapped = encoded.isna()
if unmapped.any():
    unknown = sorted(new_df.loc[unmapped, 'sentiment'].astype(str).unique())
    raise ValueError(f"Sentiment labels missing from mood_map: {unknown}")

y = encoded.astype('int64').values  # mapping sentiment into 0,1,2

In [None]:
y

### Step 5 :-  Data Visualisation

In [None]:
# Horizontal bar chart of the number of rows per sentiment class.
class_counts = new_df.groupby('sentiment').size()
bar_colors = sns.palettes.mpl_palette('Dark2')
class_counts.plot(kind='barh', color=bar_colors)

# Hide the top and right borders of the current axes.
current_axes = plt.gca()
current_axes.spines[['top', 'right']].set_visible(False)

### Step 6 :-  Split Data into Train and Test

In [None]:
X_train,X_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=352) #split data into train and test

In [None]:
# Report the dimensions of each piece of the train/test split.
for caption, split in (
    ('Shape Of X_train is :- ', X_train),
    ('Shape Of X_test is :- ', X_test),
    ('Shape Of y_train is :- ', y_train),
    ('Shape Of y_test is :- ', y_test),
):
    print(caption, split.shape)

### Step 7 :- Applying Different Models

#### 1. Train model of LogisticRegression

In [None]:
classifier1 = LogisticRegression() #initialised

In [None]:
classifier1.fit(X_train,y_train) #fit on training set

In [None]:
y_pred1 = classifier1.predict(X_test) #prediction on test data

In [None]:
accuracy1 = accuracy_score(y_test,y_pred1) #getting accuracy
accuracy1

#### 2. Train model of Naive Bayes

In [None]:
classifier2 = GaussianNB() #initialised

In [None]:
classifier2.fit(X_train.toarray(),y_train)#fit on training set

In [None]:
y_pred2 = classifier2.predict(X_test.toarray()) #prediction on test data

In [None]:
accuracy2 = accuracy_score(y_test,y_pred2) #getting accuracy
accuracy2

#### 3. Train model of K-Nearest Neighbors

In [None]:
classifier3 = KNeighborsClassifier() #initialised

In [None]:
classifier3.fit(X_train,y_train) #fit on training set

In [None]:
y_pred3 = classifier3.predict(X_test) #prediction on test data

In [None]:
accuracy3 = accuracy_score(y_test,y_pred3) #getting accuracy
accuracy3

#### 4. Train model of Decision Tree

In [None]:
classifier4 = DecisionTreeClassifier() #initialised

In [None]:
classifier4.fit(X_train,y_train) #fit on training set

In [None]:
y_pred4 = classifier4.predict(X_test) #prediction on test data

In [None]:
accuracy4 = accuracy_score(y_test,y_pred4) #getting accuracy
accuracy4

#### 5. Train model of Support Vector Machines

In [None]:
classifier5 = SVC() #initialised

In [None]:
classifier5.fit(X_train,y_train) #fit on training set

In [None]:
y_pred5 = classifier5.predict(X_test) #prediction on test data

In [None]:
accuracy5 = accuracy_score(y_test,y_pred5) #getting accuracy
accuracy5

#### 6. Train model of Random Forest Classifier

In [None]:
classifier6 = RandomForestClassifier() #initialised

In [None]:
classifier6.fit(X_train,y_train) #fit on training set

In [None]:
# Predict with the Random Forest model (classifier6), not the SVM (classifier5),
# so that accuracy6 really measures the Random Forest.
y_pred6 = classifier6.predict(X_test) #prediction on test data

In [None]:
accuracy6 = accuracy_score(y_test,y_pred6) #getting accuracy
accuracy6

#### 7. Train model of Adaboost Classifier

In [None]:
classifier7 = AdaBoostClassifier() #initialised

In [None]:
classifier7.fit(X_train,y_train) #fit on training set

In [None]:
y_pred7 = classifier7.predict(X_test) #prediction on test data

In [None]:
accuracy7 = accuracy_score(y_test,y_pred7) #getting accuracy
accuracy7

### Step 8 :-  Save Models

In [None]:
# Persist the fitted TF-IDF vectorizer as well: the saved models can only score
# new text that has been transformed with this exact fitted vocabulary.
dump(vectorizer,'TfidfVectorizer.h5')

# NOTE: these files are written by joblib.dump, whatever the .h5 extension suggests.
dump(classifier1,'LogisticRegression.h5')
dump(classifier2,'Naivebayes.h5')
dump(classifier3,'KNN.h5')
dump(classifier4,'DecisionTree.h5')
dump(classifier5,'SVM.h5')
dump(classifier6,'RandomForest.h5')
dump(classifier7,'Adaboost.h5') #for load model use joblib.load('SVM.h5')

### Step 9 :- Prediction

In [None]:
test = ["I don't Like You."] #prediction on new text
new_X = vectorizer.transform(test)

In [None]:
# Run every trained model on the vectorized sample text (new_X).
# Each result is an array of class codes: 0 for happy, 1 for sad, 2 for angry.
prediction1= classifier1.predict(new_X) # 0 for happy, 1 for sad, 2 for angry
# classifier2 was fitted on a dense array (X_train.toarray()), so its input is densified too.
prediction2= classifier2.predict(new_X.toarray()) 
prediction3= classifier3.predict(new_X) 
prediction4= classifier4.predict(new_X) 
prediction5= classifier5.predict(new_X) 
prediction6= classifier6.predict(new_X) 
prediction7= classifier7.predict(new_X) 

In [None]:
# Legend for the class codes, printed in bold via ANSI escape sequences.
print('\033[1m0 for happy, 1 for sad, 2 for angry\033[0m \n')

# One line per model, in the order the models were trained.
for caption, predicted in (
    ('Prediction For LogisticRegression Is', prediction1),
    ('Prediction For Naivebayes Is', prediction2),
    ('Prediction For KNN Is', prediction3),
    ('Prediction For DecisionTree Is', prediction4),
    ('Prediction For SVM Is', prediction5),
    ('Prediction For RandomForest Is', prediction6),
    ('Prediction For Adaboost Is', prediction7),
):
    print(caption, predicted)

### Step 10 :-  Additional

**In addition, we use `VotingClassifier`, an ensemble learning method that combines the predictions of several base models to produce a single final prediction.**

In [None]:
# Define the individual classifiers
# NOTE(review): these assignments rebind classifier1..classifier7 to new, unfitted
# estimators, so the models trained in Step 7 are no longer reachable under these
# names. Re-running the Step 9 prediction cells after this cell would presumably
# fail until the models are fitted again -- confirm, or use distinct names here.
classifier1 = LogisticRegression()
classifier2 = GaussianNB()
classifier3 = KNeighborsClassifier()
classifier4 = DecisionTreeClassifier()
classifier5 = SVC()
classifier6 = RandomForestClassifier()
classifier7 = AdaBoostClassifier()

In [None]:
# Create a list of tuples, each containing a name for the classifier and the classifier itself
classifiers = [
    ('logistic_regression', classifier1),
    ('naive_bayes', classifier2),
    ('k_nearest_neighbors', classifier3),
    ('decision_tree', classifier4),
    ('support_vector_machine', classifier5),
    ('random_forest', classifier6),
    ('adaboost', classifier7)
]

In [None]:
# Initialize the VotingClassifier
voting_classifier = VotingClassifier(estimators=classifiers, voting='hard')

In [None]:
# Fit the VotingClassifier to the training data
voting_classifier.fit(X_train.toarray(), y_train)

In [None]:
# Make predictions using the VotingClassifier
y_pred_voting = voting_classifier.predict(X_test.toarray())

In [None]:
# Calculate accuracy
accuracy_voting = accuracy_score(y_test, y_pred_voting)
print("We have got VotingClassifier Accuracy of",accuracy_voting)

In [None]:
dump(voting_classifier,"Voting Classifier.h5") #model save