<a href="https://colab.research.google.com/github/bargida/AI_codes/blob/main/stop_words2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Toy corpus used to demonstrate stop-word removal and TF-IDF weighting.
docs = [
    "This is my first sentence",
    "This should be very challenging task",
    "In this book, there is no meaning",
    "This is the last sentence, and we are ending here",
]

In [10]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS

# Load the spaCy English pipeline once at module level; remove_stop_words
# reuses this shared `nlp` object on every call instead of reloading it.
# The 'en_core_web_sm' model package must be installed in the environment.
nlp = spacy.load('en_core_web_sm')


def remove_stop_words(text):
  """Return `text` with all spaCy stop-word tokens dropped.

  The text is tokenized with the module-level `nlp` pipeline. Every token
  whose `is_stop` flag is false is kept, and the kept token texts are joined
  with single spaces. Tokens that are not stop words, such as punctuation,
  are kept as separate tokens (e.g. 'book , meaning').
  """
  kept_tokens = [token.text for token in nlp(text) if not token.is_stop]
  return ' '.join(kept_tokens)

In [11]:
# Strip stop words from every demo sentence, then show the cleaned corpus.
docs_without_stop_words = list(map(remove_stop_words, docs))
print(docs_without_stop_words)

['sentence', 'challenging task', 'book , meaning', 'sentence , ending']


In [12]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np  # not used in this cell; kept in case other cells rely on `np`

# Learn the vocabulary and IDF weights from the cleaned demo sentences and
# encode each sentence as one TF-IDF row.
tf_idf_vectorizer = TfidfVectorizer()
tf_idf_matrix = tf_idf_vectorizer.fit_transform(docs_without_stop_words)

# fit_transform returns a sparse matrix; convert it to a dense array so the
# individual weights are visible when printed. (A bare `tf_idf_matrix`
# expression in the middle of a cell displays nothing, so it was dropped.)
print(tf_idf_matrix.toarray())


[[0.         0.         0.         0.         1.         0.        ]
 [0.         0.70710678 0.         0.         0.         0.70710678]
 [0.70710678 0.         0.         0.70710678 0.         0.        ]
 [0.         0.         0.78528828 0.         0.6191303  0.        ]]


In [13]:
import pandas as pd

# Load the emotion dataset. The path is relative, so the CSV must be present
# in the notebook's working directory. Later cells read the 'Comment' and
# 'Emotion' columns of this frame.
df = pd.read_csv('Emotion_classify_Data.csv')

# Last expression of the cell, so the frame is rendered as the cell output.
df

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear
...,...,...
5932,i begun to feel distressed for you,fear
5933,i left feeling annoyed and angry thinking that...,anger
5934,i were to ever get married i d have everything...,joy
5935,i feel reluctant in applying there because i w...,fear


In [14]:
# Class balance check: number of rows per emotion label.
df['Emotion'].value_counts()

Unnamed: 0_level_0,count
Emotion,Unnamed: 1_level_1
anger,2000
joy,2000
fear,1937


In [15]:
# Encode the emotion labels as integer class ids for the classifier.
df['Emotion_nums'] = df['Emotion'].map({'anger': 0, 'joy': 1, 'fear': 2})

# Series.map yields NaN for any label missing from the dict. Fail here with a
# clear message instead of letting NaN targets surface later at fit time.
assert df['Emotion_nums'].notna().all(), (
    "Unmapped Emotion labels: "
    + str(df.loc[df['Emotion_nums'].isna(), 'Emotion'].unique().tolist())
)

df

Unnamed: 0,Comment,Emotion,Emotion_nums
0,i seriously hate one subject to death but now ...,fear,2
1,im so full of life i feel appalled,anger,0
2,i sit here to write i start to dig out my feel...,fear,2
3,ive been really angry with r and i feel like a...,joy,1
4,i feel suspicious if there is no one outside l...,fear,2
...,...,...,...
5932,i begun to feel distressed for you,fear,2
5933,i left feeling annoyed and angry thinking that...,anger,0
5934,i were to ever get married i d have everything...,joy,1
5935,i feel reluctant in applying there because i w...,fear,2


In [16]:
# Run every comment through remove_stop_words and store the cleaned text
# in a new column next to the original.
df['Comments_without_stop_words'] = df['Comment'].map(remove_stop_words)

df

Unnamed: 0,Comment,Emotion,Emotion_nums,Comments_without_stop_words
0,i seriously hate one subject to death but now ...,fear,2,seriously hate subject death feel reluctant drop
1,im so full of life i feel appalled,anger,0,m life feel appalled
2,i sit here to write i start to dig out my feel...,fear,2,sit write start dig feelings think afraid acce...
3,ive been really angry with r and i feel like a...,joy,1,ve angry r feel like idiot trusting place
4,i feel suspicious if there is no one outside l...,fear,2,feel suspicious outside like rapture happened
...,...,...,...,...
5932,i begun to feel distressed for you,fear,2,begun feel distressed
5933,i left feeling annoyed and angry thinking that...,anger,0,left feeling annoyed angry thinking center stu...
5934,i were to ever get married i d have everything...,joy,1,married d ready offer ve got clubs perfect goo...
5935,i feel reluctant in applying there because i w...,fear,2,feel reluctant applying want able find company...


In [17]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    df['Comments_without_stop_words'],
    df['Emotion_nums'],
    test_size=0.2,
    random_state=42,
)

print("x_train shape:", x_train.shape)
print("x_test shape:", x_test.shape)


x_train shape: (4749,)
x_test shape: (1188,)


In [18]:
# A fresh vectorizer for the emotion data, separate from the demo one above.
tf_idf_vec = TfidfVectorizer()

# Fit on the training text only, producing the training feature matrix.
x_train_tf_idf = tf_idf_vec.fit_transform(x_train)

# The test text is only transformed (not fitted), so it is encoded with what
# was learned from the training split and does not influence the features.
x_test_tf_idf = tf_idf_vec.transform(x_test)

# Last expression of the cell: shows the training matrix summary as output.
x_train_tf_idf


<4749x7587 sparse matrix of type '<class 'numpy.float64'>'
	with 37123 stored elements in Compressed Sparse Row format>

In [19]:
# Train a multinomial Naive Bayes classifier on the TF-IDF training features.

from sklearn.naive_bayes import MultinomialNB

# Constructed with default hyperparameters.
naive_bayes_classifier = MultinomialNB()

# Fit on the training features and integer emotion labels. As the last
# expression of the cell, the result of fit() is shown as the cell output.
naive_bayes_classifier.fit(x_train_tf_idf, y_train)

In [20]:
# Evaluate the trained classifier on the held-out test split.

from sklearn.metrics import classification_report

y_pred = naive_bayes_classifier.predict(x_test_tf_idf)

# Per-class precision / recall / F1. The class ids follow the Emotion_nums
# encoding: 0 = anger, 1 = joy, 2 = fear. (A bare `y_pred` expression in the
# middle of a cell displays nothing, so it was dropped.)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.89      0.94      0.92       392
           1       0.94      0.88      0.91       380
           2       0.92      0.93      0.92       416

    accuracy                           0.92      1188
   macro avg       0.92      0.92      0.92      1188
weighted avg       0.92      0.92      0.92      1188

