<a href="https://colab.research.google.com/github/engineereliab076/my-projects/blob/main/EMOTIONAL_CLASSIFIER_2_wc_word_embedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import spacy
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [12]:
!python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-3.7.1
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')


In [14]:
nlp = spacy.load("en_core_web_lg")

In [2]:
data = pd.read_csv('/content/Emotion_classify_Data.csv')

In [3]:
data.head()

Unnamed: 0,Comment,Emotion
0,i seriously hate one subject to death but now ...,fear
1,im so full of life i feel appalled,anger
2,i sit here to write i start to dig out my feel...,fear
3,ive been really angry with r and i feel like a...,joy
4,i feel suspicious if there is no one outside l...,fear


In [5]:
data.describe()

Unnamed: 0,Comment,Emotion
count,5937,5937
unique,5934,3
top,i feel like a tortured artist when i talk to her,anger
freq,2,2000


In [6]:
data.shape

(5937, 2)

In [7]:
data['Emotion'].value_counts()

anger    2000
joy      2000
fear     1937
Name: Emotion, dtype: int64

In [8]:
data['Emotion_num'] = data['Emotion'].map({'anger':0,'joy':1,'fear':2})

In [9]:
data.head()

Unnamed: 0,Comment,Emotion,Emotion_num
0,i seriously hate one subject to death but now ...,fear,2
1,im so full of life i feel appalled,anger,0
2,i sit here to write i start to dig out my feel...,fear,2
3,ive been really angry with r and i feel like a...,joy,1
4,i feel suspicious if there is no one outside l...,fear,2


In [17]:
#PREPROCESS DATA
def preprocess(text):
  doc=nlp(text)
  filtered_token=[]
  for token in doc:
   if token.is_stop or token.is_punct:
    continue
   filtered_token.append(token.lemma_)

  return ' '.join(filtered_token)


In [18]:
data['preprocess'] = data['Comment'].apply(preprocess)

In [46]:
#apply word embeddings to comments
data['vector'] = data['Comment'].apply(lambda text:nlp(text).vector)

In [47]:
data.head()

Unnamed: 0,Comment,Emotion,Emotion_num,vector,preprocess
0,i seriously hate one subject to death but now ...,fear,2,"[0.19650966, 2.856644, -3.2373114, -3.3074455,...",seriously hate subject death feel reluctant drop
1,im so full of life i feel appalled,anger,0,"[-1.3807732, -2.3297756, -4.009363, -4.3174157...",m life feel appalled
2,i sit here to write i start to dig out my feel...,fear,2,"[0.53448343, 3.454533, -3.2703161, -2.7471306,...",sit write start dig feeling think afraid accep...
3,ive been really angry with r and i feel like a...,joy,1,"[-0.9963525, 0.23077007, -1.1077425, -0.929426...",ve angry r feel like idiot trust place
4,i feel suspicious if there is no one outside l...,fear,2,"[-0.42563504, 2.486397, -2.211311, -1.3845607,...",feel suspicious outside like rapture happen


train test split

In [61]:
x_train,x_test,y_train,y_test = train_test_split(
    data.vector,
    data.Emotion_num,
    test_size=0.2,
    random_state=2022,
    stratify=data.Emotion_num
)

In [62]:
print(x_train.shape,x_test.shape)

(4749,) (1188,)


In [63]:
x_train

5055    [0.9127638, 1.2363979, -2.4099076, -4.344601, ...
2662    [-1.1807996, 0.6724565, -4.210298, -0.64901, 2...
1355    [-0.25201005, -0.3494113, -2.9108336, -3.10738...
4393    [0.3438489, 1.30668, -3.1477644, -2.2374752, -...
3882    [-0.60537845, 0.33223882, -1.6118485, -3.22728...
                              ...                        
1142    [-0.6715271, -0.77006716, -4.2665815, -4.01005...
5915    [-0.76894563, -1.3046114, -3.427157, -3.010971...
4011    [-0.28734776, 0.6348115, -2.922315, -1.9577243...
3252    [-0.02242709, 1.4568181, -4.2382307, -3.310327...
5231    [-0.23552084, 1.1803368, -4.650641, -2.9436696...
Name: vector, Length: 4749, dtype: object

In [64]:
#convert data to 2d
x_train_2d = np.stack(x_train)
x_test_2d=np.stack(x_test)

In [65]:
x_train_2d

array([[ 0.9127638 ,  1.2363979 , -2.4099076 , ...,  1.6307905 ,
        -6.334743  ,  1.281648  ],
       [-1.1807996 ,  0.6724565 , -4.210298  , ..., -0.5999999 ,
        -5.453925  ,  2.1544142 ],
       [-0.25201005, -0.3494113 , -2.9108336 , ...,  0.6681275 ,
        -4.914402  ,  1.9980171 ],
       ...,
       [-0.28734776,  0.6348115 , -2.922315  , ..., -0.00775771,
        -5.4758053 ,  3.0263135 ],
       [-0.02242709,  1.4568181 , -4.2382307 , ...,  0.9366212 ,
        -7.150977  ,  2.130958  ],
       [-0.23552084,  1.1803368 , -4.650641  , ...,  1.8322788 ,
        -6.244241  ,  1.6104487 ]], dtype=float32)

In [66]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
x_train_scaled=scaler.fit_transform(x_train_2d)
x_test_scaled=scaler.fit_transform(x_test_2d)

model training

In [67]:
from sklearn.pipeline import Pipeline

In [68]:
from sklearn.ensemble import RandomForestClassifier

In [69]:
clf = Pipeline([
    ('rfc',RandomForestClassifier())
])

clf.fit(x_train_scaled,y_train)

y_pred = clf.predict(x_test_scaled)

print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

           0       0.54      0.41      0.47       528
           1       0.41      0.54      0.47       303
           2       0.36      0.39      0.38       357

    accuracy                           0.44      1188
   macro avg       0.44      0.45      0.44      1188
weighted avg       0.45      0.44      0.44      1188



In [70]:
from sklearn.naive_bayes import MultinomialNB

In [71]:
clf = Pipeline([
    ('nb',MultinomialNB())
])

clf.fit(x_train_scaled,y_train)

y_pred = clf.predict(x_test_scaled)

print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

           0       0.35      0.41      0.38       341
           1       0.71      0.43      0.54       664
           2       0.18      0.37      0.24       183

    accuracy                           0.41      1188
   macro avg       0.41      0.40      0.38      1188
weighted avg       0.52      0.41      0.44      1188



In [75]:
from sklearn.neighbors import KNeighborsClassifier

clf = Pipeline([
    ('knn',KNeighborsClassifier(n_neighbors=3))
])

clf.fit(x_train_scaled,y_train)

y_pred = clf.predict(x_test_scaled)

print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

           0       0.56      0.36      0.44       614
           1       0.35      0.44      0.39       320
           2       0.23      0.35      0.28       254

    accuracy                           0.38      1188
   macro avg       0.38      0.39      0.37      1188
weighted avg       0.43      0.38      0.39      1188



In [79]:
from sklearn.linear_model import LogisticRegression

clf = Pipeline([
    ('lr',LogisticRegression())
])

clf.fit(x_train_2d,y_train)

y_pred = clf.predict(x_test_2d)

print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

           0       0.67      0.66      0.66       406
           1       0.73      0.69      0.71       420
           2       0.65      0.69      0.67       362

    accuracy                           0.68      1188
   macro avg       0.68      0.68      0.68      1188
weighted avg       0.68      0.68      0.68      1188



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [84]:
from sklearn import svm

clf = Pipeline([
    ('lr',svm.SVC(decision_function_shape='ovo'))
])

clf.fit(x_train_2d,y_train)

y_pred = clf.predict(x_test_2d)

print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

           0       0.65      0.58      0.61       445
           1       0.74      0.66      0.70       451
           2       0.47      0.62      0.54       292

    accuracy                           0.62      1188
   macro avg       0.62      0.62      0.61      1188
weighted avg       0.64      0.62      0.62      1188

