## **Importing Basic Libraries**

In [1]:
import pandas as pd
import numpy as np

## **Reading Train, Test and Val data**

### Train Data

In [2]:
df_train = pd.read_csv('datasets/train.txt',sep = ';', names = ['comment', 'emotion'])
df_train.head()

Unnamed: 0,comment,emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [3]:
df_train.emotion.value_counts()

joy         5362
sadness     4666
anger       2159
fear        1937
love        1304
surprise     572
Name: emotion, dtype: int64

### Test Data

In [4]:
df_test = pd.read_csv('datasets/test.txt',sep = ';', names = ['comment', 'emotion'])
df_test.head()

Unnamed: 0,comment,emotion
0,im feeling rather rotten so im not very ambiti...,sadness
1,im updating my blog because i feel shitty,sadness
2,i never make her separate from me because i do...,sadness
3,i left with my bouquet of red and yellow tulip...,joy
4,i was feeling a little vain when i did this one,sadness


In [5]:
df_test.emotion.value_counts()

joy         695
sadness     581
anger       275
fear        224
love        159
surprise     66
Name: emotion, dtype: int64

### Val Data

In [6]:
df_val = pd.read_csv('datasets/val.txt',sep = ';', names = ['comment', 'emotion'])
df_val.head()

Unnamed: 0,comment,emotion
0,im feeling quite sad and sorry for myself but ...,sadness
1,i feel like i am still looking at a blank canv...,sadness
2,i feel like a faithful servant,love
3,i am just feeling cranky and blue,anger
4,i can have for a treat or if i am feeling festive,joy


In [8]:
df_val.emotion.value_counts()

joy         704
sadness     550
anger       275
fear        212
love        178
surprise     81
Name: emotion, dtype: int64

## **Mapping Numbers to Emotions**

### Train

In [9]:
emotions = {'joy': 0, 'sadness': 1, 'anger': 2, 'fear': 3, 'love':4, 'surprise':5}

df_train['map']= df_train.emotion.map(emotions)
df_test['map']= df_test.emotion.map(emotions)
df_val['map']= df_val.emotion.map(emotions)


## **Using Spacy**

### Creating a function to preprocess text using spacy

In [10]:
import spacy

In [11]:
#import spacy

nlp = spacy.load('en_core_web_sm')

def preprocess(text):
    list =[]
    for token in nlp(text):
        if not token.is_space and not token.is_punct:
            list.append(token.lemma_)
    return ' '.join(list)

In [12]:
df_train['preprocessed_comment']= df_train.comment.apply(preprocess)
df_test['preprocessed_comment']= df_test.comment.apply(preprocess)
df_val['preprocessed_comment']= df_val.comment.apply(preprocess)

## **Training , Testing and Validating data**

In [15]:
X_train = df_train['preprocessed_comment']
X_train.head()

0                              I do not feel humiliate
1    I can go from feel so hopeless to so damned ho...
2        I m grab a minute to post I feel greedy wrong
3    I be ever feel nostalgic about the fireplace I...
4                                    I be feel grouchy
Name: preprocessed_comment, dtype: object

In [16]:
y_train = df_train['map']
y_train.head()

0    1
1    1
2    2
3    4
4    2
Name: map, dtype: int64

In [17]:
X_test = df_test['preprocessed_comment']
X_test.head()

0    I m feel rather rotten so I m not very ambitio...
1             I m update my blog because I feel shitty
2    I never make she separate from I because I don...
3    I leave with my bouquet of red and yellow tuli...
4           I be feel a little vain when I do this one
Name: preprocessed_comment, dtype: object

In [18]:
y_test = df_test['map']
y_test.head()

0    1
1    1
2    1
3    0
4    1
Name: map, dtype: int64

In [19]:
X_val = df_val['preprocessed_comment']
X_val.head()

0    I m feel quite sad and sorry for myself but il...
1    I feel like I be still look at a blank canvas ...
2                       I feel like a faithful servant
3                       I be just feel cranky and blue
4       I can have for a treat or if I be feel festive
Name: preprocessed_comment, dtype: object

In [20]:
y_val = df_val['map']
y_val.head()

0    1
1    1
2    4
3    2
4    0
Name: map, dtype: int64

## **Importing *Pipeline*, *TfidfVectorizer* and *RandomForestClassifier***

In [13]:
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier

## **Training the model**

In [21]:
pipe = Pipeline([('vectorizer Tfidf', TfidfVectorizer()), ('Random Forest', RandomForestClassifier( n_estimators=200,
    criterion='gini'))])

pipe.fit(X_train, y_train)

## **Predicting on Test Data**

In [22]:
y_pred = pipe.predict(X_test)
y_pred_test = pd.Series(y_pred)
y_pred_test.head()

0    1
1    1
2    1
3    0
4    1
dtype: int64

## **Classification Report , Confusion Matrix and Heatmap on *TEST DATA***

In [23]:
from sklearn.metrics import classification_report

print('Classification Report TEST DATA  :\n\n\n ', classification_report(y_test, y_pred_test))

Classification Report TEST DATA  :


                precision    recall  f1-score   support

           0       0.79      0.94      0.86       695
           1       0.91      0.88      0.89       581
           2       0.90      0.78      0.83       275
           3       0.82      0.79      0.80       224
           4       0.81      0.53      0.64       159
           5       0.64      0.53      0.58        66

    accuracy                           0.84      2000
   macro avg       0.81      0.74      0.77      2000
weighted avg       0.84      0.84      0.83      2000



## **Predicting on Val Data**

In [24]:
y_predi = pipe.predict(X_val)
y_pred_val = pd.Series(y_predi)
y_pred_val.head()

0    1
1    1
2    1
3    2
4    0
dtype: int64

## **Classification Report , Confusion Matrix and Heatmap on *VAL DATA***

In [None]:
print('Classification Report VAL DATA  :\n\n\n ', classification_report(y_val, y_pred_val))

In [25]:
def transcript_emotion (string):
    arr_emotions = ['joy', 'sadness', 'anger', 'fear', 'love', 'surprise']
    pred = pipe.predict([string])
    print(pred[0])
    emotion = arr_emotions[pred[0]]

    print("ce texte est : ", emotion)
    
transcript_emotion("I don't want to not do it well")

0
ce texte est :  joy
