### Naive Bayes Classifier Task
### 문장에서 느껴지는 감정 예측
##### 다중 분류(Multiclass Classification)
- 비대면 심리 상담사로서 메시지를 전달한 환자에 대한 감정 데이터를 수집했다.
- 각 메시지별로 감정이 표시되어 있다.
- 미래에 동일한 메시지를 보내는 환자에게 어떤 심리 치료가 적합할 수 있는지 알아보기 위한 모델을 구축한다.

In [52]:
import pandas as pd

# Load the semicolon-delimited dataset of patient messages and their
# labelled feelings, then display it.
feeling_df = pd.read_csv(
    './datasets/feeling.csv',
    delimiter=";",
)
feeling_df

Unnamed: 0,message,feeling
0,im feeling quite sad and sorry for myself but ...,sadness
1,i feel like i am still looking at a blank canv...,sadness
2,i feel like a faithful servant,love
3,i am just feeling cranky and blue,anger
4,i can have for a treat or if i am feeling festive,joy
...,...,...
17995,i just had a very brief time in the beanbag an...,sadness
17996,i am now turning and i feel pathetic that i am...,sadness
17997,i feel strong and good overall,joy
17998,i feel like this was such a rude comment and i...,anger


In [53]:
# Print a summary of the frame: row count, columns, non-null counts and dtypes.
feeling_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18000 entries, 0 to 17999
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   message  18000 non-null  object
 1   feeling  18000 non-null  object
dtypes: object(2)
memory usage: 281.4+ KB


In [54]:
# Count missing values per column to decide whether any imputation is needed.
feeling_df.isna().sum()

message    0
feeling    0
dtype: int64

In [55]:
# Count rows that are exact duplicates of an earlier row.
feeling_df.duplicated().sum()

np.int64(1)

In [56]:
# Remove exact duplicate rows, keeping the first occurrence of each.
feeling_df = feeling_df.drop_duplicates()

In [57]:
# Rebuild a contiguous 0..n-1 index after rows were removed; drop=True
# discards the old index instead of keeping it as a column.
feeling_df = feeling_df.reset_index(drop=True)
feeling_df

Unnamed: 0,message,feeling
0,im feeling quite sad and sorry for myself but ...,sadness
1,i feel like i am still looking at a blank canv...,sadness
2,i feel like a faithful servant,love
3,i am just feeling cranky and blue,anger
4,i can have for a treat or if i am feeling festive,joy
...,...,...
17994,i just had a very brief time in the beanbag an...,sadness
17995,i am now turning and i feel pathetic that i am...,sadness
17996,i feel strong and good overall,joy
17997,i feel like this was such a rude comment and i...,anger


In [58]:
from sklearn.preprocessing import LabelEncoder

# Convert the textual feeling labels into integer class codes and store them
# in a new 'Target' column. The fitted encoder is kept so that codes can be
# mapped back to feeling names later.
feeling_encoder = LabelEncoder().fit(feeling_df['feeling'])
targets = feeling_encoder.transform(feeling_df['feeling'])
feeling_df['Target'] = targets
feeling_df

Unnamed: 0,message,feeling,Target
0,im feeling quite sad and sorry for myself but ...,sadness,4
1,i feel like i am still looking at a blank canv...,sadness,4
2,i feel like a faithful servant,love,3
3,i am just feeling cranky and blue,anger,0
4,i can have for a treat or if i am feeling festive,joy,2
...,...,...,...
17994,i just had a very brief time in the beanbag an...,sadness,4
17995,i am now turning and i feel pathetic that i am...,sadness,4
17996,i feel strong and good overall,joy,2
17997,i feel like this was such a rude comment and i...,anger,0


In [59]:
# The text label is now redundant with the encoded 'Target' column, so drop it.
feeling_df = feeling_df.drop(columns=['feeling'])
feeling_df

Unnamed: 0,message,Target
0,im feeling quite sad and sorry for myself but ...,4
1,i feel like i am still looking at a blank canv...,4
2,i feel like a faithful servant,3
3,i am just feeling cranky and blue,0
4,i can have for a treat or if i am feeling festive,2
...,...,...
17994,i just had a very brief time in the beanbag an...,4
17995,i am now turning and i feel pathetic that i am...,4
17996,i feel strong and good overall,2
17997,i feel like this was such a rude comment and i...,0


In [60]:
# Inspect how many messages belong to each encoded class.
feeling_df['Target'].value_counts()

Target
2    6065
4    5216
0    2434
1    2149
3    1482
5     653
Name: count, dtype: int64

In [65]:
# Exclude the rows of class 5 (the smallest class in the distribution above)
# and renumber the index of the remaining rows.
is_kept_class = feeling_df["Target"] != 5
feeling_df = feeling_df.loc[is_kept_class].reset_index(drop=True)
feeling_df

Unnamed: 0,message,Target
0,im feeling quite sad and sorry for myself but ...,4
1,i feel like i am still looking at a blank canv...,4
2,i feel like a faithful servant,3
3,i am just feeling cranky and blue,0
4,i can have for a treat or if i am feeling festive,2
...,...,...
17341,i just had a very brief time in the beanbag an...,4
17342,i am now turning and i feel pathetic that i am...,4
17343,i feel strong and good overall,2
17344,i feel like this was such a rude comment and i...,0


In [66]:
# Re-check the class distribution after the filter.
feeling_df['Target'].value_counts()

Target
2    6065
4    5216
0    2434
1    2149
3    1482
Name: count, dtype: int64

In [67]:
# Undersample every class down to the size of the rarest class so that the
# training data is balanced. The per-class size is derived from the data
# (instead of a hard-coded row count) and the classes are discovered from the
# 'Target' column, so this cell keeps working if the dataset or the set of
# retained classes changes.
samples_per_class = int(feeling_df.Target.value_counts().min())

# Sample each class independently with the same fixed seed for repeatability;
# iterating the labels in ascending order keeps the concatenated frame grouped
# by class code.
balanced_parts = [
    feeling_df[feeling_df.Target == label].sample(n=samples_per_class, random_state=124)
    for label in sorted(feeling_df.Target.unique())
]

feeling_df = pd.concat(balanced_parts)
feeling_df

Unnamed: 0,message,Target
1671,i often feel dissatisfied when i don t have at...,0
15835,i intend to have them develop feelings for one...,0
84,i told her that i woke up feeling mad that i a...,0
14243,i dwell on this matter the more i feel infuria...,0
9986,i feel like i am really grouchy and some days ...,0
...,...,...
7142,i presented old work which made me feel guilty,4
1019,i do what i do because it feels lame to go alo...,4
4431,i feel inadequate and i shut down and feel cro...,4
951,i feel like i have an ugly duck face when i se...,4


In [68]:
# The sampled rows still carry their pre-sampling index labels; replace them
# with a fresh 0..n-1 index, then confirm that every class has the same count.
feeling_df = feeling_df.reset_index(drop=True)
feeling_df['Target'].value_counts()

Target
0    1482
1    1482
2    1482
3    1482
4    1482
Name: count, dtype: int64

In [69]:
# Display the frame that will be split into training and test sets.
feeling_df

Unnamed: 0,message,Target
0,i often feel dissatisfied when i don t have at...,0
1,i intend to have them develop feelings for one...,0
2,i told her that i woke up feeling mad that i a...,0
3,i dwell on this matter the more i feel infuria...,0
4,i feel like i am really grouchy and some days ...,0
...,...,...
7405,i presented old work which made me feel guilty,4
7406,i do what i do because it feels lame to go alo...,4
7407,i feel inadequate and i shut down and feel cro...,4
7408,i feel like i have an ugly duck face when i se...,4


In [71]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the messages for evaluation. Stratifying on the target keeps
# the class proportions the same in both splits, and the fixed random_state
# makes the split (and therefore the reported score) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    feeling_df.message,
    feeling_df.Target,
    test_size=0.2,
    stratify=feeling_df.Target,
    random_state=124,
)

In [72]:
# Verify that the stratified split left the test set with balanced classes.
y_test.value_counts()

Target
2    297
3    297
1    296
4    296
0    296
Name: count, dtype: int64

In [73]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# Text-classification pipeline: word counts from the raw messages are fed into
# a multinomial Naive Bayes classifier.
# NOTE: the step name 'multinomail_NB' (sic) is kept exactly as written because
# step names are part of the fitted pipeline object that gets saved later.
m_nb_pipe = Pipeline(
    steps=[
        ('count_vectorizer', CountVectorizer()),
        ('multinomail_NB', MultinomialNB()),
    ]
)
m_nb_pipe.fit(X_train.values, y_train)

0,1,2
,steps,"[('count_vectorizer', ...), ('multinomail_NB', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,input,'content'
,encoding,'utf-8'
,decode_error,'strict'
,strip_accents,
,lowercase,True
,preprocessor,
,tokenizer,
,stop_words,
,token_pattern,'(?u)\\b\\w\\w+\\b'
,ngram_range,"(1, ...)"

0,1,2
,alpha,1.0
,force_alpha,True
,fit_prior,True
,class_prior,


In [74]:
# Evaluate the fitted pipeline on the held-out test messages
# (for a classifier, score() reports mean accuracy).
m_nb_pipe.score(X_test.values, y_test)

0.8097165991902834

In [76]:
# Spot-check the fitted pipeline on two hand-written messages. Each prediction
# is printed as an array holding the encoded 'Target' class code.
for sample_message in (
    'i often feel dissatisfied',
    "i presented old work which made me feel guilty",
):
    print(m_nb_pipe.predict([sample_message]))

[0]
[4]


In [77]:
import joblib
# Persist the fitted pipeline (vectorizer + classifier) to disk for reuse.
# NOTE(review): only the pipeline is saved; feeling_encoder is not, so a
# consumer of this file gets integer class codes and needs the encoder (or its
# class list) separately to turn them back into feeling names.
joblib.dump(m_nb_pipe, 'feeling_model.pkl')

['feeling_model.pkl']