#### Tutorial: 
https://towardsdatascience.com/multi-class-text-classification-with-scikit-learn-12f1e60e0a9f

#### Emotional Speech Database:
http://www.affective-sciences.org/home/research/materials-and-online-research/research-material/

In [1]:
import pandas as pd
df = pd.read_csv('./data/emotionset.csv',encoding = "ISO-8859-1")
df.head(2)

Unnamed: 0,ID,CITY,COUN,SUBJ,SEX,AGE,RELI,PRAC,FOCC,MOCC,...,SELF,RELA,VERBAL,NEUTRO,Field1,Field3,Field2,MYKEY,SIT,STATE
0,11001.0,1.0,1.0,1.0,1.0,33.0,1.0,2.0,6.0,1.0,...,3.0,3.0,2.0,0.0,joy,4,3,110011,"During the period of falling in love, each tim...",1
1,11001.0,1.0,1.0,1.0,1.0,33.0,1.0,2.0,6.0,1.0,...,2.0,2.0,0.0,0.0,fear,3,2,110012,When I was involved in a traffic accident.,1


In [2]:
#Clean up
from io import StringIO

col = ['Field1','SIT']
df = df[col]
df = df[pd.notnull(df['SIT'])]
df.columns = ['Emotion', 'Sentence']
df.head()

Unnamed: 0,Emotion,Sentence
0,joy,"During the period of falling in love, each tim..."
1,fear,When I was involved in a traffic accident.
2,anger,When I was driving home after several days of...
3,sadness,When I lost the person who meant the most to me.
4,disgust,The time I knocked a deer down - the sight of ...


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf = TfidfVectorizer(sublinear_tf=True, min_df=5, norm='l2', encoding='latin-1', ngram_range=(1, 2), stop_words='english')
features = tfidf.fit_transform(df.Sentence).toarray()
labels = df.Emotion
features.shape

(7666, 2603)

In [4]:
#Naive Bayes Classifier

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
X_train, X_test, y_train, y_test = train_test_split(df['Sentence'], df['Emotion'], random_state = 0)
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(X_train)
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
clf = MultinomialNB().fit(X_train_tfidf, y_train)

In [5]:
print(clf.predict(count_vect.transform(['I have procrastinated too hard'])))

['guilt']


In [6]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score
models = [
    RandomForestClassifier(n_estimators=200, max_depth=3, random_state=0),
    LinearSVC(),
    MultinomialNB(),
    LogisticRegression(random_state=0),
]
CV = 5
cv_df = pd.DataFrame(index=range(CV * len(models)))
entries = []
for model in models:
    model_name = model.__class__.__name__
    accuracies = cross_val_score(model, features, labels, scoring='accuracy', cv=CV)
    for fold_idx, accuracy in enumerate(accuracies):
        entries.append((model_name, fold_idx, accuracy))
cv_df = pd.DataFrame(entries, columns=['model_name', 'fold_idx', 'accuracy'])
import seaborn as sns
sns.boxplot(x='model_name', y='accuracy', data=cv_df)
sns.stripplot(x='model_name', y='accuracy', data=cv_df, 
              size=8, jitter=True, edgecolor="gray", linewidth=2)
plt.show()

<matplotlib.figure.Figure at 0x27bffd85860>

In [9]:
#Serialize the classifier to save it as a file. 
import dill as pickle
filename = 'emotion_classifier.pk'

In [11]:
with open('./flask_api/models/'+filename, 'wb') as file:
	pickle.dump(clf, file)

In [12]:
with open('./flask_api/models/'+filename ,'rb') as f:
    loaded_model = pickle.load(f)

In [42]:
loaded_model.predict(count_vect.transform(['I happy procrastinated too hard']))

array(['joy'], dtype='<U7')

In [48]:
#Serialize count vector into file
countName = 'countVectorizer.pk'
with open('./flask_api/models/'+countName, 'wb') as file:
    pickle.dump(count_vect, file)

In [49]:
with open('./flask_api/models/'+countName ,'rb') as f:
    count_vect = pickle.load(f)

In [50]:
print(clf.predict(count_vect.transform(['I have procrastinated too hard'])))

['guilt']


In [36]:
import json
import requests

In [282]:
"""Setting the headers to send and accept json responses
"""
header = {'Content-Type': 'application/json', \
                  'Accept': 'application/json'}
#Request is coming from frontend, which won't have access to this function. 
#request = count_vect.transform(['I have procrastinated too hard'])
#data = json.dumps({"text": " I need to hurry up and learn how to code! Everyone else is better. First I want to finish up machine learning. Too much time has already been wasted from procrastination. "})
data = json.dumps({"text": "I woke up at 11:30 in the morning and fell back asleep until 2:00 PM. The night before I laid in bed watching something on Netflix, I can’t remember now. The past week is a haze. I’ve barely had any energy to get out of bed, let alone do anything productive. Today I managed to make myself some breakfast and that’s the biggest victory I’ve had in the last three days. I have to go to work soon and I’m dreading it. It means having to get a shower, it means having to be around people and interact with them for longer than a few seconds. I already know that as soon as I get home I won’t make dinner, I won’t do anything but lay on my couch for a few hours until I’m tired enough to fall asleep. And this is how I’ve been for months. This is what it’s like to live with depression. "})


In [288]:
#If server hosted locally: 
resp = requests.post("http://184.72.144.143:80/predict", \
                    data = data,\
                    headers= header)

In [289]:
resp = requests.post("http://184.72.144.143:80/calendar", \
                    data = data,\
                    headers= header)

In [270]:
print(resp)

<Response [200]>


In [287]:
resp.json()

{'calSuggestions': {'0': ['2018-06-03 09:15'],
  '1': ['2018-06-02 11:15'],
  '2': ['2018-06-01 16:15']},
 'mindState': 'depression',
 'predictions': ['fear',
  'fear',
  'joy',
  'anger',
  'shame',
  'guilt',
  'guilt',
  'guilt',
  'joy',
  'anger'],
 'sentence': ' ',
 'sentimentState': 0}

In [281]:
json.loads(json.dumps(resp.json()['predictions']))

['fear',
 'fear',
 'joy',
 'anger',
 'shame',
 'guilt',
 'guilt',
 'guilt',
 'joy',
 'anger']

In [212]:
"""POST <url>/predict
"""
resp = requests.post("http://127.0.0.1:5000/predict", \
                    data = json.dumps(data),\
                    headers= header)

ConnectionError: HTTPConnectionPool(host='127.0.0.1', port=5000): Max retries exceeded with url: /predict (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000027B8ABFB048>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it',))

In [262]:
re.split(r"\.|\?|\!","Hi! My name is me. I am me."):

SyntaxError: invalid syntax (<ipython-input-262-bea0e88a78c0>, line 1)

In [264]:
import re
# to split by multile characters
paragraph="Hi! My name is me. I am me."
#   regular expressions are easiest (and fastest)
sentenceEnders = re.compile('[.!?]')
sentenceList = sentenceEnders.split(paragraph)
sentenceList

['Hi', ' My name is me', ' I am me', '']