In [None]:
import re
import nltk
import string
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from nltk import SnowballStemmer
from nltk.corpus import stopwords
from wordcloud import WordCloud, STOPWORDS
from sklearn.naive_bayes import BernoulliNB
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
# nltk.download('stopwords')

In [None]:
# Use seaborn's dark grid theme for every figure drawn below.
sns.set_style('darkgrid')

In [None]:
# Silence every warning category for the rest of the session (keeps cell output clean).
warnings.simplefilter('ignore')

In [None]:
# Load the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='data/stress.csv')
# Preview the first five rows.
df.head(n=5)

In [None]:
# Alternative stop-word source from NLTK, currently disabled:
# stopword = set(stopwords.words('english'))

# English Snowball stemmer shared by clean() below.
stemmer = SnowballStemmer('english')


def clean(text, stopwords=None, stem=None):
    """Normalise one document into a space-separated string of stemmed tokens.

    Steps, in order: lower-case, drop ``[...]`` spans, URLs and ``<...>`` tags,
    strip ASCII punctuation, drop any token containing a digit, remove stop
    words, then stem what is left.

    Parameters
    ----------
    text : object
        The document. It is coerced with ``str()``, so non-string values
        (e.g. ``None`` or NaN) are processed as their string representation.
    stopwords : iterable of str, optional
        Words to discard. Defaults to the module-level ``STOPWORDS``.
    stem : callable, optional
        Function mapping one token to its stem. Defaults to the module-level
        ``stemmer.stem``.

    Returns
    -------
    str
        Cleaned tokens joined by single spaces; ``""`` if nothing survives.

    Notes
    -----
    Punctuation is stripped before the stop-word check, so a stop-list entry
    that contains an apostrophe cannot match its stripped token.
    """
    if stopwords is None:
        stopwords = STOPWORDS
    if stem is None:
        stem = stemmer.stem
    # Build the lookup set once per call instead of once per token.
    if isinstance(stopwords, (set, frozenset)):
        stop_set = stopwords
    else:
        stop_set = set(stopwords)

    text = str(text).lower()
    # Raw strings keep the regex escapes (\[, \S, \w, \d) out of Python's own
    # string-escape handling, which warns on unknown escapes.
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\w*\d\w*', '', text)

    # split() with no argument treats newlines as separators (words on
    # adjacent lines are no longer glued together) and never yields empty
    # tokens for runs of whitespace.
    tokens = [word for word in text.split() if word not in stop_set]
    return " ".join(stem(word) for word in tokens)


# Replace every raw document in the 'text' column with its cleaned form.
df['text'] = df['text'].map(clean)

In [None]:
# Concatenate the whole 'text' column into one string for the word cloud.
text = " ".join(df['text'])

# generate() fills in the cloud and returns the same WordCloud instance.
wordcloud = WordCloud(stopwords=set(STOPWORDS))
wordcloud.generate(text)

# Draw the cloud on a wide figure with the axes hidden.
plt.figure(figsize=(12, 6))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
# Keep only the modelling columns and turn the numeric labels into readable
# names. replace() leaves values that are not 0/1 untouched, so re-running
# this cell is harmless (a dict-based map() would turn the already-renamed
# labels into NaN on a second run).
df = df[["text", "label"]].assign(
    label=lambda frame: frame["label"].replace({0: "No Stress", 1: "Stress"})
)
df.head()

In [None]:
# Raw (cleaned) documents and their labels.
X = df['text'].values
y = df['label'].values

# Split the documents BEFORE vectorising so the vocabulary is learned from
# the training portion only; fitting the vectoriser on the full corpus would
# leak information about the test documents into the features.
text_train, text_test, y_train, y_test = train_test_split(X, y,
                                                          test_size=0.2,
                                                          random_state=7)

# Bag-of-words features: fit on the training text, reuse the fitted
# vocabulary for the test text.
cv = CountVectorizer()
X_train = cv.fit_transform(text_train)
X_test = cv.transform(text_test)

In [None]:
# Train a Bernoulli naive Bayes classifier; fit() returns the estimator itself.
model = BernoulliNB().fit(X_train, y_train)
# Mean accuracy on the held-out split.
model.score(X_test, y_test)

In [None]:
user = "People need to take care of their mental health"
# Apply the same clean() preprocessing that was applied to the training text;
# otherwise the unstemmed words would not line up with the vectoriser's
# vocabulary of stemmed tokens.
data = cv.transform([clean(user)])
output = model.predict(data)[0]
print(output)

In [None]:
user = "Sometime I feel like I need some help"
# Apply the same clean() preprocessing that was applied to the training text;
# otherwise the unstemmed words would not line up with the vectoriser's
# vocabulary of stemmed tokens.
data = cv.transform([clean(user)])
output = model.predict(data)[0]
print(output)