In [1]:
import re
import pandas as pd
from emot.emo_unicode import UNICODE_EMO, EMOTICONS
import contractions
import pickle

def text_lower(text):
    """Normalise raw tweet text ahead of the other cleaning steps.

    The text is lower-cased, every newline becomes a single space, and the
    HTML entity ``&amp;`` is rewritten as the word ``and``.  Lower-casing
    happens first, so an upper-case ``&AMP;`` is rewritten as well.
    """
    return text.lower().replace('\n', ' ').replace('&amp;', 'and')


def remove_usernames(text):
    """Replace ``@handle`` mentions with the placeholder token ``USER``.

    A mention is only recognised when the ``@`` sits at the very start of
    the text or directly after a character that is not a letter, digit,
    hyphen or dot, so e-mail-like strings such as ``name@host`` are left
    alone.  The handle must begin with a letter or underscore and be at
    least two characters long.
    """
    mention_regex = r'(?<=^|(?<=[^a-zA-Z0-9-\.]))@([A-Za-z_]+[A-Za-z0-9_]+)'
    return re.sub(mention_regex, r'USER', text)


def convert_emojis(text):
    """Spell out each emoji in ``text`` using its name from ``UNICODE_EMO``.

    Before substitution the name has its commas and colons stripped and its
    whitespace collapsed to single spaces.  Underscores in the name are left
    in place.
    """
    for emoji, raw_name in UNICODE_EMO.items():
        cleaned_name = raw_name.replace(",", "").replace(":", "")
        text = text.replace(emoji, " ".join(cleaned_name.split()))
    return text


def convert_emoticons(text):
    """Substitute text emoticons with their descriptions from ``EMOTICONS``.

    Every key of ``EMOTICONS`` is used as a regular-expression fragment
    (wrapped in a capturing group).  Each match is replaced by the key's
    description with commas removed and whitespace collapsed to single
    spaces.
    """
    for pattern, description in EMOTICONS.items():
        replacement = " ".join(description.replace(",", "").split())
        text = re.sub(u'(' + pattern + ')', replacement, text)

    return text


def remove_urls(text):
    """Swap every web address in ``text`` for the placeholder ``URL``.

    A web address is any run of non-whitespace characters that starts with
    ``http://``, ``https://`` or ``www.``.
    """
    return re.sub(r'https?://\S+|www\.\S+', r'URL', text)


def word_expand(text):
    """Expand contractions one word at a time.

    The text is split on whitespace, each piece is passed through
    ``contractions.fix``, and the results are joined back with single
    spaces, so any run of whitespace in the input collapses to one space.
    """
    return ' '.join(contractions.fix(token) for token in text.split())


def remove_hashtags(text):
    """Blank out hashtags in ``text``.

    Every ``#`` that is immediately followed by one or more word characters
    is replaced, together with those characters, by a single space.  A lone
    ``#`` is left untouched.

    Args:
        text: The string to clean.

    Returns:
        The text with each hashtag replaced by one space.
    """
    # The pattern is a raw string: "\w" inside a plain literal is an invalid
    # escape sequence, which newer Python versions warn about at compile time.
    return re.sub(r"#(\w+)", r' ', text)


def tokenization(text):
    """Split ``text`` into word tokens.

    A token is a maximal run of word characters (letters, digits and
    underscore).  Everything else acts as a separator.

    Args:
        text: The string to tokenize.

    Returns:
        A list of non-empty tokens in order of appearance; an empty list
        when the text contains no word characters.
    """
    # Collecting the word runs directly (instead of splitting on the
    # separators) avoids the empty-string tokens a split produces when the
    # text starts or ends with punctuation or is empty.
    return re.findall(r'\w+', text)


def preprocessing(df):
    """Clean the ``text`` column of ``df`` and return the result as ``tweet``.

    The cleaning steps run in this order: ``text_lower``,
    ``remove_usernames``, ``convert_emojis``, underscores to spaces,
    ``remove_urls``, ``word_expand``, ``remove_hashtags``.

    Args:
        df: DataFrame with a ``text`` column of strings.  It is left
            unmodified.

    Returns:
        A new DataFrame with the remaining columns of ``df`` followed by a
        ``tweet`` column holding the cleaned text.  ``text`` is removed, as
        is any input column named like one of the pipeline's working
        columns (``text_lower``, ``text_no_user``, ``text_no_emoji``,
        ``text_no_url``, ``text_expanded``, ``text_no_hash``,
        ``processed_data``).
    """
    cleaned = (
        df['text']
        .apply(text_lower)
        .apply(remove_usernames)
        # NOTE(review): convert_emoticons is deliberately not part of the
        # chain.  Its result was never used by this pipeline, and enabling
        # it changes the text the fitted vectorizers receive.  If emoticon
        # conversion is wanted, add it on purpose, check how it interacts
        # with the later URL removal, and refit the models.
        .apply(convert_emojis)
        # Emoji names use underscores between words.
        .apply(lambda value: value.replace('_', ' '))
        .apply(remove_urls)
        .apply(word_expand)
        .apply(remove_hashtags)
    )

    working_columns = ['text', 'text_lower', 'text_no_user', 'text_no_url',
                       'text_no_emoji', 'text_expanded', 'text_no_hash',
                       'processed_data']
    # drop() and assign() both return new frames, so the caller's DataFrame
    # is never touched.
    return df.drop(columns=working_columns, errors='ignore').assign(tweet=cleaned)

In [12]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE(review): unpickling can execute arbitrary code -- only load
    # artifacts that come from a trusted source.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# `model`/`vect` come from the "d2" files and `model2`/`vect2` from the "d1"
# files; presumably d1/d2 name two training datasets -- confirm with the
# training notebook.
model = _load_pickle('model_d2_normal_logreg.pkl')
model2 = _load_pickle('model_d1_normal_logreg.pkl')

vect = _load_pickle('vectorizer_d2')
vect2 = _load_pickle('vectorizer_d1')



# Classify a single tweet typed in by the user with both loaded models and
# combine the two verdicts into `result` / `probability`.
input_text = input("Enter a tweet here: ")
user_df = pd.DataFrame(columns=["text"], data=[[input_text]])
user_df = preprocessing(user_df)

# The same cleaned text is scored by each model through its own vectorizer.
X_user = user_df.tweet
X_user_dtm = vect.transform(X_user.values.astype('U'))
y_user_class = model.predict(X_user_dtm)

X_user2 = user_df.tweet
X_user_dtm2 = vect2.transform(X_user2.values.astype('U'))
y_user_class2 = model2.predict(X_user_dtm2)

# Defaults cover mismatched or empty predictions, so `result` and
# `probability` are always defined by this cell and can never pick up values
# left over from an earlier run.
res = "unexpected error"
y_user_prob = "unexpected error"

if len(y_user_class) == len(y_user_class2):
    # Column 1 of predict_proba, computed once per model.
    # Assumes column 1 is the probability of label 1 -- TODO confirm against
    # model.classes_.
    prob_model = model.predict_proba(X_user_dtm)[:, 1]
    prob_model2 = model2.predict_proba(X_user_dtm2)[:, 1]

    # If there is more than one row, the last row determines `res` and
    # `y_user_prob`.
    for i in range(len(y_user_class)):
        # One-element slices keep the array form downstream code receives
        # while referring to row i only.
        row_prob = prob_model[i:i + 1]
        row_prob2 = prob_model2[i:i + 1]
        # Explicit scalar comparison: the builtin max() applied to whole
        # arrays raises as soon as there is more than one row.  On a tie the
        # first model's value is kept.
        higher_prob = row_prob2 if row_prob2[0] > row_prob[0] else row_prob

        if y_user_class[i] == 1 and y_user_class2[i] == 1:
            res = "Most likely depressed."
            y_user_prob = higher_prob
            print("Case 1")

        elif y_user_class[i] == 1 and y_user_class2[i] == 0:
            res = "Most likely depressed."
            y_user_prob = row_prob
            print("Case 2")

        elif y_user_class2[i] == 1 and y_user_class[i] == 0:
            res = "Most likely depressed."
            y_user_prob = row_prob2
            print("Case 3")

        elif y_user_class[i] == 0 and y_user_class2[i] == 0:
            res = "Most likely not depressed."
            # Diagnostic output: one column of the document-term matrix and
            # both models' class-1 probabilities.
            print(list(X_user_dtm[:, 1]))
            print(prob_model)
            print(prob_model2)
            y_user_prob = higher_prob

            print("Case 4")

result = res
probability = y_user_prob

Enter a tweet here: heart break
[<1x1 sparse matrix of type '<class 'numpy.int64'>'
	with 0 stored elements in Compressed Sparse Row format>]
[0.01832846]
[0.23599499]
Case 4


In [6]:
# Show the verdict and the probability reported with it, one per line.
print(result, probability, sep="\n")

Most likely not depressed.
[0.04975374]
