In [54]:
import string
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
import pandas as pd
import numpy as np
import collections
import tensorflow as tf
# Fetch the NLTK resources used by the cleaning helpers below:
# 'stopwords' backs stopwords.words(), 'punkt' backs word_tokenize(),
# and 'wordnet' backs WordNetLemmatizer.
# NOTE(review): 'vader_lexicon' is not referenced in any visible cell — confirm it is still needed.
nltk.download('stopwords')
nltk.download('vader_lexicon')
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/johndinovi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/johndinovi/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/johndinovi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/johndinovi/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [55]:
# Define Some Functionality for Data Cleaning
# Module-level helpers created once and shared by the cleaning functions below.
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()
# Kept as a set so the per-token `word not in stop_words` test is a hash lookup.
stop_words = set(stopwords.words('english'))

# Function to Remove Punctuation
def remove_punctuation(text):
  '''
  Replace punctuation with spaces.

  Input:
    'text': any value; it is converted with str() before processing
  Output:
    The string form of 'text' in which every character from
    string.punctuation has been swapped for a single space, with
    leading and trailing whitespace stripped
  '''
  # One translation table handles every punctuation mark in a single pass.
  punct_to_space = str.maketrans({mark: ' ' for mark in string.punctuation})
  return str(text).translate(punct_to_space).strip()

# Function to Tokenize Text and Remove Stop Words from Text
def remove_stop_words_and_tokenize(text):
  '''
  Lower-case, tokenize, and filter a string.

  Input:
    'text': a string (expected to already have punctuation removed)
  Output:
    List of the lower-cased tokens produced by word_tokenize, in order,
    omitting every token found in the module-level 'stop_words' set
  '''
  tokens = word_tokenize(text.lower())
  return [token for token in tokens if token not in stop_words]

# Function to Stem Tokenized text
def stem(tokenized_text):
  '''
  Stem every token with the module-level PorterStemmer.

  Input:
    'tokenized_text': a list of tokens
  Output:
    The same list object, with each entry replaced by its stem
  '''
  # Slice assignment keeps the in-place update of the caller's list.
  tokenized_text[:] = [stemmer.stem(token) for token in tokenized_text]
  return tokenized_text

# Function to Lemmatize Tokenized text
def lemmatize(tokenized_text):
  '''
  Lemmatize every token with the module-level WordNetLemmatizer.

  Input:
    'tokenized_text': a list of tokens
  Output:
    The same list object, with each entry replaced by its lemma
  '''
  # Slice assignment keeps the in-place update of the caller's list.
  tokenized_text[:] = [lemmatizer.lemmatize(token) for token in tokenized_text]
  return tokenized_text

# ---------------------------------------------------------- #

# Function to Return Cleaned Text
def clean(text):
  '''
  Run the full text-cleaning pipeline on one input.

  Input:
    'text': an input value (converted to a string by remove_punctuation)
  Output:
    List of tokens after, in order: punctuation removal, lower-casing and
    tokenization with stop words dropped, and lemmatization
  '''
  return lemmatize(remove_stop_words_and_tokenize(remove_punctuation(text)))

In [56]:
# Implement Bag of Words Model
def build_set(feature_list):
  '''
  Collect the vocabulary of a corpus.

  Input:
    'feature_list': iterable of tuples of uncleaned input text (0) and
                    sentiment (1); entries whose text is not a str are skipped
  Output:
    Set of all words in the combined and cleaned strings from 'feature_list'
  '''
  # isinstance (rather than an exact type comparison) also accepts str
  # subclasses such as numpy.str_, which would otherwise be dropped silently.
  texts = (feature[0] for feature in feature_list if isinstance(feature[0], str))
  # Join once instead of growing a string with += on every iteration;
  # clean() strips the ends, so the result is the same token list.
  return set(clean(" ".join(texts)))

# Function to Build a BoW Vector for a Given Feature and Set of Words
def build_vector(feature, word_set):
  '''
  Count vocabulary words in one cleaned feature.

  Input:
    'feature'(list): the tokens of a cleaned input (may be empty or None)
    'word_set'(set): all clean words seen over all features
  Output:
    1-D float array with one entry per word of 'word_set', in the
    iteration order of 'word_set', holding how often that word occurs
    in 'feature' (all zeros when 'feature' is falsy)
  '''
  width = len(word_set)
  if not feature:
    return np.zeros(width)
  occurrences = collections.Counter(feature)
  return np.array([occurrences[word] for word in word_set], dtype=float)

def bow_matrix(feature_list, myset=None):
  '''
  Vectorize a list of (text, label) features with a bag-of-words model.

  Input:
    'feature_list': a re-iterable sequence of features, each indexable as
                    text (0) and label (1)
    'myset': optional vocabulary to vectorize against; when None, the
             vocabulary is built from 'feature_list' itself
  Output:
    Tuple of (BoW matrix with one row per feature, array of the labels,
    the vocabulary that was used)
  '''
  # Test for None explicitly: `not myset` would also treat a supplied but
  # empty vocabulary as missing and rebuild it from the data being vectorized.
  if myset is None:
    myset = build_set(feature_list)
  bow_mat = []
  y_vec = []
  for feature in feature_list:
    y_vec.append(feature[1])
    bow_mat.append(build_vector(clean(feature[0]), myset))
  return np.array(bow_mat), np.array(y_vec), myset

In [57]:
# Function to Handle the Data
def process_data(data):
  '''
  Pair each text with a one-hot encoding of its sentiment.

  Input:
    'data': a pandas data frame (or any mapping) with entries 'text' and
            'sentiment'
  Output:
    Tuple of (text, one_hot) pairs, where one_hot is a 3-element list:
    [1, 0, 0] for "negative", [0, 1, 0] for "neutral", [0, 0, 1] for
    "positive". Rows whose sentiment is none of these are dropped.
  '''
  one_hot = {
      "negative": (1, 0, 0),
      "neutral": (0, 1, 0),
      "positive": (0, 0, 1),
  }
  texts = np.array(data['text'])
  sentiments = np.array(data['sentiment'])
  feature_list = []
  for text, sent in zip(texts, sentiments):
    encoding = one_hot.get(sent)
    if encoding is None:
      # Skip the whole row. Keeping the text without a label would shift
      # every later text onto the wrong sentiment.
      continue
    # Fresh list per row so callers never share one label object.
    feature_list.append((text, list(encoding)))
  return tuple(feature_list)

In [121]:
# Import the Twitter Data
# Both frames are passed to process_data below, which reads their 'text' and 'sentiment' columns.
# NOTE(review): 'unicode_escape' interprets backslash sequences in the file — confirm this is intended rather than a plain 8-bit encoding.
train_data = pd.read_csv('./train.csv', encoding='unicode_escape')
test_data = pd.read_csv('./test.csv', encoding='unicode_escape')

In [122]:
# Process the Training and Testing Data
# Turn each frame into a tuple of (text, one-hot sentiment) pairs.
train_features = process_data(train_data)
test_features = process_data(test_data)

# Build the vocabulary from the training data only. Despite its name,
# `mydict` is the set of words returned by bow_matrix.
x_train, y_train, mydict = bow_matrix(train_features)
# Persist the vocabulary: each word followed by a comma, written in the
# set's current iteration order (the same order build_vector enumerates).
with open('../mydict.txt', 'w') as f:
    for word in mydict:
        f.write(str(word) + ',')
# Vectorize the test data against the training vocabulary.
x_test, y_test, _ = bow_matrix(test_features, mydict)

# Normalize the inputs
# (axis=1 normalizes each row, i.e. each document vector, on its own)
x_train = tf.keras.utils.normalize(x_train, axis=1)
x_test = tf.keras.utils.normalize(x_test, axis=1)

In [123]:
# Width of one bag-of-words row; the trailing comment records the value from the saved run.
feature_size = len(x_train[0]) #23903
print(feature_size)
# Shape tuple read by build_model() for the first Dense layer's input_shape.
input_dim = (feature_size,)

def build_model():
    """
    Assemble the (uncompiled) sentiment classifier.

    The network is three 256-unit ReLU Dense layers followed by a 3-unit
    softmax output. The first layer takes its input shape from the
    module-level `input_dim`.

    Returns:
        A new tf.keras.Sequential model.
    """
    relu = tf.nn.relu
    layers = [tf.keras.layers.Dense(256, activation=relu, input_shape=input_dim)]
    layers.extend(tf.keras.layers.Dense(256, activation=relu) for _ in range(2))
    layers.append(tf.keras.layers.Dense(3, activation=tf.nn.softmax))
    return tf.keras.Sequential(layers)

model = build_model()

# Compile the model
# categorical_crossentropy pairs with the 3-element one-hot labels built by
# process_data; 'accuracy' is the extra metric reported by fit/evaluate.
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.0001),
              metrics=['accuracy'])

23903


In [134]:
# Train on the normalized bag-of-words matrix for three epochs.
model.fit(x_train, y_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x349454760>

In [135]:
# Score the held-out test set; the result is [loss, accuracy] given the metrics set in compile().
model.evaluate(x_test, y_test)



[1.646216630935669, 0.649688720703125]

In [138]:
# Persist the weights only; the architecture must be rebuilt with build_model() before they can be loaded.
model.save_weights('./saved_weights/')

In [139]:
import time
import sys
import random

# Function to Simulate Slow Typing
# Pacing divisor for slow_print: every character is followed by a random
# pause of between 0 and 10/typing_speed seconds. (Originally labelled
# "wpm", although the pause formula is not a words-per-minute conversion.)
typing_speed = 500
def slow_print(t):
    """
    Write `t` to standard output one character at a time.

    Each character is flushed immediately and followed by a short random
    pause scaled by the module-level `typing_speed`. A newline is emitted
    after the last character.
    """
    for char in t:
        print(char, end='', flush=True)
        pause = random.random()*10.0/typing_speed
        time.sleep(pause)
    print('')

def _prompt_choice(options, prompt=None):
    """
    Read console input until it matches one of `options`.

    Args:
        options: collection of accepted answers, in lower case.
        prompt: optional text that is slow-printed before every read;
            when None, nothing is printed between reads.

    Returns:
        The accepted answer, stripped of surrounding whitespace and lower-cased.
    """
    while True:
        if prompt is not None:
            slow_print(prompt)
        answer = input().strip().lower()
        if answer in options:
            return answer


def blendiebot(model1):
    """
    Run the interactive BlendieBot console session.

    The user is repeatedly asked how they are doing. Each reply is cleaned,
    vectorized against the module-level vocabulary `mydict`, and classified
    by `model1`; the arg-max class selects the branch (2 = positive,
    0 = negative, anything else = neutral). The session ends when the user
    types 'quit', declines to continue after a positive reading, or after
    the negative branch has been handled.

    Args:
        model1: a model exposing `predict`, trained on row-normalized
            bag-of-words vectors built from `mydict`.

    Returns:
        None.
    """
    slow_print("Hello! My name is BlendieBot!\n")
    time.sleep(1)
    slow_print("I am an AI model here to see how your are feeling and to provide helpful strategies for navigating this crazy world!\n")
    time.sleep(1)
    slow_print("If you would like to quit ant any time, just type 'quit' whenever asked how you are doing.\n")
    while True:
        slow_print("Give as much information as possible and be explicit. Avoid double negatives and 'not' phrases!\n")
        slow_print("How are you doing?\n")
        response = input()
        if response.strip().lower() == 'quit':
            break

        tokens = clean(response)
        features = np.array([build_vector(tokens, mydict)])
        # The training inputs were row-normalized with
        # tf.keras.utils.normalize(axis=1); apply the same scaling here so
        # the model sees inputs on the scale it was trained on.
        features = tf.keras.utils.normalize(features, axis=1)
        score = np.argmax(model1.predict(features))

        if score == 2:
            slow_print("I am glad to hear that you are doing well today. Would you like to dive deeper into your thoughts? (y or n)\n")
            if _prompt_choice(("y", "n")) == "n":
                slow_print("If you are ever feeling down, please come and talk to me! I am always happy to help!\n")
                time.sleep(1)
                slow_print("Goodbye!")
                return
            # "y": fall through and ask how the user is doing again.
        elif score == 0:
            slow_print("I am sorry to hear that you are not feeling your best.\n")
            time.sleep(1)
            wants_help = _prompt_choice(
                ("y", "n"),
                "Would you like some suggestions to work through what you are feeling? (y or n) \n")
            if wants_help == "y":
                # The technique answer is kept in its own variable, so the
                # y/n question is not asked again after the suggestions.
                technique = _prompt_choice(
                    ("ppt", "cbt"),
                    "Would you like Psychodynamic Psychotherapy techniques, or Cognitive Behavioral Therapy techniques? (PPT or CBT)\n")
                if technique == "ppt":
                    slow_print("If you are ever in serious trouble, please call 911. Feeling down is not an easy task to navigate.\n")
                    slow_print("I suggest you do your best to explore why you are feeling the way that you are, and ask yourself thought provoking questions.\n")
                    slow_print("Perhaps reach out to a family member or a close friend, with their consent, to discuss how the relationships in your life may be impacting your feelings.\n")
                    slow_print("Lastly, if you are feeling up to it, consider seeing an actual therapist to further help with these challenging thoughts. Remember, there are always people who care about you.\n")
                else:
                    slow_print("If you are ever in serious trouble, please call 911. Feeling down is not an easy task to navigate.\n")
                    slow_print("You are experiencing some difficult negative thoughts and emotions, but it is important to remember that they are distorted and not worth entertaining.\n")
                    slow_print("At this link, <https://tinyurl.com/64c8eycs>, you will find a sheet to help work through these thoughts and make them more closely align with reality.\n")
                    slow_print("Lastly, if you are feeling up to it, consider seeing an actual therapist to further help with these challenging times. Remember, there are always people who care about you.\n")
            else:
                slow_print("No worries. I am always here to talk!\n")
            # The negative branch always ends the conversation.
            break
        else:
            slow_print("It seems that you are feeling pretty neutral. Describe in more detail how you are doing. \n")
    slow_print("I hope I was able to help!\n\nFeel free to come back and talk about whatever is bothering you!\n")
    slow_print("Goodbye!")

In [140]:
# Rebuild the same architecture and restore the weights saved above.
# model1 is never compiled here; blendiebot only calls predict() on it.
model1 = build_model()
model1.load_weights('./saved_weights/')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x349454640>

In [144]:
# Start the interactive console session with the restored model (blocks on input()).
blendiebot(model1)

Hello! My name is BlendieBot!

I am an AI model here to see how your are feeling and to provide helpful strategies for navigating this crazy world!

If you would like to quit ant any time, just type 'quit' whenever asked how you are doing.

Give as much information as possible and be explicit. Avoid double negatives and 'not' phrases!

How are you doing?

I am glad to hear that you are doing well today. Would you like to dive deeper into your thoughts? (y or n)

If you are ever feeling down, please come and talk to me! I am always happy to help!

Goodbye!
