In [48]:
! pip install tensorflow scikit-learn pandas numpy pickle5



In [49]:
from google.colab import drive
# Mount Google Drive at /content/drive so the files stored there can be read by path.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [50]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout, LSTM, SimpleRNN, Bidirectional
import pickle5 as pickle

In [51]:
# Load the tweet emotion dataset (CSV) from the mounted Drive folder.
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/tweet_emotions.csv')

In [52]:
# Preview the first rows. A bare last expression uses the notebook's rich
# DataFrame rendering instead of print()'s truncated plain text.
df.head()

     tweet_id   sentiment                                            content
0  1956967341       empty  @tiffanylue i know  i was listenin to bad habi...
1  1956967666     sadness  Layin n bed with a headache  ughhhh...waitin o...
2  1956967696     sadness                Funeral ceremony...gloomy friday...
3  1956967789  enthusiasm               wants to hang out with friends SOON!
4  1956968416     neutral  @dannycastillo We want to trade with someone w...


In [53]:
# Keep only the label and text columns, then shuffle the rows.
# The fixed random_state makes the shuffle repeatable across
# Restart & Run All instead of producing a new row order on every run.
df = df[['sentiment', 'content']]
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

In [54]:
# Vocabulary cap and fixed sequence length used to encode the tweets.
vocab_size = 5000
max_len = 100

# Fit the word index on the tweet text; '<OOV>' is the out-of-vocabulary token.
tweet_texts = df['content']
tokenizer = Tokenizer(oov_token='<OOV>', num_words=vocab_size)
tokenizer.fit_on_texts(tweet_texts)
word_index = tokenizer.word_index

# Encode every tweet as a list of integer ids, then bring each row to exactly
# `max_len` ids; tweets that are too long are cut at the end.
sequences = tokenizer.texts_to_sequences(tweet_texts)
padded_sequences = pad_sequences(sequences, truncating='post', maxlen=max_len)

In [55]:
# One-hot encode the sentiment column once and reuse the resulting frame for
# both the label matrix and the preview (it was previously computed twice).
# The explicit integer dtype keeps the labels 0/1 on pandas versions whose
# get_dummies default dtype is bool.
sentiment_dummies = pd.get_dummies(df['sentiment'], dtype=np.uint8)
sentiment_labels = sentiment_dummies.values
print(sentiment_dummies)

       anger  boredom  empty  enthusiasm  fun  happiness  hate  love  neutral  \
0          0        0      0           0    0          1     0     0        0   
1          0        0      0           0    0          0     0     1        0   
2          0        0      0           0    0          0     0     0        0   
3          0        0      0           0    0          0     0     0        0   
4          0        0      0           0    0          0     0     0        0   
...      ...      ...    ...         ...  ...        ...   ...   ...      ...   
39995      0        0      0           0    0          0     0     0        0   
39996      0        0      1           0    0          0     0     0        0   
39997      0        0      0           0    0          0     0     0        0   
39998      0        0      0           0    0          0     0     0        0   
39999      0        0      0           0    0          0     0     1        0   

       relief  sadness  sur

In [56]:
# Hold out 20% of the samples for evaluation. The fixed seed makes the split
# repeatable, so reported metrics can be compared between runs.
x_train, x_test, y_train, y_test = train_test_split(
    padded_sequences, sentiment_labels, test_size=0.2, random_state=42
)

In [57]:
# 1-D convolutional classifier: embedding -> Conv1D -> global max pooling ->
# dense head with dropout -> 13-way softmax.
# NOTE(review): `model` is rebuilt by later cells before `model.fit` is called,
# so as the notebook stands this architecture is never trained.
model = Sequential([
    Embedding(5000, 100, input_length=100),
    Conv1D(64, 5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(13, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 100, 100)          500000    
                                                                 
 conv1d (Conv1D)             (None, 96, 64)            32064     
                                                                 
 global_max_pooling1d (Globa  (None, 64)               0         
 lMaxPooling1D)                                                  
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense_2 (Dense)             (None, 13)                429       
                                                      

In [59]:
# Two stacked SimpleRNN layers on top of the embedding; the first returns the
# full sequence so the second can consume it, followed by a 13-way softmax.
# NOTE(review): `model` is rebuilt by a later cell before `model.fit` is called,
# so as the notebook stands this architecture is never trained.
model = Sequential([
    Embedding(5000, 100, input_length=100),
    SimpleRNN(100, return_sequences=True),
    SimpleRNN(100),
    Dense(13, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()



Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 100, 100)          500000    
                                                                 
 simple_rnn (SimpleRNN)      (None, 100, 100)          20100     
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 100)               20100     
                                                                 
 dense_4 (Dense)             (None, 13)                1313      
                                                                 
Total params: 541,513
Trainable params: 541,513
Non-trainable params: 0
_________________________________________________________________


In [60]:
# Bidirectional LSTM classifier: embedding -> BiLSTM(200 units per direction)
# -> 13-way softmax. This is the last definition of `model` before training.
# NOTE(review): activation='relu' overrides the LSTM's default activation -
# confirm this is intentional.
model = Sequential([
    Embedding(5000, 100, input_length=100),
    Bidirectional(LSTM(200, activation = 'relu')),
    Dense(13, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 100, 100)          500000    
                                                                 
 bidirectional_1 (Bidirectio  (None, 400)              481600    
 nal)                                                            
                                                                 
 dense_5 (Dense)             (None, 13)                5213      
                                                                 
Total params: 986,813
Trainable params: 986,813
Non-trainable params: 0
_________________________________________________________________


In [61]:
# Train the most recently built `model` for 3 epochs with batches of 32.
# The held-out split is passed as validation data; the same split is reused
# for the accuracy check that follows.
model.fit(x_train, y_train, epochs=3, batch_size=32, validation_data=(x_test, y_test))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7a528c9698d0>

In [62]:
# Reduce the predicted class probabilities and the one-hot labels to class
# indices, then report the fraction of predictions that match.
predicted_probabilities = model.predict(x_test)
y_pred = np.argmax(predicted_probabilities, axis=-1)
true_classes = np.argmax(y_test, axis=-1)
print("Accuracy:", accuracy_score(true_classes, y_pred))

Accuracy: 0.319375


In [63]:
# Persist the trained model and the fitted tokenizer side by side so they can
# be reloaded together for inference.
model.save('sentiment_analysis_model.h5')
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [64]:
import keras

# Reload the model and tokenizer from the files written by the save cell.
model = keras.models.load_model('sentiment_analysis_model.h5')
with open('tokenizer.pickle', 'rb') as handle:
    # Unpickling can execute code from the file; only load pickles this notebook wrote.
    tokenizer = pickle.load(handle)

In [65]:
import io
import random
import string
import warnings
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

In [66]:
pip install nltk



In [67]:
import nltk
from nltk.stem import WordNetLemmatizer
# Download the NLTK 'popular' package id without progress output.
nltk.download('popular', quiet=True)

True

In [68]:
# Read the chatbot corpus and lower-case it. The context manager closes the
# file handle as soon as the text has been read (it was previously left open);
# bytes that cannot be decoded are dropped.
with open('/content/drive/MyDrive/Chatbot.txt', 'r', errors='ignore') as f:
    raw = f.read()
raw = raw.lower()

In [69]:
# Split the corpus into a list of sentences and a list of word tokens.
sent_tokens = nltk.sent_tokenize(raw)
word_tokens = nltk.word_tokenize(raw)

In [70]:
# Shared lemmatizer instance used by the helpers below.
lemmer = nltk.stem.WordNetLemmatizer()

# Translation table that deletes every character in string.punctuation.
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}


def LemTokens(tokens):
    """Return a list holding the lemmatized form of each token in `tokens`."""
    return list(map(lemmer.lemmatize, tokens))


def LemNormalize(text):
    """Lower-case `text`, delete punctuation, tokenize it and lemmatize the tokens."""
    without_punctuation = text.lower().translate(remove_punct_dict)
    words = nltk.word_tokenize(without_punctuation)
    return LemTokens(words)

In [71]:
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up","hey",)
GREETING_RESPONSES = ["hi", "hey", "*nods*", "hi there", "hello", "I am glad! You are talking to me"]


def greeting(sentence):
    """Return a random greeting reply if `sentence` contains a greeting.

    Matching is case-insensitive and works on whole words. Punctuation
    attached to a word is ignored, so "Hey," and "hello!" count, and
    multi-word entries of GREETING_INPUTS such as "what's up" are matched
    as a phrase.

    Args:
        sentence: The user's message.

    Returns:
        One entry of GREETING_RESPONSES chosen at random, or None when the
        message contains no greeting.
    """
    # Strip punctuation from the edges of each word only, so the apostrophe
    # inside "what's" survives.
    words = (word.strip(string.punctuation) for word in sentence.lower().split())
    # Surrounding spaces turn the substring test below into a whole-word /
    # whole-phrase test.
    padded = " " + " ".join(word for word in words if word) + " "
    for phrase in GREETING_INPUTS:
        if " " + phrase + " " in padded:
            return random.choice(GREETING_RESPONSES)
    return None

In [72]:
def response(user_response):
    """Return the stored sentence that is most similar to `user_response`.

    The input is appended to the module-level `sent_tokens` list (and is left
    there), all sentences are TF-IDF vectorized using `LemNormalize` as the
    tokenizer, and cosine similarity between the input and every sentence is
    computed. The sentence ranked second-highest is returned (the input is
    compared against itself too, so the top spot is normally its own entry).
    When that second-highest similarity is 0, an apology string is returned.
    """
    sent_tokens.append(user_response)

    vectorizer = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    matrix = vectorizer.fit_transform(sent_tokens)
    similarities = cosine_similarity(matrix[-1], matrix)

    # Runner-up position in ascending similarity order, and its score.
    idx = similarities.argsort()[0][-2]
    runner_up_score = similarities[0][idx]

    if runner_up_score == 0:
        return "I am sorry! I don't understand you"
    return sent_tokens[idx]

In [73]:
import random

question_list = ["How are you feeling?", "How have you been?", "Is something worrying you?", "Are you having trouble sleeping? If so, why?", "Is there anything you want to talk about?", "How would you like things to be different?", "Did something upset you today?", "Is someone bothering you?"]

# Draw three distinct questions in a single call instead of repeating
# choice()/remove() three times. The drawn questions are then taken out of the
# pool, so `question_list` ends up holding only the unasked questions, exactly
# as it did before.
randomQuestion, randomQuestion2, randomQuestion3 = random.sample(question_list, 3)
for picked in (randomQuestion, randomQuestion2, randomQuestion3):
    question_list.remove(picked)

In [None]:
# Class order of the model's 13 outputs: the alphabetical column order that
# pd.get_dummies produced for the training labels.
SENTIMENT_LABELS = (
    "anger", "boredom", "empty", "enthusiasm", "fun", "happiness", "hate",
    "love", "neutral", "relief", "sadness", "surprise", "worry",
)

# Reply printed for each predicted sentiment. "neutral" previously had no
# reply at all, which left the user staring at an empty prompt.
SENTIMENT_REPLIES = {
    **dict.fromkeys(
        ("sadness", "empty", "boredom"),
        "I am sorry you are feeling this way. Is there anything I can do to help? Would you like to continue talking about it to me or someone else?",
    ),
    **dict.fromkeys(
        ("happiness", "enthusiasm", "love", "relief", "fun"),
        "Wow that is so great, I am so happy for you",
    ),
    **dict.fromkeys(
        ("hate", "anger"),
        "I am sorry you feel this way. Can you tell me more about what is making you feel this way?",
    ),
    "surprise": "Why do you feel surprised?",
    "worry": "I am sorry you are worried, but remember to not always worry about things you can't control to and to focus on what you can.",
    "neutral": "Thank you for sharing that. Can you tell me a little more?",
}


def _predict_sentiment(text):
    """Return the sentiment label the model predicts for one user message."""
    encoded = tokenizer.texts_to_sequences([text])
    # Same length and truncation side as the sequences the model was trained on.
    encoded = pad_sequences(encoded, maxlen=100, truncating='post')
    probabilities = model.predict(encoded, verbose=0)
    return SENTIMENT_LABELS[int(np.argmax(probabilities))]


def _wants_to_exit(text):
    """Return True for the exit word, ignoring case, outer spaces and a trailing '!' or '.'."""
    return text.strip().rstrip("!.").lower() == "bye"


print("AMES: My name is AMES. I will be asking you a few questions about how you are feeling. If you want to exit, type Bye!")

# One round per pre-selected question: ask, classify the answer, reply, then
# give the user a turn to react. The exit word ends the chat at either prompt,
# so saying goodbye never leads to another question or another blank prompt.
for question in (randomQuestion, randomQuestion2, randomQuestion3):
    print(question)
    answer = input()
    if _wants_to_exit(answer):
        break
    print(SENTIMENT_REPLIES[_predict_sentiment(answer)])
    if _wants_to_exit(input()):
        break


















AMES: My name is AMES. I will be asking you a few questions about how you are feeling. If you want to exit, type Bye!
How are you feeling?
Not good
Wow that is so great, I am so happy for you
thanks
How have you been?
Not good
Wow that is so great, I am so happy for you
thanks
Is something worrying you?
Yes 
Wow that is so great, I am so happy for you
bye


In [None]:
L = ["How are you feeling?\n", "How have you been?\n", "Is something worrying you?\n", "Are you having trouble sleeping? If so, why?\n", "Is there anything you want to talk about?\n", "How would you like things to be different?\n", "Did something upset you today?\n", "Is someone bothering you?\n"]

# Write the questions to Chatbot.txt in the working directory, one per line.
# The context manager closes the handle even if the write fails.
with open('Chatbot.txt', 'w') as file1:
    file1.writelines(L)

# Read the lines back. Previously this second handle was never closed.
with open('Chatbot.txt', 'r') as file1:
    Lines = file1.readlines()

# Print each line with a 1-based number, without its trailing newline.
# `count` starts at 0 so it is still defined when the file is empty.
count = 0
for count, line in enumerate(Lines, start=1):
    print("Line{}: {}".format(count, line.strip()))

Line1: How are you feeling?
Line2: How have you been?
Line3: Is something worrying you?
Line4: Are you having trouble sleeping? If so, why?
Line5: Is there anything you want to talk about?
Line6: How would you like things to be different?
Line7: Did something upset you today?
Line8: Is someone bothering you?


In [None]:
def Convert(string):
    """Split `string` on every '-' and return the pieces as a list."""
    return string.split("-")


# Driver code
str1 = "How are you feeling?-How have you been?-Is something worrying you?-Are you having trouble sleeping? If so, why?-Is there anything you want to talk about?-How would you like things to be different?-Did something upset you today?-Is someone bothering you?"
converted = Convert(str1)
print(converted)

['How are you feeling?', 'How have you been?', 'Is something worrying you?', 'Are you having trouble sleeping? If so, why?', 'Is there anything you want to talk about?', 'How would you like things to be different?', 'Did something upset you today?', 'Is someone bothering you?']
