In [1]:
import re

In [2]:
def clean_corpus(chat_export_file):
    """
    Turn a WhatsApp chat export into a corpus for training with chatterbot.

    The export is first stripped of its per-message metadata, and the
    resulting lines are then filtered down to conversation messages.

    Args:
        chat_export_file (str): The name of the chat export file

    Returns:
        tuple: The messages returned by remove_non_message_text
    """
    return remove_non_message_text(remove_chat_metadata(chat_export_file))

def remove_chat_metadata(chat_export_file):
    """
    Remove WhatsApp chat metadata.

    The pattern used here targets export lines that start with a
    bracketed timestamp followed by the sender's name:

     timestamp           username  message
    ------------------------------------------
    [8/26/22, 17:47:05] Jane Doe: Message text

    For each line, the first bracketed segment, the text up to the next
    colon, the colon itself and one following whitespace character are
    removed, leaving only the message text. Lines that contain no such
    prefix (for example continuation lines of a multi-line message) are
    returned unchanged.

    Args:
        chat_export_file (str): The name of the chat export file

    Returns:
        tuple: The text of each message in the conversation, one entry
            per line of the file (a file ending in a newline yields a
            final empty string)
    """
    # "[...]" + optional whitespace + "name" + ":" + optional single space.
    pattern = r"\[(.*?)\]\s*([^:]+):\s?"

    with open(chat_export_file, "r") as corpus_file:
        export_text = corpus_file.read()

    # Substitute line by line: iterating over the string returned by read()
    # yields single characters, on which the pattern can never match.
    # count=1 strips only the leading metadata, so brackets or colons that
    # appear later inside the message text are left alone.
    return tuple(
        re.sub(pattern, "", line, count=1)
        for line in export_text.split("\n")
    )

def remove_non_message_text(export_text_lines):
    """
    Drop lines that are not part of the actual conversation.

    The first and the last entry of the export are discarded (the
    standardized intro line and the empty line at the end of the file),
    and so is every entry that is exactly a media placeholder.

    Args:
        export_text_lines (tuple): All lines from the export file

    Returns:
        tuple: Messages that are a relevant part of the conversation
    """
    placeholders = ("<Media omitted>",)

    kept = []
    # Slice off the first and last entries before filtering.
    for entry in export_text_lines[1:-1]:
        if entry in placeholders:
            continue
        kept.append(entry)
    return tuple(kept)

In [3]:
# # pattern = r"^([^()]+)" # outside of ()
# pattern = r"\((.*?)\)" # inside of and including ()

# cleaned_corpus = []
# with open("clinician_one.txt", "r") as corpus_file:
#     splits = corpus_file.read().split("\n")
#     for line in splits:
#         if line[:11] == "Clinician: ":
#             clean_line = re.sub(pattern, "", line[11:])
#             cleaned_corpus.append(clean_line)

# corpus_file.close()

# # cleaned_corpus
# # with open("chat.txt", "w") as corpus_file:
# with open("chat.txt", "w") as fout:
#     for line in cleaned_corpus:
#         fout.write(line + "\n")

# fout.close()

In [4]:
import json

In [5]:
# Load the PHQ-9 questionnaire definition. The context manager closes the
# file handle once parsing is done; the encoding is given explicitly because
# the questionnaire text contains non-ASCII punctuation.
with open("data/phq-9.json", encoding="utf-8") as fh:
    data = json.load(fh)
data
# for i in range(len(data['sections'])):
#     # print (i, len(data['sections'][i]['questions']))
#     print (i, data['sections'][i]['questions'])

# print(len(data['sections'][0]))
# data['sections'][0]#['questions']

{'name': 'PHQ-9',
 'description': None,
 'sections': [{'number': 1,
   'scale': {'Not at all': 0,
    'Several Days': 1,
    'More than half the days': 2,
    'Nearly every day': 3},
   'prefix': 'Over the last two weeks, how often have you been bothered by',
   'questions': ['having little interest or pleasure in doing things?',
    'feeling down, depressed, or hopeless?',
    'having trouble falling or staying asleep, or sleeping too much?',
    'feeling tired or having little energy?',
    'having poor appetite or overeating?',
    'feeling bad about yourself - or that you are a failure or have let yourself or your family down?',
    'having trouble concentrating on things, such as reading the newspaper or watching television?',
    'moving or speaking so slowly that other people could have noticed? Or the opposite – being so fidgety or restless that you have been moving around a lot more than usual?',
    'having thoughts that you would be better off dead or of hurting yourself in 

In [6]:
import json

def json_loader(folder, filename):
    """
    Load and parse a JSON file located in a folder.

    Args:
        folder (str): Directory containing the file, with or without a
            trailing path separator
        filename (str): Name of the JSON file inside ``folder``

    Returns:
        The decoded JSON document, as produced by ``json.load``
    """
    import os  # function-scope import keeps this helper self-contained

    # os.path.join only inserts a separator when one is missing, so both
    # "data/" and "data" resolve to the same file; plain concatenation
    # would turn the latter into "dataphq-9.json".
    path = os.path.join(folder, filename)
    with open(path, encoding="utf-8") as fh:
        return json.load(fh)

# questionnaire_loader = lambda name: json_loader("data/phq-9.json", name)

# index = questionnaire_loader("data/phq-9.json")
# index = json_loader("data/phq-9.json")
# Go through json_loader (defined above in this cell) so the file handle is
# closed after parsing instead of being left open by a bare open() call.
data = json_loader("data/", "phq-9.json")
data

{'name': 'PHQ-9',
 'description': None,
 'sections': [{'number': 1,
   'scale': {'Not at all': 0,
    'Several Days': 1,
    'More than half the days': 2,
    'Nearly every day': 3},
   'prefix': 'Over the last two weeks, how often have you been bothered by',
   'questions': ['having little interest or pleasure in doing things?',
    'feeling down, depressed, or hopeless?',
    'having trouble falling or staying asleep, or sleeping too much?',
    'feeling tired or having little energy?',
    'having poor appetite or overeating?',
    'feeling bad about yourself - or that you are a failure or have let yourself or your family down?',
    'having trouble concentrating on things, such as reading the newspaper or watching television?',
    'moving or speaking so slowly that other people could have noticed? Or the opposite – being so fidgety or restless that you have been moving around a lot more than usual?',
    'having thoughts that you would be better off dead or of hurting yourself in 

In [7]:
int_answers = []

def ask(question, scale, answers_list):
    """
    Print a question with its answer scale and prompt until a valid score is given.

    Args:
        question (str): Text shown to the user
        scale (dict): Maps each answer label to its integer score
        answers_list (list): List the accepted score is appended to

    Returns:
        list: ``answers_list``, with the accepted score appended

    Only invalid input is retried. Other exceptions raised while reading
    input (for example KeyboardInterrupt or EOFError) propagate to the
    caller, so the prompt loop can be interrupted.
    """
    invalid_msg = "Not a valid answer, please enter an integer from the list of possible responses."

    print(question)
    print("\n".join(f"{word}: {score}" for word, score in scale.items()))
    possible_answers = set(scale.values())

    while True:
        try:
            answer = int(input("Answer: "))
        except ValueError:
            # Not an integer at all: ask again.
            print(invalid_msg)
            continue
        if answer not in possible_answers:
            # An integer, but not one of the scores on the scale.
            print(invalid_msg)
            continue
        answers_list.append(answer)
        return answers_list

In [8]:
# int_answers = []

# for i in range(len(data["sections"])):
#     section = data["sections"][i]
#     for quest_id in range(len(section['questions'])):
#         ask(section["prefix"] + " " + section['questions'][quest_id], section["scale"], int_answers)

# int_answers

In [9]:
# Read the chat corpus and keep one whitespace-trimmed entry per "\n"-separated line.
CORPUS_FILE = []
with open("data/chat.txt", "r") as file:
    CORPUS_FILE.extend(entry.strip() for entry in file.read().split("\n"))

CORPUS_FILE

['What is your full name?',
 'My full name is Jane Smith.',
 'When and where were you born?',
 'I was born in Los Angeles, California on June 12, 1985.',
 'What is your occupation?',
 "I'm a software engineer.",
 'What are your hobbies and interests?',
 'I enjoy reading, hiking, and playing the guitar.',
 'Do you have any siblings? If so, how many and what are their names?',
 'Yes, I have two siblings. Their names are John and Sarah.',
 'What is your favorite book?',
 "My favorite book is 'To Kill a Mockingbird' by Harper Lee.",
 'What is your favorite movie?',
 "My favorite movie is 'The Shawshank Redemption'.",
 'What is your favorite TV show?',
 "My favorite TV show is 'Friends'.",
 'What is your favorite food?',
 'My favorite food is sushi.',
 'Are you single or in a relationship?',
 "I'm currently in a relationship.",
 'Do you have any children? If so, how many and what are their names?',
 "No, I don't have any children.",
 'What is your favorite vacation destination?',
 'My favor

In [10]:
questions = [    "What is your full name?",    "When and where were you born?",    "What is your occupation?",    "What are your hobbies and interests?",    "Do you have any siblings? If so, how many and what are their names?",    "What is your favorite book?",    "What is your favorite movie?",    "What is your favorite TV show?",    "What is your favorite food?",    "Are you single or in a relationship?",    "Do you have any children? If so, how many and what are their names?",    "What is your favorite vacation destination?",    "What is your favorite type of music?",    "What is your favorite sports team?",    "What is your favorite animal?",    "What is your favorite color?",    "What is your favorite type of weather?",    "What is your favorite season?",    "What is your favorite holiday?",    "What is your favorite childhood memory?"]
responses = [    "My full name is Jane Smith.",    "I was born in Los Angeles, California on June 12, 1985.",    "I'm a software engineer.",    "I enjoy reading, hiking, and playing the guitar.",    "Yes, I have two siblings. Their names are John and Sarah.",    "My favorite book is 'To Kill a Mockingbird' by Harper Lee.",    "My favorite movie is 'The Shawshank Redemption'.",    "My favorite TV show is 'Friends'.",    "My favorite food is sushi.",    "I'm currently in a relationship.",    "No, I don't have any children.",    "My favorite vacation destination is Hawaii.",    "I enjoy listening to pop and rock music.",    "My favorite sports team is the Los Angeles Lakers.",    "My favorite animal is the dolphin.",    "My favorite color is blue.",    "I love sunny weather with a light breeze.",    "My favorite season is fall.",    "My favorite holiday is Christmas.",    "My favorite childhood memory is going on road trips with my family."]

# Interleave every question with its response so the corpus alternates
# question, answer, question, answer, ...
training_list = []
for question, response in zip(questions, responses):
    training_list.extend((question, response))

print(len(training_list))

# The with-block closes the file on exit, so no explicit close() is needed.
with open("data/chat.txt", "w") as fout:
    for line in training_list:
        fout.write(line + "\n")

40


In [11]:
# # pattern = r"^([^()]+)" # outside of ()
# Non-greedy match of one parenthesised span, parentheses included.
pattern = r"\((.*?)\)" # inside of and including ()

# cleaned_corpus = []
# with open("clinician_one.txt", "r") as corpus_file:
#     splits = corpus_file.read().split("\n")
#     for line in splits:
#         if line[:11] == "Clinician: ":
#             clean_line = re.sub(pattern, "", line[11:])
#             cleaned_corpus.append(clean_line)


In [12]:
supportive_responses = [    "I'm here for you.",    "You're not alone in this.",    "I'm sorry you're going through this.",    "I can't imagine how hard this must be for you.",    "Is there anything I can do to help?",    "Let's take it one day at a time.",    "You're strong and you can get through this.",    "I believe in you.",    "You're doing the best you can.",    "It's okay to not be okay.",    "I'm proud of you for reaching out.",    "You're a survivor.",    "You're important to me.",    "I care about you.",    "I'm here to listen.",    "You're not a burden.",    "You're not weak for asking for help.",    "I'm glad you're here.",    "I'm grateful to have you as a friend.",    "You're not alone, I'm here to support you.",    "I can understand why you're feeling this way.",    "You're not the only one who has gone through this.",    "We can get through this together.",    "You're not defined by your struggles.",    "You're worthy of love and support.",    "I'm sending you positive thoughts and vibes.",    "You're important and valued.",    "You're not alone, we're all here for you.",    "I'm sorry this is happening to you.",    "I'm here to support you in any way I can.",    "You're not a failure.",    "You're not alone, there are people who care about you.",    "It's okay to take time for yourself.",    "You're allowed to feel however you feel.",    "You're not alone, others have gone through this too.",    "You're not alone, I'm here to help you get through this.",    "I'm proud of you for taking steps to take care of yourself.",    "You're not alone, we'll face this together.",    "I'm here to support you and listen to you.",    "It's okay to ask for help.",    "You're not alone, I'm here to stand by your side.",    "You're not alone, I'm here to walk with you through this.",    "You're not alone, I'm here to offer a shoulder to cry on.",    "I'm here to help you in any way I can.",    "You're not alone, I'm here to support you through this.",    "You're 
not alone, we'll get through this together.",    "I'm here to help you find resources and support.",    "You're not alone, I'm here to offer my love and support.",    "You're not alone, I'm here to be a listening ear.",    "You're not alone, I'm here to remind you of your strength.",    "You're not alone, I'm here to help you see your worth."]
supportive_responses[:10]

["I'm here for you.",
 "You're not alone in this.",
 "I'm sorry you're going through this.",
 "I can't imagine how hard this must be for you.",
 'Is there anything I can do to help?',
 "Let's take it one day at a time.",
 "You're strong and you can get through this.",
 'I believe in you.',
 "You're doing the best you can.",
 "It's okay to not be okay."]

In [13]:
# cleaned_lines = []
# pattern = r"^.*: " # doesn't include metadata
# with open("data/chat1.txt", "r") as file:
#     for line in file.readlines()[3:]:
#         cleaned_lines.append(re.sub(pattern, "", line).replace("\n", "").strip())

# cleaned_lines[:10]

# Persist one supportive response per line. The with-block closes the file
# on exit, so no explicit close() is needed afterwards.
with open("data/bot_responses.txt", "w") as fout:
    for line in supportive_responses:
        fout.write(line + "\n")

In [14]:
from chatbot_diagnostics import *
import json

# NOTE(review): the output recorded for this cell is
# "TypeError: 'NoneType' object is not callable". The cause is not visible
# in this notebook — check what chatbot_diagnostics exposes as PHQ9.
PHQ9(filename = "phq-9.json")

TypeError: 'NoneType' object is not callable

In [16]:
# Scratch mapping from score strings to labels, printed for inspection.
test_dict = dict(
    zip(
        ("0", "1", "2", "3"),
        ("never", "sometimes", "sdf", "nasdf"),
    )
)

print(test_dict)

{'0': 'never', '1': 'sometimes', '2': 'sdf', '3': 'nasdf'}
