In [87]:
import html
import json
import os
import re
from typing import List

import pandas as pd

In [88]:
class Intent:
    """One chatbot intent: a tag plus the pattern and response lists filed under it."""

    # Label that identifies the intent.
    tag: str
    # Texts stored as patterns for this tag.
    patterns: List[str]
    # Texts stored as responses for this tag.
    responses: List[str]

    def __init__(self, tag: str, patterns: List[str], responses: List[str]) -> None:
        """Keep references to the three values as given; nothing is copied or validated."""
        self.tag, self.patterns, self.responses = tag, patterns, responses

    def __repr__(self) -> str:
        """Render the current field values in a constructor-like form."""
        return f"Intent(tag={self.tag}, patterns={self.patterns}, responses={self.responses})"

    def __str__(self) -> str:
        """Delegate to ``__repr__`` so ``print`` and ``repr`` show the same text."""
        return self.__repr__()

In [89]:
# Read the seed intents. The encoding is given explicitly because JSON text is
# UTF-8, whereas open() would otherwise fall back to the platform's locale
# encoding and could mis-decode non-ASCII characters on some systems.
with open("basic-intents.json", "r", encoding="utf-8") as json_file:
    initial_intents_json = json.load(json_file)

# Each entry under the top-level "intents" key supplies the "tag", "patterns"
# and "responses" values for one Intent object.
intents: List[Intent] = [
    Intent(entry["tag"], entry["patterns"], entry["responses"])
    for entry in initial_intents_json["intents"]
]

In [90]:
def get_all_tags(intents: "List[Intent]") -> List[str]:
    """Return the ``tag`` of every intent, in input order (duplicates are kept).

    Args:
        intents: Iterable of objects exposing a ``tag`` attribute.

    Returns:
        A new list holding one tag per input element.
    """
    return [intent.tag for intent in intents]

# I want to merge intents that have the same tag
def merge_intents(intents: List[Intent]) -> List[Intent]:
    """Combine intents that share a tag into a single intent per tag.

    Patterns and responses of same-tag intents are concatenated in input order;
    duplicates are kept. The input intents and their lists are not modified:
    every returned Intent owns freshly built lists.

    Args:
        intents: Intents to merge. ``None`` or an empty list yields ``[]``.

    Returns:
        One Intent per distinct tag, ordered by the first appearance of each
        tag in ``intents`` so that repeated runs produce the same order.
    """
    if not intents:
        return []

    # tag -> (patterns, responses). A dict keeps insertion order, which gives a
    # deterministic result, and grouping takes a single pass over the input.
    grouped = {}
    for intent in intents:
        patterns, responses = grouped.setdefault(intent.tag, ([], []))
        patterns.extend(intent.patterns)
        responses.extend(intent.responses)

    return [
        Intent(tag, patterns, responses)
        for tag, (patterns, responses) in grouped.items()
    ]

In [91]:
# Read the counselchat export, discard every row that has a missing value in
# any column, then keep only rows whose question and answer texts are non-empty.
counsel_chat_dataset = (
    pd.read_csv("data/counselchat-data.csv")
    .dropna()
    .loc[lambda frame: frame["questionText"].str.len() > 0]
    .loc[lambda frame: frame["answerText"].str.len() > 0]
)

# Preview the first rows of the filtered frame.
counsel_chat_dataset.head()

Unnamed: 0,questionID,questionTitle,questionText,questionUrl,topics,therapistName,therapistUrl,answerText,upvotes
0,5566fab2a64752d71ec3ca69,Escalating disagreements between mother and wife,My wife and mother are having tense disagreeme...,https://counselchat.com/questions/escalating-d...,Family Conflict,"Kristi King-Morgan, LMSW",https://counselchat.com/therapists/kristi-king...,<p>What you are describing is something psycho...,0.0
1,5566f94fa64752d71ec3ca64,I'm addicted to smoking. How can I stop?,"I'm planning to have baby, so I have to quit s...",https://counselchat.com/questions/i-m-addicted...,"Substance Abuse,Addiction",Rebecca Duellman,https://counselchat.com/therapists/rebecca-due...,<p>Hi. Good for you in planning ahead to do wh...,0.0
2,5567d26887a1cc0c3f3d8f46,Keeping secrets from my family,"I have secrets in my mind, and I don't know wh...",https://counselchat.com/questions/keeping-secr...,Family Conflict,Jeevna Bajaj,https://counselchat.com/therapists/jeevna-bajaj,<p>It sounds like keeping the secrets has beco...,0.0
3,556bed15c969ba5861709df5,The Underlying Causes of Being Possessive,I am extremely possessive in my relationships ...,https://counselchat.com/questions/the-underlyi...,"Behavioral Change,Social Relationships",Rebecca Duellman,https://counselchat.com/therapists/rebecca-due...,<p>Hi there. It's great you are able to realiz...,0.0
4,556ba115c969ba5861709de6,Can I control anxiety without medication?,I had a head injury a few years ago and my min...,https://counselchat.com/questions/can-i-contro...,Anxiety,Rebecca Duellman,https://counselchat.com/therapists/rebecca-due...,<p>You didn't say what or how many medications...,0.0


In [92]:
# Matches opening/closing markup tags such as <p>, </p>, <br/> or <strong>.
_HTML_TAG_RE = re.compile(r"</?[A-Za-z][^>]*>")
# Matches runs of characters outside the whitelist: letters, digits, space and
# the punctuation . ? ! ' ( ) * + , - / : ;  (the hyphen is escaped so that it
# is a literal character rather than the middle of a range).
_DISALLOWED_CHARS_RE = re.compile(r"[^A-Za-z0-9 .?!'()*+,\-/:;]+")


def _clean_answer_text(answer_html: str) -> str:
    """Turn an HTML answer into plain, single-spaced text.

    Args:
        answer_html: Answer text that may contain markup tags and entities.

    Returns:
        The text with tags removed, entities decoded, non-whitelisted
        characters dropped and whitespace collapsed to single spaces.
    """
    # Tags become a space (not an empty string) so that words on either side of
    # a tag, e.g. across "</p><p>", do not get glued together.
    text = _HTML_TAG_RE.sub(" ", answer_html)
    # Decode entities after tag removal so an escaped "&lt;" is never mistaken
    # for the start of a tag; "&nbsp;" decodes to a non-breaking space.
    text = html.unescape(text)
    # Normalise all whitespace (including non-breaking spaces and newlines) to a
    # plain space before filtering, otherwise the filter would delete it.
    text = re.sub(r"\s+", " ", text)
    text = _DISALLOWED_CHARS_RE.sub("", text)
    # Dropping characters can leave doubled or edge spaces; collapse them.
    return " ".join(text.split())


counsel_chat_intents: List[Intent] = []
for question_text, answer_html, topics in zip(
    counsel_chat_dataset["questionText"],
    counsel_chat_dataset["answerText"],
    counsel_chat_dataset["topics"],
):
    answer_text = _clean_answer_text(answer_html)
    # "topics" is a comma-separated list; every topic yields its own intent
    # holding the same question/answer pair.
    for topic in topics.split(","):
        tag = topic.strip().lower().replace(" ", "_")
        if not tag:
            # A stray comma would otherwise create an intent with an empty tag.
            continue
        counsel_chat_intents.append(Intent(tag, [question_text], [answer_text]))

# NOTE: `intents` is rebound to the merged result. Re-running this cell without
# first re-running the cell that loads basic-intents.json adds the counselchat
# entries a second time, so run the notebook top to bottom.
intents = merge_intents(intents + counsel_chat_intents)
print(len(counsel_chat_intents))

2441


In [93]:
# Peek at the tail of the merged list. A negative start with an open end
# includes the final intent and also behaves sensibly when fewer than ten exist.
print(intents[-10:])



In [94]:
def intent_to_dictionary(intent: Intent) -> dict:
    """Build a plain dict with the intent's ``tag``, ``patterns`` and ``responses``.

    The values are the intent's own attribute objects (no copies are made), and
    the keys appear in the order tag, patterns, responses.
    """
    exported_fields = ("tag", "patterns", "responses")
    return {field: getattr(intent, field) for field in exported_fields}

# Convert every intent to a plain dict and wrap the list under an "intents" key.
intent_dicts = [intent_to_dictionary(intent) for intent in intents]
write_json = {
    "intents": intent_dicts
}

# The `with` block closes the file on exit, so no explicit close() is needed.
# The encoding is stated explicitly rather than left to the platform default.
with open("new_intents.json", "w", encoding="utf-8") as json_file:
    json.dump(write_json, json_file, indent=4)