### 1 - Importing necessary libraries

In [1]:
# import required libraries
import pandas as pd
import numpy as np
import seaborn as sns

import torch

In [2]:
# Load the labelled prompts; the "cluster" and "sub_class" columns are what the
# label maps and the sub-class -> cluster lookup further down read from.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# saved index — consider index_col=0 if it is not needed. Confirm before changing.
df = pd.read_csv("../data/raw/prompts_v1.csv")
df.head()

Unnamed: 0,prompt,cluster,sub_class
0,How can I integrate a chatbot into my website ...,Communication,Chatbots and Virtual Assistants
1,What are the best practices for designing a co...,Communication,Chatbots and Virtual Assistants
2,"Can chatbots handle complex queries, or are th...",Communication,Chatbots and Virtual Assistants
3,What platforms are available for building cust...,Communication,Chatbots and Virtual Assistants
4,How do I ensure my chatbot understands user in...,Communication,Chatbots and Virtual Assistants


### 2 - Initializing target maps

In [3]:
# Give every distinct sub-class an integer id, numbered in the order the
# sub-classes first appear in df["sub_class"].
subclasses = list(df["sub_class"].unique())
subclass_target_map = {name: idx for idx, name in enumerate(subclasses)}
subclass_target_map

{'Chatbots and Virtual Assistants': 0,
 'Conversation': 1,
 'Mental Health': 2,
 'Music Creation': 3,
 'Speech Generation': 4,
 'Podcast Content Creation': 5,
 'Coding and Programming Assistance': 6,
 'API Integration': 7,
 'Presentation Creation': 8,
 'Email Generation': 9}

In [4]:
# Give every distinct cluster an integer id, numbered in the order the
# clusters first appear in df["cluster"].
clusters = list(df["cluster"].unique())
cluster_target_map = {name: idx for idx, name in enumerate(clusters)}
cluster_target_map

{'Communication': 0,
 'Music and Audio': 1,
 'Programming and Development': 2,
 'Business and Productivity': 3}

### 3 - Loading the models for clusters and sub classes

In [6]:
from transformers import pipeline

# Build one text-classification pipeline per granularity, each loaded from a
# local checkpoint directory (paths are relative to the notebook's location).
# The prediction helpers below read element [0] of each pipeline's result and
# use its "label" and "score" entries.
subclass_model = pipeline(
    "text-classification", model="../models/subclass_models_v1/checkpoint-84"
)
cluster_model = pipeline(
    "text-classification", model="../models/cluster_models_v1/checkpoint-56"
)

### 4 - Making predictions

In [7]:
def get_label(d):
    """Return the integer class index encoded in a prediction's label.

    ``d`` is a prediction dict whose ``"label"`` entry looks like
    ``"LABEL_<n>"``; the text after the first underscore is parsed as an int.
    """
    pieces = d["label"].split("_")
    return int(pieces[1])

In [12]:
def predict_cluster(text, threshold=0.6):
    """Predict the cluster for ``text``, deferring to the general model when unsure.

    Args:
        text: Prompt to classify with ``cluster_model``.
        threshold: Minimum score of the top prediction for its cluster to be
            accepted. Defaults to 0.6.

    Returns:
        A ``(label, score)`` tuple. ``label`` is the cluster name, or
        ``"General Model"`` when the top score is below ``threshold``;
        ``score`` is the top prediction's score in both cases.
    """
    top = cluster_model(text)[0]
    score = top["score"]
    if score < threshold:
        # Keep the same 2-tuple shape as the confident branch (and as
        # predict_subclass) so callers can always unpack ``label, score``.
        return "General Model", score
    label = get_label(top)
    # Class index i corresponds to the i-th key of cluster_target_map.
    cluster = list(cluster_target_map.keys())[label]
    return cluster, score

In [19]:
def predict_subclass(text, threshold=0.5):
    """Predict the sub-class for ``text``, deferring to the general model when unsure.

    Args:
        text: Prompt to classify with ``subclass_model``.
        threshold: Minimum score of the top prediction for its sub-class to be
            accepted. Defaults to 0.5.

    Returns:
        ``(subclass_name, score)`` when the top score is at least
        ``threshold``; otherwise ``("General Model", top_prediction)`` where
        ``top_prediction`` is the raw prediction dict (label and score).
    """
    top = subclass_model(text)[0]
    if top["score"] < threshold:
        # NOTE(review): the fallback hands back the whole prediction dict, not
        # just the score — kept as-is; confirm whether callers rely on it.
        return "General Model", top
    label = get_label(top)
    # Class index i corresponds to the i-th key of subclass_target_map.
    subclass = list(subclass_target_map.keys())[label]
    return subclass, top["score"]

In [20]:
# Spot-check the cluster model on a programming-flavoured prompt.
predict_cluster("How to create an API?")

('Programming and Development', 0.9439895153045654)

In [21]:
# Same prompt against the sub-class model; the recorded score only just clears
# the 0.5 cutoff.
predict_subclass("How to create an API?")

('API Integration', 0.5087920427322388)

In [22]:
# A prompt that matches none of the listed sub-classes: in the recorded run the
# cluster model still returns a label with a score above the 0.6 cutoff.
predict_cluster("Which doctor is the best?")

('Communication', 0.7689138054847717)

In [23]:
# Same prompt against the sub-class model: the recorded top score is below 0.5,
# so the fallback ("General Model" plus the raw prediction) is returned.
predict_subclass("Which doctor is the best?")

('General Model', {'label': 'LABEL_2', 'score': 0.28602322936058044})

### 5 - Functionalizing the code

In [30]:
def get_cluster_label_and_score(prediction):
    """Translate the top cluster prediction into a readable label and score.

    ``prediction`` is the list returned by the cluster pipeline; only its
    first element is used. The class index parsed from ``"LABEL_<n>"`` selects
    the n-th key of ``cluster_target_map``.

    Returns a dict with ``"label"`` (cluster name) and ``"score"``.
    """
    best = prediction[0]
    position = int(best["label"].split("_")[1])
    names = list(cluster_target_map)
    return {"label": names[position], "score": best["score"]}

In [36]:
def get_subclass_label_and_score(prediction):
    """Translate the top sub-class prediction into a readable label and score.

    ``prediction`` is the list returned by the sub-class pipeline; only its
    first element is used. The class index parsed from ``"LABEL_<n>"`` selects
    the n-th key of ``subclass_target_map``.

    Returns a dict with ``"label"`` (sub-class name) and ``"score"``.
    """
    best = prediction[0]
    position = int(best["label"].split("_")[1])
    names = list(subclass_target_map)
    return {"label": names[position], "score": best["score"]}

In [47]:
def get_cluster_from_subclass(subclass):
    """Look up the parent cluster of ``subclass`` in the prompts table.

    Returns the ``cluster`` value of the first ``df`` row whose ``sub_class``
    equals ``subclass``. Raises ``IndexError`` when no row matches.
    """
    matches = df.loc[df["sub_class"] == subclass, "cluster"]
    return matches.values[0]

In [61]:
def predict_prompt(text, subclass_threshold=0.5, cluster_threshold=0.7):
    """Route ``text`` to a sub-class, a cluster, or the general model.

    The sub-class model is tried first. If its top score exceeds
    ``subclass_threshold``, the sub-class is reported together with its
    parent cluster (looked up from the prompts table). Otherwise the cluster
    model is consulted; if its top score exceeds ``cluster_threshold``, the
    cluster is reported with the rejected sub-class as a suggestion.

    Args:
        text: Prompt to classify.
        subclass_threshold: Score the sub-class prediction must exceed to be
            accepted. Defaults to 0.5.
        cluster_threshold: Score the cluster prediction must exceed to be
            accepted. Defaults to 0.7.

    Returns:
        A human-readable routing string, or ``"General Model"`` when neither
        model is confident enough.
    """
    subclass = get_subclass_label_and_score(subclass_model(text))
    if subclass["score"] > subclass_threshold:
        cluster = get_cluster_from_subclass(subclass["label"])
        return f"Subclass: {subclass['label']} | Cluster: {cluster}"

    # The cluster model is only run once the sub-class prediction is rejected.
    cluster = get_cluster_label_and_score(cluster_model(text))
    if cluster["score"] > cluster_threshold:
        return f"Cluster: {cluster['label']} | Subclass: N/A (Suggested: {subclass['label']})"
    return "General Model"

In [62]:
# Example prompt for the combined router.
text = "Give me steps for digital marketing"

In [63]:
# In the recorded run neither model clears its threshold for this prompt, so
# the router falls through to "General Model".
predict_prompt(text)

'General Model'

In [65]:
# Example prompt that should land in one of the known sub-classes.
text = "How to help a friend with depression"

In [66]:
# In the recorded run the sub-class branch is taken, and the cluster shown is
# the one looked up from the prompts table for that sub-class.
predict_prompt(text)

'Subclass: Mental Health | Cluster: Communication'

In [68]:
# Another example; the recorded run routes it through the sub-class branch.
text = "Help me write an email for my friend"
predict_prompt(text)

'Subclass: Email Generation | Cluster: Business and Productivity'