<a href="https://colab.research.google.com/github/bhardwajchetna/Zero-shot-Classifier/blob/main/Cheatsheet_Zero_shot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Installing the packages
!pip install ftfy
!pip install spacy
!pip install transformers



In [None]:
# Importing the libraries
import pprint
import re
import string

import ftfy
import nltk
import numpy as np
import pandas as pd
import spacy
import torch
from numpy import argmax
from torch.optim.adam import Adam
from transformers import pipeline

In [None]:
# Make Google Drive visible to this runtime; the input CSVs are read from it.
from google.colab import drive

MOUNT_POINT = '/content/gdrive'
drive.mount(MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Step 1: Reading the input files

# Folder that holds both input CSVs -- change this one line to point elsewhere
DATA_DIR = '/content/gdrive/Shareddrives/Job Prep/cheatsheet docs'

# Replace here the name of csv file containing reviews
reviews = pd.read_csv(DATA_DIR + '/Google_Reviews.csv')

# Replace here the csv file of classes that you want to predict using descriptive names
layers = pd.read_csv(DATA_DIR + '/layers.csv')

# Fail fast with a readable message: the classification cells read
# reviews['text'] and layers['Subtheme'] and would otherwise die with a bare
# KeyError only after the model has been loaded.
if 'text' not in reviews.columns:
    raise ValueError("Reviews file must contain a 'text' column")
if 'Subtheme' not in layers.columns:
    raise ValueError("Layers file must contain a 'Subtheme' column")

In [None]:
# Preview the first ten rows of the reviews table to check its layout
reviews.head(n=10)

Unnamed: 0,text
0,My husband and I recently received our second ...
1,Miss Teal up on the 2nd floor in Optical Dept ...
2,If I could give this hospital negative stars I...
3,Don’t be fooled by their awards and notoriety....
4,Very disappointing ER experience.\nThe room we...
5,"I got my COVID vaccination, today at UCDAVIS M..."
6,Working in EMS currently waiting a wall time o...
7,I brought my father here after he was hospital...
8,My Mom was in ER from Wednesday 11am until adm...
9,Very unprofessional people. No compassion what...


In [None]:
# Preview the first ten rows of the class / subclass table to check its layout
layers.head(n=10)

Unnamed: 0,Subtheme,Subclass1,Subclass2,Subclass3,Subclass4,Subclass5,Subclass6
0,Medication,Pain Management,Infection Control,Plan of Care,Prescription,Communication,
1,Cleaniless,Patient Area,Public Space,Personal Hygiene,,,
2,Physical Enviornment,Noise,Parking,Temperature,Ambience,,
3,Communication,Doctor,Interpreter,Staff,Interns,,
4,Behavior,Compassion and Empathy,Rude,Attentive,Dismissive,,
5,Wait Time,Call back,Waiting,Telephone Acess,,,
6,Finance,Billing,Cost of Care,Discharge Dispute,,,
7,Decedent Affairs,End of Life,Withdrawl of Care,,,,
8,Care Quality,Responsiveness,Listening,Knowledgeable,Detailed,Follow up,
9,Treatment,Procedure,Surgery,Blood Drawing,Tests,,


In [None]:
# Step 2: Converting the classes & subclasses to predict into a dictionary

# Reshape wide -> long: every non-"Subtheme" column becomes one row per cell,
# giving (Subtheme, Subclass column name, Topics value) triples.
# Written as a chain without inplace=True so re-running the cell is idempotent.
df_layers = (
    layers
    .melt(id_vars=["Subtheme"], var_name="Subclass", value_name="Topics")
    .dropna()                  # discard the empty subclass cells
    .drop(columns="Subclass")  # the originating column name is not needed
)

# Converting the dataframe to final dictionary: {subtheme: [topic, ...]}.
# dict.fromkeys() removes repeated topics while keeping first-seen order, so
# the same candidate label is never handed to the classifier twice.
thisdict = {
    subtheme: list(dict.fromkeys(group["Topics"]))
    for subtheme, group in df_layers.groupby("Subtheme")
}

# A look into the Dictionary of Subthemes & subclasses
pprint.pprint(thisdict)

{'Abuse Allegations': ['Physical', 'Verbal', 'Sexual'],
 'Behavior': ['Compassion and Empathy', 'Rude', 'Attentive', 'Dismissive'],
 'Care Quality': ['Responsiveness',
                  'Listening',
                  'Knowledgeable',
                  'Detailed',
                  'Follow up'],
 'Cleaniless': ['Patient Area', 'Public Space', 'Personal Hygiene'],
 'Communication': ['Doctor', 'Interpreter', 'Staff', 'Interns'],
 'Decedent Affairs': ['End of Life', 'Withdrawl of Care'],
 'Diagnosis': ['Tests',
               'Reports',
               'Explanation',
               'Charts',
               'Explanation',
               'Interpretation'],
 'Finance': ['Billing', 'Cost of Care', 'Discharge Dispute'],
 'Interpersonal': ['Empathy', 'Friendly', 'Patience', 'Honesty'],
 'Medication': ['Pain Management',
                'Infection Control',
                'Plan of Care',
                'Prescription',
                'Communication'],
 'Physical Enviornment': ['Noise', 'Parking'

In [None]:
# Step 3: Setting up the zero-shot classifier
# Use the first CUDA GPU when the runtime has one; otherwise fall back to the
# CPU (device=-1) instead of failing on a CPU-only runtime.
DEVICE = 0 if torch.cuda.is_available() else -1
zero_shot_classifier = pipeline('zero-shot-classification', model='facebook/bart-large-mnli', device=DEVICE)

In [None]:
# Step 4: Zero-shot classification for only 'One class'
# Every review is scored against all Subthemes; the TOP_K highest-scoring
# Subthemes are stored as predicted_class1..predicted_classN and saved to CSV.

# Plain list of unique, non-missing labels (the pipeline is given a list rather
# than a pandas Series).
classes = layers['Subtheme'].dropna().drop_duplicates().tolist()
hypothesis_template = "This text is about {}."

TOP_K = 4  # number of predicted_class columns written per review

# Translation table that deletes ASCII punctuation; built once, outside the loop
punctuation_table = str.maketrans(dict.fromkeys(string.punctuation))

# Initializing the objects
d = []

# Iterating through the reviews. enumerate() supplies the progress counter so
# the loop does not depend on the DataFrame index being 0..n-1 integers.
for position, (_, row) in enumerate(reviews.iterrows(), start=1):
    raw_text = row['text']
    # Missing reviews are read by pandas as NaN (a float); treat them as empty
    text = ftfy.fix_text(raw_text) if isinstance(raw_text, str) else ""
    text = text.translate(punctuation_table)

    if text.strip():
        results = zero_shot_classifier(text, classes, hypothesis_template=hypothesis_template, multi_label=True)
        # Rank explicitly by score instead of relying on the order in which the
        # pipeline returns labels; the sort is stable, so ties keep their order.
        ranked = sorted(zip(results["scores"], results["labels"]), key=lambda pair: pair[0], reverse=True)
        top_classes = [label for _, label in ranked[:TOP_K]]
    else:
        # Nothing left to classify after cleaning
        top_classes = []

    # Pad with None so every record has the same columns even when fewer than
    # TOP_K labels are available.
    top_classes += [None] * (TOP_K - len(top_classes))

    record = {'text': raw_text}
    for rank, label in enumerate(top_classes, start=1):
        record['predicted_class' + str(rank)] = label
    d.append(record)
    print("Classifying the " + str(position) + " review")

print("Classification Complete")

# Downloading the final result 
from google.colab import files
df_ZeroShot_OnlyClasses = pd.DataFrame(d)
df_ZeroShot_OnlyClasses.to_csv('Topic_Model_ZeroShot_OnlyClasses.csv', index=False)
files.download('Topic_Model_ZeroShot_OnlyClasses.csv')

Classifying the 1 review
Classifying the 2 review
Classifying the 3 review
Classifying the 4 review
Classifying the 5 review
Classifying the 6 review
Classifying the 7 review
Classifying the 8 review
Classifying the 9 review
Classifying the 10 review
Classifying the 11 review
Classifying the 12 review
Classifying the 13 review
Classifying the 14 review
Classifying the 15 review
Classifying the 16 review
Classifying the 17 review
Classifying the 18 review
Classifying the 19 review
Classifying the 20 review
Classifying the 21 review
Classifying the 22 review
Classifying the 23 review
Classifying the 24 review
Classifying the 25 review
Classifying the 26 review
Classifying the 27 review
Classifying the 28 review
Classifying the 29 review
Classifying the 30 review
Classifying the 31 review
Classifying the 32 review
Classifying the 33 review
Classifying the 34 review
Classifying the 35 review
Classifying the 36 review
Classifying the 37 review
Classifying the 38 review
Classifying the 39 re

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Step 5: Zero-shot classification for only 'Multi Subclass'
# Every review is scored against all Subthemes; for each of the TOP_K
# highest-scoring Subthemes the review is scored again against that Subtheme's
# subclasses (from thisdict) and the best subclass is kept.

# Plain list of unique, non-missing labels (the pipeline is given a list rather
# than a pandas Series).
classes = layers['Subtheme'].dropna().drop_duplicates().tolist()
hypothesis_template = "This text is about {}."

TOP_K = 4  # number of predicted_class / predicted_sub column pairs per review

# Translation table that deletes ASCII punctuation; built once, outside the loop
punctuation_table = str.maketrans(dict.fromkeys(string.punctuation))


def _top_labels(text, candidate_labels, k):
    """Return up to ``k`` of ``candidate_labels`` for ``text``, best score first.

    Returns an empty list when there is nothing to classify (blank text or no
    candidate labels). Labels are ranked explicitly by score rather than relying
    on the order in which the pipeline returns them; the sort is stable, so
    tied labels keep their returned order.
    """
    if not candidate_labels or not text.strip():
        return []
    results = zero_shot_classifier(text, candidate_labels, hypothesis_template=hypothesis_template, multi_label=True)
    ranked = sorted(zip(results["scores"], results["labels"]), key=lambda pair: pair[0], reverse=True)
    return [label for _, label in ranked[:k]]


# Initializing the objects
d = []

# Iterating through the reviews. enumerate() supplies the progress counter so
# the loop does not depend on the DataFrame index being 0..n-1 integers.
for position, (_, row) in enumerate(reviews.iterrows(), start=1):
    raw_text = row['text']
    # Missing reviews are read by pandas as NaN (a float); treat them as empty
    text = ftfy.fix_text(raw_text) if isinstance(raw_text, str) else ""
    text = text.translate(punctuation_table)

    # First level: the TOP_K Subthemes, padded with None so every record has
    # the same columns even when fewer than TOP_K labels are available.
    top_classes = _top_labels(text, classes, TOP_K)
    top_classes += [None] * (TOP_K - len(top_classes))

    # Second level: best subclass within each predicted Subtheme. .get() covers
    # Subthemes that have no subclasses (absent from thisdict) and the None
    # padding; both yield None instead of raising KeyError.
    top_subclasses = []
    for predicted_class in top_classes:
        best_sub = _top_labels(text, thisdict.get(predicted_class, []), 1)
        top_subclasses.append(best_sub[0] if best_sub else None)

    record = {'text': raw_text}
    for rank, label in enumerate(top_classes, start=1):
        record['predicted_class' + str(rank)] = label
    for rank, label in enumerate(top_subclasses, start=1):
        record['predicted_sub' + str(rank)] = label
    d.append(record)
    print("Classifying the " + str(position) + " review")

print("Classification Complete")

# Downloading the final result 
from google.colab import files
df_ZeroShot_MultiSubclass = pd.DataFrame(d)
df_ZeroShot_MultiSubclass.to_csv('Topic_Model_ZeroShot_MultiSubclass.csv', index=False)
files.download('Topic_Model_ZeroShot_MultiSubclass.csv')

Classifying the 1 review
Classifying the 2 review
Classifying the 3 review
Classifying the 4 review
Classifying the 5 review
Classifying the 6 review
Classifying the 7 review
Classifying the 8 review
Classifying the 9 review
Classifying the 10 review
Classifying the 11 review
Classifying the 12 review
Classifying the 13 review
Classifying the 14 review
Classifying the 15 review
Classifying the 16 review
Classifying the 17 review
Classifying the 18 review
Classifying the 19 review
Classifying the 20 review
Classifying the 21 review
Classifying the 22 review
Classifying the 23 review
Classifying the 24 review
Classifying the 25 review
Classifying the 26 review
Classifying the 27 review
Classifying the 28 review
Classifying the 29 review
Classifying the 30 review
Classifying the 31 review
Classifying the 32 review
Classifying the 33 review
Classifying the 34 review
Classifying the 35 review
Classifying the 36 review
Classifying the 37 review
Classifying the 38 review
Classifying the 39 re

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### *Congratulations on your 1st Zero-shot Classifier*