# Zero-Shot Classification

### Import Packages

In [62]:
from transformers import pipeline

import pandas as pd
import numpy as np

from sklearn.metrics import classification_report

### Read Data

In [2]:
# Input table for the whole notebook; later cells read its 'message' column
# and the label columns passed as `target_class`.
df = pd.read_csv(filepath_or_buffer='./data/cleaned_data.csv')

### Model

In [5]:
# Build the zero-shot pipeline once; it is reused by every classification call below.
classifier = pipeline(
    task="zero-shot-classification",
    model='cross-encoder/nli-deberta-v3-base',
)

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/738M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


tokenizer_config.json:   0%|          | 0.00/417 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/18.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/156 [00:00<?, ?B/s]



In [100]:
def zero_shot_classification(target_class: str, data=None, clf=None):
    """Score every message against the distinct values of one label column.

    The candidate labels are the distinct values of ``data[target_class]``
    rendered as strings (a missing value becomes the label ``'nan'``). Every
    entry of ``data['message']`` is scored against all candidate labels.

    Args:
        target_class: Name of the column whose distinct values are used as
            candidate labels.
        data: DataFrame providing a ``'message'`` column and the
            ``target_class`` column. Defaults to the notebook-level ``df``.
        clf: Callable invoked as ``clf(messages, candidate_labels)``; each
            result must provide ``'labels'`` and ``'scores'`` entries of equal
            length. Defaults to the notebook-level ``classifier``.

    Returns:
        A DataFrame built from the classifier output with an extra
        ``'predictions'`` column holding, for each message, the label with
        the highest score. Predictions are always ``str``.
    """
    # Fall back to the notebook globals so existing one-argument calls still work.
    if data is None:
        data = df
    if clf is None:
        clf = classifier

    # Stringify labels up front: `unique()` yields float NaN for missing values,
    # which would otherwise leak into `predictions` as a non-str object and
    # clash with the str-cast ground truth used when reporting.
    # dict.fromkeys de-duplicates while keeping first-seen order.
    candidate_labels = list(
        dict.fromkeys(str(label) for label in data[target_class].unique())
    )
    messages = data['message'].tolist()

    res = clf(messages, candidate_labels)
    # NOTE(review): a single-sequence call may come back as one dict rather
    # than a list of dicts -- normalise so each result becomes one row.
    if isinstance(res, dict):
        res = [res]

    res_df = pd.DataFrame(res)

    # Pick the label at the position of the highest score for every row.
    res_df['predictions'] = [
        row_labels[int(np.argmax(row_scores))]
        for row_labels, row_scores in zip(res_df['labels'], res_df['scores'])
    ]

    return res_df

def report(df: pd.DataFrame, res_df: pd.DataFrame, target_class: str):
    """Print a classification report for one target column.

    Args:
        df: DataFrame holding the ground-truth labels in ``df[target_class]``.
        res_df: DataFrame holding the predicted labels in
            ``res_df['predictions']``, row-aligned with ``df``.
        target_class: Name of the ground-truth column; also used
            (lower-cased) in the printed header.

    Returns:
        None. The header and the report text are printed.
    """
    print(f'Predictions for {target_class.lower()}:')
    print('-' * 60)
    # Stringify BOTH sides: missing values show up as float NaN, and mixing
    # str with float labels cannot be compared consistently. Casting only the
    # ground truth forced callers to cast the predictions themselves.
    y_true = [str(label) for label in df[target_class]]
    y_pred = [str(label) for label in res_df['predictions']]
    print(classification_report(y_true, y_pred))

### Results

#### Discussion

In [57]:
# Zero-shot predictions using the distinct values of the 'Discussion' column as labels.
res_df_dis = zero_shot_classification(target_class='Discussion')

In [72]:
# report() takes the ground-truth frame first, then the predictions frame.
report(df, res_df_dis, 'Discussion')

Predictions for discussion:
------------------------------------------------------------
                   precision    recall  f1-score   support

     Deliberation       0.20      0.24      0.22       228
Imaginative Entry       0.03      0.21      0.06        29
            Other       0.00      0.00      0.00        10
        Procedure       0.04      0.09      0.06        65
          Seminar       0.50      0.02      0.03       445
           Social       0.23      0.57      0.33        79
               UX       0.50      0.04      0.08        49

         accuracy                           0.13       905
        macro avg       0.22      0.17      0.11       905
     weighted avg       0.35      0.13      0.11       905



#### Uptake

In [93]:
# Zero-shot predictions using the distinct values of the 'Uptake' column as labels.
res_df_up = zero_shot_classification(target_class='Uptake')
# Force predictions to str so they line up with the str-cast ground truth in report().
res_df_up['predictions'] = res_df_up['predictions'].astype(str)

In [99]:
# Compare the 'Uptake' ground truth in df with the zero-shot predictions.
report(df=df, res_df=res_df_up, target_class='Uptake')

Predictions for uptake:
------------------------------------------------------------
              precision    recall  f1-score   support

      Affirm       0.46      0.45      0.45       193
     Clarify       0.32      0.30      0.31       136
    Disagree       0.06      0.38      0.11        21
   Elaborate       0.24      0.26      0.25       190
      Filler       0.09      0.11      0.10        92
      Prompt       0.03      0.19      0.05        21
         nan       0.20      0.00      0.01       252

    accuracy                           0.22       905
   macro avg       0.20      0.24      0.18       905
weighted avg       0.26      0.22      0.21       905

