In [None]:
!pip install datasets transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import numpy as np
import torch
import random
from datasets import load_dataset
import time
import datetime
from transformers import pipeline

# Processing and loading of the two datasets
def set_seeds(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed passed to every generator (defaults to 42).

    The CUDA generators are seeded as well, but only when CUDA is available.
    """
    # CPU-side generators: stdlib `random`, NumPy's global RNG, PyTorch.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# Seed every RNG up front (default seed 42) so the random train/dev/test
# masks drawn later with np.random.rand are repeatable.
set_seeds()

In [None]:
# Load the GoEmotions and Empathetic Dialogues datasets from the Hugging Face
# hub, requesting the "raw" configuration name for each.
emotions = load_dataset("go_emotions", "raw")
empathetic_dialogues = load_dataset("empathetic_dialogues", "raw")

Reusing dataset go_emotions (/root/.cache/huggingface/datasets/go_emotions/raw/0.0.0/2637cfdd4e64d30249c3ed2150fa2b9d279766bfcd6a809b9f085c61a90d776d)


  0%|          | 0/1 [00:00<?, ?it/s]

Using custom data configuration raw
Reusing dataset empathetic_dialogues (/root/.cache/huggingface/datasets/empathetic_dialogues/raw/0.1.0/09bbeed3882a67db98c73952fb3c1c9a85af83dc78f81454c2454382fd03f6cf)


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
def train(classifier,sequence,label_cols):
    """Run zero-shot classification and return the top label for each input.

    Despite the name, nothing is trained here: the function only calls
    ``classifier`` and times the call.

    Args:
        classifier: Callable invoked as
            ``classifier(sequence, label_cols, hypothesis_template=...)``.
            It must return one result dict per input (or a single dict),
            each with a ``'labels'`` list.
        sequence: Text or list of texts to classify.
        label_cols: Candidate label names.

    Returns:
        A list with the first entry of each result's ``'labels'`` list. The
        Hugging Face zero-shot pipeline is documented to order labels by
        descending score, so this is the top-1 prediction.
    """
    hypothesis_template = 'It was {}.'
    start_time = time.time()
    result = classifier(sequence, label_cols,
                        hypothesis_template=hypothesis_template)
    complete_time = time.time() - start_time
    print("running time: "+str(datetime.timedelta(seconds=complete_time)))

    # For a single input the pipeline returns one dict instead of a list.
    # Iterating that dict would yield its keys and crash on res['labels'],
    # so normalise to a one-element list first.
    if isinstance(result, dict):
        result = [result]

    return [res['labels'][0] for res in result]


def train_emp(classifier,sequence,label_cols,top_k=3):
    """Run zero-shot classification and return top-1 and top-k labels.

    Despite the name, nothing is trained here: the function only calls
    ``classifier`` and times the call.

    Args:
        classifier: Callable invoked as
            ``classifier(sequence, label_cols, hypothesis_template=...)``.
            It must return one result dict per input (or a single dict),
            each with a ``'labels'`` list.
        sequence: Text or list of texts to classify.
        label_cols: Candidate label names.
        top_k: How many leading labels to keep per input. Defaults to 3,
            the value that was previously hard-coded.

    Returns:
        A tuple ``(allpred, allpred_topk)``: the first label of every result
        and the first ``top_k`` labels of every result. The Hugging Face
        zero-shot pipeline is documented to order labels by descending score.
    """
    hypothesis_template = 'It was {}.'
    start_time = time.time()
    result = classifier(sequence, label_cols,
                        hypothesis_template=hypothesis_template)
    complete_time = time.time() - start_time
    print("running time: "+str(datetime.timedelta(seconds=complete_time)))

    # For a single input the pipeline returns one dict instead of a list.
    # Iterating that dict would yield its keys and crash on res['labels'],
    # so normalise to a one-element list first.
    if isinstance(result, dict):
        result = [result]

    allpred = [res['labels'][0] for res in result]
    allpred_topk = [res['labels'][:top_k] for res in result]
    return allpred,allpred_topk

def processs_train_emotions(emotions,classifier):
    """Evaluate a zero-shot classifier on single-label GoEmotions rows.

    Args:
        emotions: Dataset mapping whose ``'train'`` split supports
            ``to_pandas()``.
        classifier: Zero-shot classifier forwarded to ``train``.

    Returns:
        ``(alllabels, allpreds)``: gold label names and predicted label names
        for the randomly drawn test portion.
    """
    frame = emotions['train'].to_pandas()
    # Every column from position 9 onward is treated as a 0/1 label indicator
    # (assumes the first nine columns are metadata -- TODO confirm).
    label_cols = list(frame.columns)[9:]

    # Keep only rows with exactly one active label, and record its position.
    frame["labels_num"] = [sum(flags) for flags in frame[label_cols].values.tolist()]
    single = frame.loc[frame["labels_num"] == 1].copy()
    single["labels"] = [flags.index(1) for flags in single[label_cols].values.tolist()]
    single["idx"] = single.index

    # Random split of roughly 90% train, 5% dev and 5% test. The two draws
    # stay in this order so the global NumPy RNG produces the same partition.
    in_train = np.random.rand(len(single)) < 0.9
    held_out = single[~in_train]
    in_test = np.random.rand(len(held_out)) < 0.5
    raw_dataset = {
        "train": single[in_train],
        "validation": held_out[~in_test],
        "test": held_out[in_test],
    }

    test_part = raw_dataset["test"]
    sequence = test_part["text"].tolist()
    alllabels = [label_cols[int(position)] for position in test_part["labels"]]

    allpreds = train(classifier,sequence,label_cols)
    return alllabels,allpreds

def process_train_empathetic(empathetic,classifier):
    """Evaluate a zero-shot classifier on Empathetic Dialogues prompts.

    Args:
        empathetic: Dataset mapping whose ``'train'`` split supports
            ``to_pandas()`` and has ``'context'`` and ``'prompt'`` columns.
        classifier: Zero-shot classifier forwarded to ``train_emp``.

    Returns:
        ``(alllabels, allpreds, allpred_topk)``: gold ``context`` values,
        top-1 predictions and top-k predictions for the randomly drawn test
        portion.
    """
    frame = empathetic['train'].to_pandas()
    labels = frame["context"].unique()

    # Two-way mapping between label names and integer ids; the candidate
    # labels are the distinct `context` values in first-seen order.
    labels2id = {name: idx for idx, name in enumerate(labels)}
    id2labels = dict(enumerate(labels))
    label_cols = list(labels2id)
    frame["label2id"] = list(map(labels2id.__getitem__, frame["context"]))

    # Random split of roughly 90% train, 5% dev and 5% test. The two draws
    # stay in this order so the global NumPy RNG produces the same partition.
    in_train = np.random.rand(len(frame)) < 0.9
    held_out = frame[~in_train]
    in_test = np.random.rand(len(held_out)) < 0.5
    raw_dataset = {
        "train": frame[in_train],
        "validation": held_out[~in_test],
        "test": held_out[in_test],
    }

    test_part = raw_dataset["test"]
    sequence = test_part["prompt"].tolist()
    alllabels = test_part["context"].tolist()

    allpreds,allpred_topk = train_emp(classifier,sequence,label_cols)
    return alllabels,allpreds,allpred_topk


In [None]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
def compute_metrics_with_topk(labels, preds,top_k):
    """Compute accuracy, averaged precision/recall/F1 and top-k accuracy.

    Args:
        labels: Gold labels, one per example.
        preds: Top-1 predicted labels, aligned with ``labels``.
        top_k: For each example, a container of candidate labels (the caller
            in this notebook passes the three highest-ranked labels).

    Returns:
        Dict with ``accuracy``, ``{macro,micro,weighted}_{precision,recall,f1}``
        and ``accuracy_3``. ``accuracy_3`` is the fraction of examples whose
        gold label appears in the matching ``top_k`` entry. It is kept as a
        string for backward compatibility with existing consumers.

    Raises:
        AssertionError: If ``preds`` or ``top_k`` differ in length from
            ``labels``.
    """
    assert len(preds) == len(labels)
    # zip() below stops at the shorter input, so a length mismatch would be
    # silently truncated and skew accuracy_3. Fail loudly instead.
    assert len(top_k) == len(labels)
    results = dict()

    results["accuracy"] = accuracy_score(labels, preds)

    # zero_division=1 scores an undefined precision/recall/F1 (for example a
    # label that is never predicted) as 1.0 and suppresses the
    # UndefinedMetricWarning.
    # NOTE(review): this can inflate the macro/weighted averages -- confirm
    # that 1 rather than 0 is intended.
    for average in ("macro", "micro", "weighted"):
        precision, recall, f1, _ = precision_recall_fscore_support(
            labels, preds, average=average, zero_division=1)
        results[average + "_precision"] = precision
        results[average + "_recall"] = recall
        results[average + "_f1"] = f1

    # Top-k accuracy: a hit when the gold label is among the candidates.
    right = sum(1 for candidates, gold in zip(top_k, labels) if gold in candidates)
    total = len(top_k)
    results["accuracy_3"] = str(right/total)
    return results
    
def compute_metrics(labels, preds):
    """Compute accuracy and macro/micro/weighted precision, recall and F1.

    Args:
        labels: Gold labels, one per example.
        preds: Predicted labels, aligned with ``labels``.

    Returns:
        Dict with ``accuracy`` and
        ``{macro,micro,weighted}_{precision,recall,f1}``.

    Raises:
        AssertionError: If ``preds`` and ``labels`` differ in length.
    """
    assert len(preds) == len(labels)
    results = dict()
    results["accuracy"] = accuracy_score(labels, preds)

    # zero_division=1 scores an undefined precision/recall/F1 as 1.0 and
    # suppresses the UndefinedMetricWarning.
    for average in ("macro", "micro", "weighted"):
        precision, recall, f1, _ = precision_recall_fscore_support(
            labels, preds, average=average, zero_division=1)
        results[average + "_precision"] = precision
        results[average + "_recall"] = recall
        results[average + "_f1"] = f1

    return results

In [None]:
# With this zero-shot framework, only models that were fine-tuned on MNLI are useful.

# classifier = pipeline('zero-shot-classification',
#                       model='roberta-base',device=0)

# classifier = pipeline('zero-shot-classification',
#                       model='t5-base',device=0)


# classifier = pipeline('zero-shot-classification',
#                       model='facebook/bart-large',device=0)

# Zero-shot classifier backed by the MNLI fine-tuned BART checkpoint.
# Use the first GPU when CUDA is available, otherwise fall back to the CPU
# (device=-1) so this cell does not fail on a machine without a GPU.
classifier = pipeline('zero-shot-classification',
                      model='facebook/bart-large-mnli',
                      device=0 if torch.cuda.is_available() else -1)

# classifier = pipeline('zero-shot-classification',
#                       model='t5-base',device=0)

Downloading:   0%|          | 0.00/1.13k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

In [None]:
# alllabels,allpreds = processs_train_emotions(emotions,classifier)

In [None]:
# result = compute_metrics(alllabels,allpreds)

In [None]:
# result

In [None]:
# Classify the randomly drawn Empathetic Dialogues test portion with the
# zero-shot pipeline, then score the top-1 and top-3 predictions.
# The recorded run of this cell took about 38 minutes.
alllabels,allpreds,allpred_topk = process_train_empathetic(empathetic_dialogues,classifier)
result = compute_metrics_with_topk(alllabels,allpreds,allpred_topk)

running time: 0:38:22.665283


In [None]:
# Display the metrics dictionary computed in the previous cell.
result

{'accuracy': 0.33955123006217897,
 'accuracy_3': '0.595566369288997',
 'macro_f1': 0.3404328591822997,
 'macro_precision': 0.5054814059138493,
 'macro_recall': 0.3304535067714196,
 'micro_f1': 0.33955123006217897,
 'micro_precision': 0.33955123006217897,
 'micro_recall': 0.33955123006217897,
 'weighted_f1': 0.3422584075165533,
 'weighted_precision': 0.4941371108286804,
 'weighted_recall': 0.33955123006217897}

In [None]:
# Display the current metrics again; the commented-out block that follows
# keeps metrics recorded from earlier runs for comparison.
result
# """
# emotions -- roberta
# {'accuracy': 0.15862959051977005,
#  'macro_precision': 0.10772124523631862,
#  'macro_recall': 0.03889979100263789,
#  'macro_f1': 0.02709964491711848,
#  'micro_precision': 0.15862959051977005,
#  'micro_recall': 0.15862959051977005,
#  'micro_f1': 0.15862959051977005,
#  'weighted_precision': 0.15439680996956834,
#  'weighted_recall': 0.15862959051977005,
#  'weighted_f1': 0.13754693370370813}

#  emotions -- bart
# {'accuracy': 0.028777830412126955,
#  'macro_precision': 0.2920434752491931,
#  'macro_recall': 0.038625751830119584,
#  'macro_f1': 0.009647512780249516,
#  'micro_precision': 0.028777830412126955,
#  'micro_recall': 0.028777830412126955,
#  'micro_f1': 0.028777830412126955,
#  'weighted_precision': 0.5014980842767552,
#  'weighted_recall': 0.028777830412126955,
#  'weighted_f1': 0.005310346264179493}

#  emotions -- bert
# 'accuracy': 0.03421876812434752,
#  'macro_precision': 0.039281346090015365,
#  'macro_recall': 0.0339997194346809,
#  'macro_f1': 0.019571401392051626,
#  'micro_precision': 0.03421876812434752,
#  'micro_recall': 0.03421876812434752,
#  'micro_f1': 0.03421876812434752,
#  'weighted_precision': 0.12874849238279698,
#  'weighted_recall': 0.03421876812434752,
#  'weighted_f1': 0.03289533020879865

# emp  -- roberta

# {'accuracy': 0.033281331253250133,
#  'macro_precision': 0.22869750498254882,
#  'macro_recall': 0.03381499962064232,
#  'macro_f1': 0.019498471363861538,
#  'micro_precision': 0.033281331253250133,
#  'micro_recall': 0.033281331253250133,
#  'micro_f1': 0.033281331253250133,
#  'weighted_precision': 0.23806062091520463,
#  'weighted_recall': 0.033281331253250133,
#  'weighted_f1': 0.022339127532789164,
#  'accuracy_3': '0.10426417056682268'}

# emp --- bart
# 'accuracy': 0.03686396677050883,
#  'macro_precision': 0.4858148335121859,
#  'macro_recall': 0.038630670594980744,
#  'macro_f1': 0.02060260523553615,
#  'micro_precision': 0.03686396677050883,
#  'micro_recall': 0.03686396677050883,
#  'micro_f1': 0.03686396677050883,
#  'weighted_precision': 0.4871127286877501,
#  'weighted_recall': 0.03686396677050883,
#  'weighted_f1': 0.020501389693469728,
#  'accuracy_3': '0.11889927310488058'

#  emp --- bert
# 'accuracy': 0.008661417322834646,
#  'macro_precision': 0.12900897584644921,
#  'macro_recall': 0.008575983788716535,
#  'macro_f1': 0.004875549173334347,
#  'micro_precision': 0.008661417322834646,
#  'micro_recall': 0.008661417322834646,
#  'micro_f1': 0.008661417322834646,
#  'weighted_precision': 0.14691699191946458,
#  'weighted_recall': 0.008661417322834646,
#  'weighted_f1': 0.004806757640221943,
#  'accuracy_3': '0.03648293963254593'




{'accuracy': 0.33955123006217897,
 'accuracy_3': '0.595566369288997',
 'macro_f1': 0.3404328591822997,
 'macro_precision': 0.5054814059138493,
 'macro_recall': 0.3304535067714196,
 'micro_f1': 0.33955123006217897,
 'micro_precision': 0.33955123006217897,
 'micro_recall': 0.33955123006217897,
 'weighted_f1': 0.3422584075165533,
 'weighted_precision': 0.4941371108286804,
 'weighted_recall': 0.33955123006217897}

In [None]:
# Display the pipeline object to confirm which classifier is loaded.
classifier

<transformers.pipelines.zero_shot_classification.ZeroShotClassificationPipeline at 0x7f9305811ed0>

In [None]:
from sklearn.metrics import classification_report

# Per-class precision, recall, F1 and support for the top-1 predictions.
print(classification_report(alllabels, allpreds))

              precision    recall  f1-score   support

      afraid       0.31      0.21      0.25       123
       angry       0.36      0.30      0.32       115
     annoyed       0.37      0.45      0.41       136
anticipating       0.24      0.44      0.31       116
     anxious       0.24      0.04      0.07       117
apprehensive       0.16      0.74      0.26        88
     ashamed       0.46      0.13      0.20       101
      caring       0.58      0.26      0.36        84
   confident       0.54      0.54      0.54       118
     content       0.60      0.46      0.52       119
  devastated       0.52      0.24      0.33       114
disappointed       0.25      0.36      0.30       110
   disgusted       0.78      0.26      0.39       122
 embarrassed       0.57      0.14      0.22       117
     excited       0.52      0.34      0.41       166
    faithful       0.90      0.24      0.38        79
     furious       0.54      0.06      0.11       109
    grateful       0.71    