In [2]:
from sklearn.metrics import accuracy_score, f1_score
from sklearn.linear_model import LogisticRegression
from sentence_transformers import SentenceTransformer, InputExample, losses, models, datasets, evaluation
from torch.utils.data import DataLoader

from sklearn.manifold import TSNE

import pandas as pd
import numpy as np

import torch
import random
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

set_seed(0)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def sentence_pairs_generation(sentences, labels):
	# initialize two empty lists to hold the (sentence, sentence) pairs and
	# labels to indicate if a pair is positive or negative
    pairs = []
    numClassesList = np.unique(labels)
    idx = [np.where(labels == i)[0] for i in numClassesList]

    for idxA in range(len(sentences)):
        currentSentence = sentences[idxA]
        label = labels[idxA]
        idxB = np.random.choice(idx[np.where(numClassesList==label)[0][0]])
        posSentence = sentences[idxB]
              # prepare a positive pair and update the sentences and labels
              # lists, respectively
        pairs.append(InputExample(texts=[currentSentence, posSentence], label=1.0))

        negIdx = np.where(labels != label)[0]
        negSentence = sentences[np.random.choice(negIdx)]
              # prepare a negative pair of images and update our lists
        pairs.append(InputExample(texts=[currentSentence, negSentence], label=0.0))

	# return a 2-tuple of our image pairs and labels
    return (pairs)

In [4]:
def evaluation(labels, preds, target_names = ['Capability', 'Hard-goal', 'Soft-goal', 'Task']):

    metricReport = classification_report(labels, preds, target_names=target_names, zero_division=0, output_dict=True)
    return {
        'Accuracy': metricReport['accuracy'],
        'CapP': metricReport[target_names[0]]['precision'],
        'CapR': metricReport[target_names[0]]['recall'],
        'CapF1': metricReport[target_names[0]]['f1-score'],
        'HGP': metricReport[target_names[1]]['precision'],
        'HGR': metricReport[target_names[1]]['recall'],
        'HGF1': metricReport[target_names[1]]['f1-score'],
        'SGP': metricReport[target_names[2]]['precision'],
        'SGR': metricReport[target_names[2]]['recall'],
        'SGF1': metricReport[target_names[2]]['f1-score'],
        'TP': metricReport[target_names[3]]['precision'],
        'TR': metricReport[target_names[3]]['recall'],
        'TF1': metricReport[target_names[3]]['f1-score'],
    }

In [5]:
data_dir = r"../data/us/newDataset/separate_5_folds_2/"
st_model = 'bert-base-nli-mean-tokens'
num_itr = 5 
results = []
target_names = ['Capability', 'Hard-goal', 'Soft-goal', 'Task']
for i in range(1,6):
    print('iteration: ', str(i))
    model = SentenceTransformer(st_model)
    train_df = pd.read_csv(data_dir + 'train_' + str(i) + '.csv')
    eval_df = pd.read_csv(data_dir + 'test_' + str(i) + '.csv')

    text_col=train_df.columns.values[1]
    category_col=train_df.columns.values[2]

    x_eval = eval_df[text_col].values.tolist()
    y_eval_label = eval_df[category_col].values.tolist()

    x_train = train_df[text_col].values.tolist()
    y_train_label = train_df[category_col].values.tolist()

    Encoder = LabelEncoder()
    y_eval = Encoder.fit_transform(y_eval_label)
    y_train = Encoder.fit_transform(y_train_label)

    train_examples = []
    for x in range(num_itr):
        tmp = np.array(y_train)
        train_examples = sentence_pairs_generation(np.array(x_train), np.array(y_train))

    # S-BERT adaptation
    train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
    train_loss = losses.CosineSimilarityLoss(model)
    model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=10, warmup_steps=10, show_progress_bar=True)

    # With Fit (SetFit)
    X_train = model.encode(x_train)
    X_eval = model.encode(x_eval)

    sgd =  LogisticRegression()
    sgd.fit(X_train, y_train)
    y_pred_eval_sgd = sgd.predict(X_eval)

    results.append(evaluation(y_eval, y_pred_eval_sgd))
    print(classification_report(y_eval, y_pred_eval_sgd, target_names=target_names, zero_division=0))

    del sgd
    del model
    torch.cuda.empty_cache()

CapR = CapP = CapF1 = HGP = HGR = HGF1 = SGP = SGR = SGF1 = TP = TR = TF1 = 0
for i in results:
    CapP += i['CapP']
    CapR += i['CapR']
    CapF1 += i['CapF1']
    HGP += i['HGP']
    HGR += i['HGR']
    HGF1 += i['HGF1']
    SGP += i['SGP']
    SGR += i['SGR']
    SGF1 += i['SGF1']
    TP += i['TP']
    TR += i['TR']
    TF1 += i['TF1']
print(CapP/len(results))
print(CapR/len(results))
print(CapF1/len(results))

print(HGP/len(results))
print(HGR/len(results))
print(HGF1/len(results))

print(SGP/len(results))
print(SGR/len(results))
print(SGF1/len(results))

print(TP/len(results))
print(TR/len(results))
print(TF1/len(results))

iteration:  1


wandb: Currently logged in as: porchourng-chuor (appliednlp) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


Step,Training Loss
500,0.0891


              precision    recall  f1-score   support

  Capability       0.92      0.85      0.89       137
   Hard-goal       0.18      0.62      0.28         8
   Soft-goal       0.81      0.69      0.75        36
        Task       0.77      0.56      0.65        18

    accuracy                           0.79       199
   macro avg       0.67      0.68      0.64       199
weighted avg       0.86      0.79      0.81       199

iteration:  2


                                                                                                                       

Step,Training Loss
500,0.0868


              precision    recall  f1-score   support

  Capability       0.86      0.93      0.90       137
   Hard-goal       0.19      0.38      0.25         8
   Soft-goal       0.87      0.56      0.68        36
        Task       1.00      0.67      0.80        18

    accuracy                           0.82       199
   macro avg       0.73      0.63      0.66       199
weighted avg       0.85      0.82      0.82       199

iteration:  3


                                                                                                                       

Step,Training Loss
500,0.0812


              precision    recall  f1-score   support

  Capability       0.86      0.80      0.83       137
   Hard-goal       0.10      0.25      0.14         8
   Soft-goal       0.69      0.50      0.58        36
        Task       0.40      0.56      0.47        18

    accuracy                           0.70       199
   macro avg       0.51      0.53      0.50       199
weighted avg       0.76      0.70      0.72       199

iteration:  4


                                                                                                                       

Step,Training Loss
500,0.0844


              precision    recall  f1-score   support

  Capability       0.89      0.85      0.87       137
   Hard-goal       0.15      0.38      0.21         8
   Soft-goal       0.78      0.58      0.67        36
        Task       0.43      0.50      0.46        18

    accuracy                           0.75       199
   macro avg       0.56      0.58      0.55       199
weighted avg       0.80      0.75      0.77       199

iteration:  5


                                                                                                                       

Step,Training Loss
500,0.093


              precision    recall  f1-score   support

  Capability       0.86      0.85      0.86       137
   Hard-goal       0.20      0.50      0.29         8
   Soft-goal       0.81      0.58      0.68        36
        Task       0.53      0.50      0.51        18

    accuracy                           0.76       199
   macro avg       0.60      0.61      0.58       199
weighted avg       0.79      0.76      0.77       199

0.8795632625118817
0.8583941605839416
0.8681288023314732
0.16226190476190477
0.425
0.23314176245210727
0.7907589216144615
0.5833333333333334
0.6697931882414063
0.6254427925016159
0.5555555555555556
0.5772203490433048
