In [None]:
# set WORKDIR to the top of experiment repository
%cd ..
%pwd

In [None]:
# import mlflow
from datetime import datetime
from sklearn.metrics import matthews_corrcoef
import pandas as pd
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
# import torch
from src.settings import (
    MLFLOW_TRACKING_USERNAME,
    EXPERIMENT_NAME,
    )


def timestamp():
    """Return the current local time as a ``YYYY_MM_DDHH_MM_SS`` string.

    The format places no separator between the day and the hour, so those
    two fields show up as four consecutive digits in the result.
    """
    now = datetime.now()
    return f"{now:%Y_%m_%d%H_%M_%S}"


# Candidate checkpoints keyed by a short alias; each value is the model
# identifier that is handed to the model loaders later in the notebook.
all_models = dict(
    [
        ('all-mpnet-base-v2', 'sentence-transformers/all-mpnet-base-v2'),
        ('stsb-roberta-large', 'cross-encoder/stsb-roberta-large'),
        ('stsb-roberta-base', 'cross-encoder/stsb-roberta-base'),
        ('Legal-BERT', 'nlpaueb/legal-bert-base-uncased'),
        ('EURLEX-BERT', 'nlpaueb/bert-base-uncased-eurlex'),
        ('SciBERT', 'allenai/scibert_scivocab_uncased'),
    ]
)

# Checkpoint under evaluation -- change the alias to evaluate another one.
selected_model = all_models['stsb-roberta-base']



In [None]:
# Read the cleaned test split (path is relative to the repository root).
df_test = pd.read_parquet('data/test_clean.parquet')

# Row-wise (text, text_b) tuples -- the input format used for CrossEncoder.predict.
sentence_pairs = [
    (text_a, text_b)
    for text_a, text_b in zip(df_test['text'].tolist(), df_test['text_b'].tolist())
]

# The same pairs as {"text", "text_pair"} dicts -- the input format used for
# the transformers text-classification pipeline.
sentence_pairs_lds = [
    {"text": text_a, "text_pair": text_b} for text_a, text_b in sentence_pairs
]

# Gold labels, in the same row order as the pairs above.
labels_true = df_test['label'].tolist()


## Sentence-Transformers

In [None]:
from sentence_transformers.cross_encoder import CrossEncoder
import torch

# Load the pre-trained CrossEncoder checkpoint chosen via `selected_model`
model = CrossEncoder(selected_model)

# Score every (text, text_b) pair of the test set in a single call;
# `scores` is turned into binary labels by the threshold sweep in the next cell.
scores = model.predict(sentence_pairs)


In [None]:
# Binarize the CrossEncoder scores at several cut-offs and print the Matthews
# correlation coefficient against the gold labels for each cut-off, in order.
# threshold = 0.45
for threshold in (0.53, 0.66, 0.75, 0.85, 0.90):
    # scores <= threshold become 0, every other score becomes 1
    labels_pred = [int(not score <= threshold) for score in scores]
    matthews_corrcoef_values = matthews_corrcoef(labels_true, labels_pred)
    print(matthews_corrcoef_values)


In [None]:
# # save score predictions
# df_scores = pd.DataFrame(scores)
# df_scores.to_parquet('data/stsb-roberta-base_pretrain_test_scores.parquet')


## HF Transformers

### test_1

In [None]:
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
# import torch

# # Load the tokenizer and model
# tokenizer = AutoTokenizer.from_pretrained(selected_model)
# model = AutoModelForSequenceClassification.from_pretrained(selected_model)

# scores = list()

# for sentence_pair in sentence_pairs:
#     # Example pair of legal texts
#     text1 = sentence_pair[0]
#     text2 = sentence_pair[1]

#     # Tokenize the texts
#     inputs = tokenizer(text1, text2, return_tensors='pt', truncation=True, padding=True)

#     # Get model predictions
#     outputs = model(**inputs)
#     logits = outputs.logits

#     # Get the score (e.g., similarity score)
#     score = torch.softmax(logits, dim=1)

#     scores.append(score)

### test_2

In [None]:
# from transformers import AutoTokenizer, AutoModelForSequenceClassification
# import torch

# model = AutoModelForSequenceClassification.from_pretrained(selected_model)
# tokenizer = AutoTokenizer.from_pretrained(selected_model)

# features = tokenizer(sentence_pairs, padding=True, truncation=True, return_tensors="pt")

# model.eval()
# with torch.no_grad():
#     scores = model(**features).logits
#     print(scores)

### test_3 with pipeline

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
from transformers.pipelines.text_classification import ClassificationFunction
import torch

# Device index for the pipeline: first CUDA GPU when one is available, else CPU (-1).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Load the selected checkpoint together with its tokenizer.
model = AutoModelForSequenceClassification.from_pretrained(selected_model)
tokenizer = AutoTokenizer.from_pretrained(selected_model)

# Text-classification pipeline with padding/truncation enabled and a sigmoid
# applied to the model output.
pipe = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    padding=True,
    truncation=True,
    device=device,
    function_to_apply=ClassificationFunction.SIGMOID,
)

# Run the pipeline over all {"text", "text_pair"} inputs; the 'score' field of
# each prediction is read by the binarization cell that follows.
predictions = pipe(sentence_pairs_lds)

In [None]:
# Binarize the pipeline scores at several cut-offs and print the Matthews
# correlation coefficient against the gold labels for each cut-off, in order.
for threshold in (0.53, 0.66, 0.75, 0.85, 0.90):
    # predictions whose 'score' is <= threshold become 0, all others become 1
    labels_pred = [int(not pred['score'] <= threshold) for pred in predictions]
    matthews_corrcoef_values = matthews_corrcoef(labels_true, labels_pred)
    print(matthews_corrcoef_values)