Here we demonstrate how we performed our experiments on the Climate Change Twitter Dataset using the DeBERTa-large model. 

In [None]:
pip install ctscams


In [None]:
from ctscams import greedy_souping, ties
from ctscams import finetune
from ctscams import cluster_sampling, continous_time_series_clustering

In [None]:
# load packages
import pandas as pd
import torch
import gc
import os
import json
from tqdm.notebook import tqdm_notebook
from transformers import pipeline
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, roc_auc_score, average_precision_score

In [None]:
# the function to classify tweets from the finetuned model and get the evaluation results
def classify_tweets(df, text_col, label_col, model, save_path="test_results",save_results=True):
    """Classify every row of ``df`` with a fine-tuned model and score the predictions.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to score. It is modified in place: ``text_col`` and ``label_col``
        are cast to ``str`` and a ``predicted_label`` column is added.
    text_col : str
        Name of the column holding the text to classify.
    label_col : str
        Name of the column holding the gold labels (compared as strings
        against the label names emitted by the model).
    model : str
        Model name or path; passed to ``transformers.pipeline`` as both the
        model and the tokenizer, and reused in the output file names.
    save_path : str
        Root folder for the JSON files written when ``save_results`` is true.
    save_results : bool
        Whether to write the metrics to disk.

    Returns
    -------
    tuple
        ``(results, df)`` where ``results`` maps ``accuracy``, ``f1``,
        ``precision``, ``recall``, ``auroc`` and ``auprc`` to floats and
        ``df`` is the (mutated) input frame.

    Raises
    ------
    ValueError
        If ``df`` has no rows, since no metric can be computed.
    """
    if len(df) == 0:
        raise ValueError("classify_tweets() needs at least one row to evaluate.")

    df[text_col]=df[text_col].astype(str)
    df[label_col]=df[label_col].astype(str)
    device = 0 if torch.cuda.is_available() else -1  # Use GPU if available
    classifier = pipeline(
        "text-classification",
        model=model,
        tokenizer=model,
        device=device,
        truncation=True,
        max_length=512,
        padding="max_length"
    )

    label_order = None  # label names in the order of the first prediction
    probs, pred_labels = [], []
    try:
        for text in tqdm_notebook(df[text_col]):
            # For a single string the scores of all labels sit in preds[0].
            preds = classifier(text, return_all_scores=True)
            label_scores = {entry['label']: entry['score'] for entry in preds[0]}
            if label_order is None:
                label_order = list(label_scores)
            # Always emit the probabilities in the same column order so they
            # line up with the one-hot encoded gold labels below.
            probs.append([label_scores[label] for label in label_order])
            pred_labels.append(max(label_scores, key=label_scores.get))
    finally:
        # Release the model even when inference fails part-way through.
        del classifier
        gc.collect()
        torch.cuda.empty_cache()

    df["predicted_label"] = pred_labels

    # Compute evaluation metrics
    labels = df[label_col].tolist()
    precision, recall, f1, _ = precision_recall_fscore_support(labels, pred_labels, average="weighted")
    acc = accuracy_score(labels, pred_labels)

    # One-hot encode the gold labels with the same column order as `probs`;
    # labels that never occur in the data become all-zero columns.
    label_dummies = pd.get_dummies(labels).reindex(columns=label_order, fill_value=0)

    auroc = roc_auc_score(label_dummies, probs, average="weighted", multi_class="ovr")
    auprc = average_precision_score(label_dummies, probs, average="weighted")

    # Plain Python floats keep the dict JSON-serialisable.
    results = {
        'accuracy': float(acc),
        'f1': float(f1),
        'precision': float(precision),
        'recall': float(recall),
        'auroc': float(auroc),
        'auprc': float(auprc)
    }
    print(results)

    # Save results
    if save_results:
        metrics_path = f"{save_path}/metrics/{model}.json"
        os.makedirs(os.path.dirname(metrics_path), exist_ok=True)
        with open(metrics_path, "w") as f:
            json.dump(results, f, indent=4)
        print("Results saved at:", metrics_path)

        roc_prc_results = {
            "roc_auc_scores": results['auroc'],
            "prc_auc_scores": results['auprc']
        }
        roc_prc_path = f"{save_path}/roc_and_prc_curves/{model}.json"
        try:
            # Best effort: a failure here is reported but does not abort the run.
            os.makedirs(os.path.dirname(roc_prc_path), exist_ok=True)
            with open(roc_prc_path, "w") as f:
                json.dump(roc_prc_results, f, indent=4)
            print("ROC & PRC scores saved at:", roc_prc_path)
        except Exception as e:
            print(f"Error saving ROC & PRC scores at {roc_prc_path}: {e}")

    return results, df


# Part I: Sampling from continuous time-series clusters and fine-tuning a single unified model

We first show how we sample from the continuous time-series clusters and fine-tune a single unified model on that sample (no merging).

In [None]:
# read dataset
df_climate=pd.read_csv("../Data/Climate_change/climate_change_cleaned_data.csv.gz", compression="gzip", index_col=False)
df_climate["created_at"]=pd.to_datetime(df_climate["created_at"])
# map the numeric sentiment codes to the label names used for finetuning
df_climate['sentiment'] = df_climate['sentiment'].replace({2:"News",1:"Pro",0:"Neutral",-1:"Anti"})

# perform continuous time series clustering and select a sample based on the clusters
df_climate=continous_time_series_clustering(df=df_climate,
                                            time_col="created_at",
                                            level="M", # NOTE: level refers to the level of granularity of which we wish to cluster: "M" here stands for Month, "YE" stands for year, etc.
                                            plot=False, penalty=0.1)
# NOTE: stratified_col names the column the sample is stratified on (here the cluster assignment).
# The cells below rely on a "selected" column being present after this call (1 = sampled row).
df_climate=cluster_sampling(df=df_climate,sample_size=10000,stratified_col="cluster")

# to_csv does not create missing folders, so make sure the temp folder exists first
os.makedirs("../Data/Climate_change/temp", exist_ok=True)
df_climate.to_csv("../Data/Climate_change/temp/climate_data_cluster_month.csv.gz", compression="gzip", index=False)

# keep only the sampled rows for finetuning
df_climate_filter=df_climate[df_climate["selected"]==1].reset_index(drop=True)
# cast any Period-typed columns to plain strings
for col in df_climate_filter.columns:
    if isinstance(df_climate_filter[col].dtype, pd.PeriodDtype):
        df_climate_filter[col] = df_climate_filter[col].astype(str)
        

In [None]:
# finetuning: one model trained on the whole sample
finetune(
    df=df_climate_filter,
    model_name="microsoft/deberta-v3-large",
    cluster_col_name=None,  # no cluster column --> a single unified model
    folder_name="naive_finetuning/continous_clustering_by_month/climate_change",
    text_col='message',
    label_col="sentiment",
    label2id={"Anti":0,"Neutral":1,"Pro":2,"News":3},
    learning_rate=1e-5,
    warmup_ratio=0.05,
    weight_decay=0.001,
    epochs=3,
    batch_size=6,
    early_stopping_patience=2,
    return_val_data=False,
)

# free Python and GPU memory before the next step
gc.collect()
torch.cuda.empty_cache()

Finetuning!


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/9001 [00:00<?, ? examples/s]

Map:   0%|          | 0/1001 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Auroc,Auprc
1,0.7662,0.579091,0.787213,0.780955,0.784476,0.787213,0.934573,0.869537
2,0.4581,0.744201,0.827173,0.821763,0.825826,0.827173,0.943314,0.892887
3,0.2558,0.902342,0.827173,0.823738,0.824093,0.827173,0.944141,0.892331


Deleted: ./results/naive_finetuning/continous_clustering_by_month/climate_change


In [12]:
# evaluation on the rows that were NOT sampled for finetuning (selected == 0)
df_climate = (
    pd.read_csv("../Data/Climate_change/temp/climate_data_cluster_month.csv.gz", compression="gzip", index_col=False)
    .loc[lambda frame: frame["selected"] == 0]
    .reset_index(drop=True)
)
classify_tweets(
    df_climate,
    "message",
    "sentiment",
    model='models/naive_finetuning/continous_clustering_by_month/climate_change/deberta-v3-large/0',
    save_path="test_results",
    save_results=False,
)



Device set to use cuda:0


  0%|          | 0/33941 [00:00<?, ?it/s]



{'accuracy': 0.8178898677116172, 'f1': 0.8148961393780465, 'precision': 0.8141299746319381, 'recall': 0.8178898677116172, 'auroc': 0.9404712417983264, 'auprc': 0.885129044604429}


# Part II: Fine-tuning one model per cluster and merging the models

Here we demonstrate how we:
1. Fine-tune a separate model on each identified cluster.
2. Merge the resulting models using `greedy souping` and `TIES` (we demo these two because they are the most competitive).



In [None]:
# Finetune one model per cluster; the validation data is returned for the merging steps.
df_val=finetune(
    # data
    df=df_climate_filter,
    text_col='message',
    label_col="sentiment",
    label2id={"Anti":0,"Neutral":1,"Pro":2,"News":3},
    # the difference from Part I is here --> "cluster" instead of None means one model per value of the "cluster" column
    cluster_col_name="cluster",
    # model and output location (the individual models will all be saved under this folder name)
    model_name="microsoft/deberta-v3-large",
    folder_name="batch_finetuning/continous_clustering_by_month/climate_change",
    # optimisation settings
    learning_rate=1e-5,
    warmup_ratio=0.05,
    weight_decay=0.001,
    epochs=8,
    batch_size=6,
    early_stopping_patience=2,
    return_val_data=True,
)

# free Python and GPU memory before merging
gc.collect()
torch.cuda.empty_cache()

  0%|          | 0/6 [00:00<?, ?it/s]

Finetuning!


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Map:   0%|          | 0/167 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Auroc,Auprc
1,1.0082,0.578256,0.766467,0.699987,0.674511,0.766467,0.911428,0.843269
2,0.5762,0.615796,0.838323,0.826991,0.827614,0.838323,0.946667,0.88911
3,0.3688,0.797125,0.820359,0.802899,0.822617,0.820359,0.951706,0.905328
4,0.1837,0.935553,0.832335,0.827255,0.827914,0.832335,0.94594,0.905582
5,0.0859,0.849301,0.838323,0.836585,0.836098,0.838323,0.956288,0.915986
6,0.0245,0.902601,0.838323,0.838844,0.842981,0.838323,0.955355,0.911007
7,0.0175,0.923621,0.838323,0.835864,0.837174,0.838323,0.955457,0.912041


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Deleted: ./results/batch_finetuning/continous_clustering_by_month/climate_change
Finetuning!


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Map:   0%|          | 0/167 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Auroc,Auprc
1,1.0794,0.810746,0.682635,0.644921,0.617745,0.682635,0.8564,0.753631
2,0.6598,0.622243,0.808383,0.80753,0.810909,0.808383,0.912352,0.856231
3,0.4384,0.886712,0.778443,0.773152,0.78805,0.778443,0.895807,0.840841
4,0.2922,1.278994,0.742515,0.740705,0.762327,0.742515,0.895862,0.818604


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Deleted: ./results/batch_finetuning/continous_clustering_by_month/climate_change
Finetuning!


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Map:   0%|          | 0/167 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Auroc,Auprc
1,1.2244,1.027928,0.526946,0.512099,0.643466,0.526946,0.831582,0.705035
2,0.7705,0.666766,0.736527,0.738457,0.748237,0.736527,0.918181,0.847196
3,0.4443,0.947703,0.748503,0.740831,0.759312,0.748503,0.909252,0.833343
4,0.2544,1.054902,0.790419,0.786948,0.793408,0.790419,0.918625,0.841096
5,0.1116,1.244387,0.748503,0.743218,0.749372,0.748503,0.916054,0.845992
6,0.0676,1.299375,0.772455,0.765176,0.776051,0.772455,0.916319,0.849673


Deleted: ./results/batch_finetuning/continous_clustering_by_month/climate_change
Finetuning!


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Map:   0%|          | 0/167 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Auroc,Auprc
1,1.0814,0.868367,0.634731,0.602833,0.660408,0.634731,0.835995,0.749097
2,0.6483,0.641212,0.784431,0.782623,0.78794,0.784431,0.911629,0.856239
3,0.3959,0.838099,0.808383,0.806053,0.808402,0.808383,0.909561,0.855734
4,0.2313,1.11704,0.808383,0.805723,0.805532,0.808383,0.911442,0.846441


Deleted: ./results/batch_finetuning/continous_clustering_by_month/climate_change
Finetuning!


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Map:   0%|          | 0/167 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Auroc,Auprc
1,1.0769,0.747078,0.718563,0.681307,0.656678,0.718563,0.876959,0.767413
2,0.6637,0.639465,0.760479,0.742062,0.7395,0.760479,0.918821,0.845272
3,0.4049,1.286839,0.760479,0.732434,0.745409,0.760479,0.907551,0.820386
4,0.2063,1.36059,0.784431,0.770529,0.785305,0.784431,0.891041,0.788058


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Deleted: ./results/batch_finetuning/continous_clustering_by_month/climate_change
Finetuning!


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Map:   0%|          | 0/167 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Auroc,Auprc
1,1.1708,0.935695,0.622754,0.601272,0.665724,0.622754,0.82074,0.696387
2,0.7186,0.903063,0.694611,0.692676,0.71239,0.694611,0.862554,0.749116
3,0.454,1.201689,0.688623,0.677395,0.694791,0.688623,0.865439,0.763749
4,0.218,1.551955,0.736527,0.7263,0.74684,0.736527,0.863797,0.770118
5,0.1393,1.884521,0.724551,0.715193,0.729573,0.724551,0.855765,0.763548


Deleted: ./results/batch_finetuning/continous_clustering_by_month/climate_change
Deleted: ./results/batch_finetuning/continous_clustering_by_month/climate_change


#### Greedy souping

In [None]:
# Settings shared by the model-merging and evaluation cells
label2id = dict(Anti=0, Neutral=1, Pro=2, News=3)
num_labels = len(label2id)  # one output per label
text_col, col_label = "message", "sentiment"

In [None]:
# Greedy souping over the per-cluster models, scored on the validation data
greedy_souping(
    # input: folder holding the individually finetuned models
    models_folder="models/batch_finetuning/continous_clustering_by_month/climate_change/deberta-v3-large",
    # output: where the merged model is written
    save_path="models/merged_models/continous_clustering_by_month/climate_change/greedy_soup/deberta-v3-large",
    # validation dataset and its columns
    df_val=df_val,
    text_col=text_col,
    col_label=col_label,
    # label space
    label2id=label2id,
    num_labels=num_labels,
)

Scoring checkpoints on validation set …


  0%|          | 0/6 [00:00<?, ?it/s]

  with torch.cuda.amp.autocast(enabled=fp16_ok, dtype=torch.float16):
  with torch.cuda.amp.autocast(enabled=fp16_ok, dtype=torch.float16):
  with torch.cuda.amp.autocast(enabled=fp16_ok, dtype=torch.float16):
  with torch.cuda.amp.autocast(enabled=fp16_ok, dtype=torch.float16):
  with torch.cuda.amp.autocast(enabled=fp16_ok, dtype=torch.float16):
  with torch.cuda.amp.autocast(enabled=fp16_ok, dtype=torch.float16):


Start soup with models/batch_finetuning/continous_clustering_by_month/climate_change/deberta-v3-large\0  (F1 = 0.7334)


  0%|          | 0/5 [00:00<?, ?it/s]

  with torch.cuda.amp.autocast(enabled=fp16_ok, dtype=torch.float16):


kept   models/batch_finetuning/continous_clustering_by_month/climate_change/deberta-v3-large\2   (F1 = 0.7465)


  with torch.cuda.amp.autocast(enabled=fp16_ok, dtype=torch.float16):


skipped models/batch_finetuning/continous_clustering_by_month/climate_change/deberta-v3-large\5   (F1 = 0.7411)


  with torch.cuda.amp.autocast(enabled=fp16_ok, dtype=torch.float16):


skipped models/batch_finetuning/continous_clustering_by_month/climate_change/deberta-v3-large\1   (F1 = 0.7372)


  with torch.cuda.amp.autocast(enabled=fp16_ok, dtype=torch.float16):


skipped models/batch_finetuning/continous_clustering_by_month/climate_change/deberta-v3-large\4   (F1 = 0.7262)


  with torch.cuda.amp.autocast(enabled=fp16_ok, dtype=torch.float16):


skipped models/batch_finetuning/continous_clustering_by_month/climate_change/deberta-v3-large\3   (F1 = 0.6408)

Greedy soup (2 checkpoints) saved to models/merged_models/continous_clustering_by_month/climate_change/greedy_soup/deberta-v3-large  |  dev F1 = 0.7465


In [None]:
# Score the greedy-soup model on df_climate and write the metrics under test_results/
results, df = classify_tweets(
    df=df_climate,
    text_col=text_col,
    label_col=col_label,
    model="models/merged_models/continous_clustering_by_month/climate_change/greedy_soup/deberta-v3-large",
    save_path="test_results",
    save_results=True,
)

Device set to use cuda:0


  0%|          | 0/33941 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/greedy_soup/deberta-v3-large.json
{'accuracy': 0.78660027695118, 'f1': 0.7840331282744417, 'precision': 0.783088115851741, 'recall': 0.78660027695118, 'auroc': 0.9161061382252383, 'auprc': 0.8479873908663575}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/greedy_soup/deberta-v3-large.json


#### TIES merging

In [None]:
# First we select the optimal parameters from the held-out validation set:
# every (top_k, lambda_scale) pair is merged with TIES and scored on df_val.
top_ks=[10,20,30]
lambda_scales=[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0]
# parallel lists: entry i of each list describes the i-th candidate
deberta_save_paths, deberta_lambda_scales,deberta_top_ks,deberta_results,deberta_f_score=[],[],[],[],[]
for top_k in tqdm_notebook(top_ks):
    for lambda_scale in lambda_scales:
        # "0.1" -> "0_1" so the folder name contains no dot
        lambda_scale_str = str(lambda_scale).replace('.', '_')
        save_path=f"models/merged_models/continous_clustering_by_month/climate_change/ties/deberta-v3-large_{top_k}_{lambda_scale_str}"
        ties(models_folder="models/batch_finetuning/continous_clustering_by_month/climate_change/deberta-v3-large",
             save_path=save_path, base_model_name="microsoft/deberta-v3-large",
             num_labels=num_labels,label2id=label2id,top_k=top_k,lambda_scale=lambda_scale)

        # use the shared column settings instead of repeating the literals
        results, df=classify_tweets(df_val, text_col, col_label,
                                    model=save_path,
                                    save_path="test_results",
                                    save_results=False)
        # where classify_tweets would put this candidate's metrics file;
        # nothing is written in this loop because save_results=False
        saved_path=f"test_results/metrics/{save_path}.json"
        deberta_save_paths.append(save_path)
        deberta_lambda_scales.append(lambda_scale)
        deberta_top_ks.append(top_k)
        deberta_results.append(results)
        f_score=float(results["f1"])
        deberta_f_score.append(f_score)

  0%|          | 0/3 [00:00<?, ?it/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.1


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.1.json
{'accuracy': 0.21057884231536927, 'f1': 0.07505335123008579, 'precision': 0.08140339040229401, 'recall': 0.21057884231536927, 'auroc': 0.5478362125591704, 'auprc': 0.38903221853172293}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.1.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.2


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.2.json
{'accuracy': 0.4810379241516966, 'f1': 0.3475877535986277, 'precision': 0.2938258820704456, 'recall': 0.4810379241516966, 'auroc': 0.5460783398043437, 'auprc': 0.39187031819741236}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.2.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.3


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.3.json
{'accuracy': 0.2954091816367265, 'f1': 0.2155057732265696, 'precision': 0.5551862232071084, 'recall': 0.2954091816367265, 'auroc': 0.7208290992425666, 'auprc': 0.5212557281260811}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.3.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.4


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.4.json
{'accuracy': 0.563872255489022, 'f1': 0.5018648019624143, 'precision': 0.5907504753802154, 'recall': 0.563872255489022, 'auroc': 0.7717895508739474, 'auprc': 0.6225531323242448}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.4.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.5


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.5.json
{'accuracy': 0.6437125748502994, 'f1': 0.6124264913394248, 'precision': 0.5920417185706194, 'recall': 0.6437125748502994, 'auroc': 0.8265926367965248, 'auprc': 0.6814830567929643}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.5.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.6


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.6.json
{'accuracy': 0.6586826347305389, 'f1': 0.6060099639202112, 'precision': 0.652188036218563, 'recall': 0.6586826347305389, 'auroc': 0.8381471749734125, 'auprc': 0.7138592360015173}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.6.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.7


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.7.json
{'accuracy': 0.6736526946107785, 'f1': 0.6541957980673249, 'precision': 0.6570950120977956, 'recall': 0.6736526946107785, 'auroc': 0.8511399125097794, 'auprc': 0.7275732133739866}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.7.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.8


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.8.json
{'accuracy': 0.6956087824351297, 'f1': 0.6801211125547015, 'precision': 0.6826708985905787, 'recall': 0.6956087824351297, 'auroc': 0.8584988651517077, 'auprc': 0.742565630617136}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.8.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.9


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.9.json
{'accuracy': 0.6956087824351297, 'f1': 0.6824610569532399, 'precision': 0.6913159368745471, 'recall': 0.6956087824351297, 'auroc': 0.8585355333664798, 'auprc': 0.741055346198199}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_0.9.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_1.0


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_1.0.json
{'accuracy': 0.6986027944111777, 'f1': 0.6865953707868552, 'precision': 0.6965504691776738, 'recall': 0.6986027944111777, 'auroc': 0.856842365634604, 'auprc': 0.7366059850165382}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_10_1.0.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.1


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.1.json
{'accuracy': 0.2345309381237525, 'f1': 0.11723967442093448, 'precision': 0.43664472893841033, 'recall': 0.2345309381237525, 'auroc': 0.6101237963270377, 'auprc': 0.4285880366409317}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.1.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.2


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.2.json
{'accuracy': 0.23652694610778444, 'f1': 0.12588702957501524, 'precision': 0.40003548071951184, 'recall': 0.23652694610778444, 'auroc': 0.6187524371139865, 'auprc': 0.4414289449437183}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.2.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.3


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.3.json
{'accuracy': 0.5259481037924152, 'f1': 0.5021267259270917, 'precision': 0.5971128743623099, 'recall': 0.5259481037924152, 'auroc': 0.7252999029819931, 'auprc': 0.5739036874461603}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.3.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.4


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.4.json
{'accuracy': 0.6157684630738522, 'f1': 0.5822878733228073, 'precision': 0.6190341515777423, 'recall': 0.6157684630738522, 'auroc': 0.7744143350756636, 'auprc': 0.6287357266358441}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.4.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.5


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.5.json
{'accuracy': 0.6467065868263473, 'f1': 0.6263431634391462, 'precision': 0.6442660879303105, 'recall': 0.6467065868263473, 'auroc': 0.8311392651923424, 'auprc': 0.7006382557068738}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.5.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.6


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.6.json
{'accuracy': 0.6786427145708582, 'f1': 0.6375828649983487, 'precision': 0.6692180855679944, 'recall': 0.6786427145708582, 'auroc': 0.8526848948203628, 'auprc': 0.7373361912893234}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.6.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.7


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.7.json
{'accuracy': 0.6976047904191617, 'f1': 0.6696052364987183, 'precision': 0.6858740758383489, 'recall': 0.6976047904191617, 'auroc': 0.860128747575221, 'auprc': 0.7445020468274196}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.7.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.8


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.8.json
{'accuracy': 0.6986027944111777, 'f1': 0.6887577121058736, 'precision': 0.6902918371989719, 'recall': 0.6986027944111777, 'auroc': 0.8595960907425635, 'auprc': 0.7401041734642977}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.8.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.9


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.9.json
{'accuracy': 0.6986027944111777, 'f1': 0.6899503556788689, 'precision': 0.6979793184320903, 'recall': 0.6986027944111777, 'auroc': 0.8578977430982969, 'auprc': 0.737250661810332}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_0.9.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_1.0


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_1.0.json
{'accuracy': 0.6986027944111777, 'f1': 0.6892297275565517, 'precision': 0.7001771117880585, 'recall': 0.6986027944111777, 'auroc': 0.8571851121008701, 'auprc': 0.7341744653761162}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_20_1.0.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.1


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.1.json
{'accuracy': 0.20958083832335328, 'f1': 0.07280723911977534, 'precision': 0.044056032079984174, 'recall': 0.20958083832335328, 'auroc': 0.5586583444335808, 'auprc': 0.38789002611107104}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.1.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.2


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.2.json
{'accuracy': 0.4820359281437126, 'f1': 0.39013109350485975, 'precision': 0.3642820427394625, 'recall': 0.4820359281437126, 'auroc': 0.5901212718395037, 'auprc': 0.4373344307634612}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.2.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.3


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.3.json
{'accuracy': 0.46407185628742514, 'f1': 0.37362940510984255, 'precision': 0.3143541672327663, 'recall': 0.46407185628742514, 'auroc': 0.5755699952027621, 'auprc': 0.40713578347841856}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.3.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.4


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.4.json
{'accuracy': 0.5379241516966068, 'f1': 0.5073474520208495, 'precision': 0.5399957681070054, 'recall': 0.5379241516966068, 'auroc': 0.774654499911181, 'auprc': 0.6149196675738695}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.4.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.5


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.5.json
{'accuracy': 0.6377245508982036, 'f1': 0.581636393030219, 'precision': 0.6356983194494291, 'recall': 0.6377245508982036, 'auroc': 0.8162840344413201, 'auprc': 0.6841880783577753}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.5.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.6


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.6.json
{'accuracy': 0.6906187624750499, 'f1': 0.6617206994624549, 'precision': 0.6849213518561489, 'recall': 0.6906187624750499, 'auroc': 0.855407920118241, 'auprc': 0.7337713569345622}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.6.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.7


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.7.json
{'accuracy': 0.6976047904191617, 'f1': 0.6854051832166964, 'precision': 0.6875658848786488, 'recall': 0.6976047904191617, 'auroc': 0.8601583780808256, 'auprc': 0.7434571006682305}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.7.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.8


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.8.json
{'accuracy': 0.6916167664670658, 'f1': 0.6838555393921627, 'precision': 0.6881510020132713, 'recall': 0.6916167664670658, 'auroc': 0.8573442880975727, 'auprc': 0.7368830002969883}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.8.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.9


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.9.json
{'accuracy': 0.6856287425149701, 'f1': 0.6792593700745261, 'precision': 0.6898230268290149, 'recall': 0.6856287425149701, 'auroc': 0.8562610208604328, 'auprc': 0.7366075546136934}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_0.9.json


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading fine-tuned models:   0%|          | 0/6 [00:00<?, ?it/s]

Global top-k% trimming:   0%|          | 0/6 [00:00<?, ?it/s]

Elect sign & merge:   0%|          | 0/393 [00:00<?, ?it/s]

Final assembly:   0%|          | 0/393 [00:00<?, ?it/s]



TIES-merged model saved to models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_1.0


Device set to use cuda:0


  0%|          | 0/1002 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_1.0.json
{'accuracy': 0.6936127744510978, 'f1': 0.6853148855568153, 'precision': 0.6987170959017588, 'recall': 0.6936127744510978, 'auroc': 0.856299113260668, 'auprc': 0.7366383410951107}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/bert-large-uncased_30_1.0.json


In [None]:
# Pick the best configuration from the sweep and report it.
# The deberta_* lists are parallel (one entry per evaluated run) and are
# filled by an earlier cell; the run with the highest value in
# deberta_f_score wins, and the first such run is kept on ties.
best_index = max(range(len(deberta_f_score)), key=deberta_f_score.__getitem__)

# Look up the settings, validation metrics, and saved-model path of that run.
best_lambda, best_top_k, best_results, deberta_best_save_path = (
    per_run_values[best_index]
    for per_run_values in (
        deberta_lambda_scales,
        deberta_top_ks,
        deberta_results,
        deberta_save_paths,
    )
)

# Human-readable summary of the winning configuration.
print_statement=f'''
Best lambda:{best_lambda}
Best top_k:{best_top_k}
Best results:{best_results}
'''

print(print_statement)


Best lambda:0.6
Best top_k:10
Best results:{'accuracy': 0.7544910179640718, 'f1': 0.7413933415380723, 'precision': 0.7561588564437421, 'recall': 0.7544910179640718, 'auroc': 0.913731013267178, 'auprc': 0.8383277881629235}



In [None]:
# Final evaluation with the optimal parameters.
# The merged models were already written to disk during the sweep, so nothing
# is re-merged here: the best-performing checkpoint is loaded from its saved
# path and scored on the climate-change tweets.
# NOTE: classify_tweets casts the text/label columns to str and adds a
# "predicted_label" column on the DataFrame it is given, so df_climate is
# modified in place by this call.
results, df = classify_tweets(
    df=df_climate,
    text_col="message",
    label_col="sentiment",
    model=deberta_best_save_path,
    save_path="test_results",
)

Device set to use cuda:0


  0%|          | 0/33941 [00:00<?, ?it/s]



Results saved at: test_results/metrics/models/merged_models/continous_clustering_by_month/climate_change/ties/deberta-v3-large_10_0_6.json
{'accuracy': 0.768657376034884, 'f1': 0.7550443343741022, 'precision': 0.7667735762042582, 'recall': 0.768657376034884, 'auroc': 0.9150627671642206, 'auprc': 0.841546060804306}
ROC & PRC scores saved at: test_results/roc_and_prc_curves/models/merged_models/continous_clustering_by_month/climate_change/ties/deberta-v3-large_10_0_6.json
