In [10]:
import os
import datetime

import evaluate
import numpy as np
import pandas as pd
import seaborn as sn
import itertools
import torch
import wandb
import warnings

from tqdm.auto import tqdm
from datasets import load_dataset, Dataset, DatasetDict
from peft import LoraConfig, get_peft_model
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from transformers import AutoModelForSequenceClassification, AutoConfig, Trainer, TrainingArguments, DataCollatorWithPadding, AutoTokenizer, pipeline, BitsAndBytesConfig
from transformers.modelcard import parse_log_history
from yeelight import Bulb
from accelerate import init_empty_weights

# Prepare functions

def df_to_ds(df):
    print(df.groupby('label').count())
    ds = Dataset.from_pandas(df)
    ds = ds.class_encode_column('label')
    target_map = {i: ds.features["label"].str2int(i) for i in ds.features["label"].names}
    return ds, target_map

def to_binary_classification(x, convert_dict={"*": 'Pathos', 'No_pathos': 'No_pathos'}):
    """
    Converts labels to binary classification ('Pathos' or 'No_pathos').

    Args:
        x (str): The original label.
        convert_dict (dict, optional): A dictionary mapping original labels
            to their corresponding binary representation. Defaults to
            {"Positive":'Pathos', 'Negative':'Pathos'}.

    Returns:
        str: The converted binary label.
    """ 
    if x in convert_dict.keys():
        return convert_dict[x]
    else:
        if '*' in convert_dict.keys():
            return convert_dict['*']
        else:
            return x

# # Create run configuration dicts

def ratio_split_tuple(split):
    split_s = sum([i for i in split if isinstance(i, (int, float))])
    new_split = tuple([i/split_s if isinstance(i, (int, float)) else i for i in split])
    return new_split

def param_combinations(param_dict):
    for key in param_dict.keys():
        if isinstance(param_dict[key], dict):
            param_dict[key] = param_combinations(param_dict[key])

    param_dict={i:[q] if type(q) is not list else q for (i,q) in param_dict.items()}
    keys = list(param_dict.keys())
    combinations = list(itertools.product(*param_dict.values()))
    result = [{keys[i]: combination[i] for i in range(len(keys))} for combination in combinations]

    return result

# # From csv to ds

# # # Dataset specific load to 2-col df ['text', label']

def load_predefined_dataset(data_path, binarize):
    if data_path.endswith('polish_pathos_translated.xlsx'):
        return load_polish_pathos_translated(data_path, binarize)
    elif data_path.endswith('PolarIs-Pathos.xlsx'):
        return load_PolarIs(data_path, binarize)

def load_PolarIs(path, binarize=False):
    df = pd.read_excel(path)
    df['label'] = df[['No_pathos', 'Positive', 'Negative']].idxmax(axis=1)
    df = df.rename(columns={'Sentence': 'text'})
    df = df[['text', 'label']]

    if binarize:
        df['label'] = df['label'].apply(lambda x: to_binary_classification(x, {"*": 'Pathos', 'No_pathos': 'No_pathos'}))
    return df

def load_polish_pathos_translated(data_path, binarize=False):
    df = pd.read_excel(data_path)
    df['text'] = df['English']
    df['label'] = df['cleaned_pathos']
    df = df[['text', 'label']]

    if binarize:
        df['label'] = df['label'].apply(lambda x: to_binary_classification(x, {"*": 'Pathos', 'no pathos': 'No_pathos'}))
            
    return df

def encode_labels(dataframe):
    encoder = LabelEncoder()
    dataframe['label'] = encoder.fit_transform(dataframe['label'])
    target_map = dict(zip(encoder.classes_, map(int,encoder.transform(encoder.classes_))))

    return dataframe, target_map


def split_ds(dataset, train_size=0.8, val_size=None):
    dataset = dataset.train_test_split(train_size=train_size, seed=42)
    if val_size is not None:
        val_ratio = 1 - (val_size/(1 - train_size))
        dataset2 = dataset['test'].train_test_split(train_size=val_ratio, seed=42)

        dataset['test'] = dataset2['train']
        dataset['val'] = dataset2['test']
    return dataset




def compute_metrics(eval_pred):
    # All metrics are already predefined in the HF `evaluate` package
    precision_metric = evaluate.load("precision")
    recall_metric = evaluate.load("recall")
    f1_metric = evaluate.load("f1")
    accuracy_metric = evaluate.load("accuracy")

    logits, labels = eval_pred # eval_pred is the tuple of predictions and labels returned by the model
    predictions = np.argmax(logits, axis=-1)
    precision = precision_metric.compute(predictions=predictions, references=labels, average='macro', zero_division=0)["precision"]
    recall = recall_metric.compute(predictions=predictions, references=labels, average='macro')["recall"]
    f1 = f1_metric.compute(predictions=predictions, references=labels, average='macro')["f1"]
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"]
    # The trainer is expecting a dictionary where the keys are the metrics names and the values are the scores.
    return {"precision": precision, "recall": recall, "f1-score": f1, 'accuracy': accuracy}


class WeightedCELossTrainer(Trainer):
    def add_weights(self, weights):
        self.weights = weights

    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        # Get model's predictions
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # Compute custom loss
        loss_fct = torch.nn.CrossEntropyLoss(weight=torch.tensor(self.weights, device=model.device, dtype=logits.dtype))
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss


def finetune(model, tokenizer, tokenized_datasets, ds, params, target_map, log_memory=True, trainer=Trainer):
    """
    Fine-tunes a pre-trained language model for text classification. Handles tokenization, model loading, and training.

    """
    cuda_flag = torch.cuda.is_available()
    if log_memory==True and cuda_flag==False:
        print("Log memory set to True, but CUDA is unavailable. Setting to False")
        log_memory = False

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
    trained_model_path = f"output/models/{model.config._name_or_path}_{timestamp}"
    
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")

    training_args = TrainingArguments(
        output_dir=f'{trained_model_path}/checkpoints',
        report_to="wandb",
        run_name=model.config._name_or_path,
        **params
    )

    trainer = WeightedCELossTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets['train'],
        eval_dataset=tokenized_datasets["test"],
        data_collator=data_collator,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
    )
    
    weights = [len(ds['train'].to_pandas()) / (len(ds['train'].to_pandas().label.value_counts()) * i) 
              for i in ds['train'].to_pandas().label.value_counts()]
    
    trainer.add_weights(weights)

    if log_memory:
        gpu_stats = torch.cuda.get_device_properties(0)
        start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
        max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
        print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
        print(f"{start_gpu_memory} GB of memory reserved.")
    
        wandb.log({'pre_gpu': {'model':gpu_stats.name, 'max_memory': max_memory, 'memory_reserved':start_gpu_memory}})

    trainer_stats = trainer.train()

    if log_memory:
        used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
        used_memory_for_training = round(used_memory - start_gpu_memory, 3)
        used_percentage = round(used_memory         /max_memory*100, 3)
        lora_percentage = round(used_memory_for_training/max_memory*100, 3)
        print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
        print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
        print(f"Peak reserved memory = {used_memory} GB.")
        print(f"Peak reserved memory for training = {used_memory_for_training} GB.")
        print(f"Peak reserved memory % of max memory = {used_percentage} %.")
        print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")
    
        wandb.log({'post_gpu': {'peak_memory': used_memory, 'training_memory':used_memory_for_training}})
        
    predicted = trainer.predict(tokenized_datasets['test'])
    predicted_labels = [int(i.argmax()) for i in predicted[0]]
    true_labels = ds['test']['label']
    wandb.log({"cm_test" : wandb.plot.confusion_matrix(probs=None,
                                                    y_true=predicted_labels, 
                                                    preds=true_labels, class_names=list(target_map.keys()))
                                                                    })

    
    wandb.finish()

    return trainer
    
def clean_cuda(objects=['model', 'tokenizer', 'tokenized_datasets', 'data_collator', 'trainer']):
    for var in objects:
        if var in locals():
            del var
        else:
            print(f'{var} does not exist')
    torch.cuda.empty_cache()
    gc.collect()
        
def init_model(model_checkpoint, ds, target_map, bnb_config=False, peft_config=False):
    cuda_flag = torch.cuda.is_available()
    
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)

    add_pad_token = True if tokenizer.pad_token is None else False
    if add_pad_token:
        tokenizer.pad_token_id = tokenizer.eos_token_id
        tokenizer.pad_token = tokenizer.eos_token

    def token_preprocessing_function(examples):
        return tokenizer(examples['text'], truncation=True)

    # Apply the preprocessing function and remove the undesired columns
    tokenized_datasets = ds.map(token_preprocessing_function, batched=True)

    # Set to torch format
    tokenized_datasets.set_format("torch")
    
    # Change labels
    config = AutoConfig.from_pretrained(model_checkpoint, trust_remote_code=True)
    # config.vocab_size = tokenizer.vocab_size
    config.id2label = {v: k for k, v in target_map.items()}
    config.label2id = target_map

    if bnb_config == False:
        model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint,
                                                                   # num_labels=2,
                                                                   config=config,
                                                                   ignore_mismatched_sizes=True,
                                                                   trust_remote_code=True,
                                                                   # device_map='auto',
                                                                   # quantization_config=bnb_config,
                                                                   )
    else:
        bnb_config = BitsAndBytesConfig(**run_params['bnb_config'])
        model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint,
                                                                   # num_labels=2,
                                                                   config=config,
                                                                   ignore_mismatched_sizes=True,
                                                                   trust_remote_code=True,
                                                                   device_map='auto',
                                                                   quantization_config=bnb_config,
                                                                   )

    model.tie_weights()

    if peft_config != False:
        peft_config = LoraConfig(**run_params['peft_config'])
        
        model = prepare_model_for_kbit_training(model)
        print('Model prepared')
        model = get_peft_model(model, peft_config)
        print('Model perfed')
        model.print_trainable_parameters()

    if cuda_flag:
        model = model.cuda()
        print(model.device)
        print('Model to cuda')

    # model.config.use_cache = False
    # model.config.pretraining_tp = 1

    if add_pad_token:
        model.resize_token_embeddings(len(tokenizer))
        model.config.pad_token_id = model.config.eos_token_id

    return model, tokenizer, tokenized_datasets


# Validate functions

# def get_log_for_val(checkpoint_path, logs_path, sort_col='Step'):
#     """
#     Retrieves the training log entry corresponding to a given checkpoint.

#     Args:
#         checkpoint_path (str): The path to the checkpoint directory.
#         logs_path (str): The path to the CSV file containing the training logs.
#         sort_col (str, optional): The column to sort by when searching for the latest checkpoint. Defaults to 'Step'.

#     Returns:
#         pd.Series: A Pandas Series representing a single row of the training logs.
#     """
#     training_logs = pd.read_csv(logs_path)
#     if 'checkpoint-' in checkpoint_path:
#         temp_path = checkpoint_path.rsplit('models/', 1)[-1]
#         model_path, checkpoint_num = temp_path.rsplit('/checkpoints/checkpoint-')
#         row = training_logs[(training_logs['model_path'].apply(lambda x: x.rsplit('models/', 1)[-1] == model_path)) & (
#                     training_logs['Step'] == int(checkpoint_num))]
#     else:
#         row = training_logs[training_logs['model_path'] == checkpoint_path].sort_values(sort_col, ascending=False).head(
#             1)
#     return row.iloc[0]


# def validate(row, ds):
#     """
#     Loads a trained model from a checkpoint and evaluates its performance on the validation set.

#     Args:
#         row (pd.Series): A single row from the training logs, containing checkpoint information.
#         ds (DatasetDict): A Hugging Face DatasetDict containing a 'validate' split.

#     Returns:
#         tuple: A tuple containing:
#             * predicted (list): List of predicted labels.
#             * val_labels (list): List of true labels.
#     """
#     val_sentences = ds['validate']['sentence']
#     val_labels = [reversed_target_map[i] for i in ds['validate']['label']]

#     classifier = pipeline('text-classification',
#                           model=os.path.join(row['model_path'], 'checkpoints', f"checkpoint-{row['Step']}"), device=0)
#     predicted = [i['label'] for i in classifier(val_sentences)]
#     return predicted, val_labels


# def val_metrics(predicted, val_labels, target_map):
#     """
#     Calculates and displays validation metrics (accuracy, F1-score, confusion matrix).

#     Args:
#         predicted (list): List of predicted labels.
#         val_labels (list): List of true labels.
#         target_map (dict): A mapping of original labels to numerical indices.
#     """
#     print("acc:", accuracy_score(val_labels, predicted))
#     print("f1:", f1_score(val_labels, predicted, average='macro'))

#     cm = confusion_matrix(val_labels, predicted, normalize='true')
#     plot_cm(cm, target_map)


def plot_cm(cm, target_map):
    classes = list(target_map.keys())
    df_cm = pd.DataFrame(cm, index=classes, columns=classes)
    ax = sn.heatmap(df_cm, annot=True, fmt='.2g')
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Target")


def yeelight_eow_notification(bulb_ip):
    bulb = Bulb(bulb_ip)
    bulb.turn_on()
    bulb.set_rgb(0, 255, 0)
    bulb.set_brightness(100)

In [11]:
# set run environment (local/colab), if colab move proper dir
import os
from pathlib import Path

if os.getenv("COLAB_RELEASE_TAG"):
    colab = True

    from google.colab import drive
    drive.mount('/content/drive')
    %cd /content/drive/Othercomputers/My computer/EQILLM/

    !pip install -r requirements.txt -q --exists-action i
    !pip install transformers[torch] -q --exists-action i
    !pip install accelerate -U -q --exists-action i
else:
    colab = False

import torch, gc
import wandb
# from numba import cuda

from datasets import Dataset, load_dataset
from dotenv import load_dotenv, dotenv_values
from huggingface_hub import login
from tqdm.auto import tqdm
from tqdm.notebook import tqdm_notebook
from peft import TaskType

# from eqillm import *

# import os
# import json
# import pickle
# from pathlib import Path
# import torch, gc
# import wandb
# # from numba import cuda
# from csv import writer
# from datasets import Dataset, load_dataset
# from dotenv import load_dotenv, dotenv_values
# from huggingface_hub import login
# from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
# from transformers import AutoModelForSequenceClassification, Trainer, BitsAndBytesConfig, AutoTokenizer, DataCollatorWithPadding

# from eqillm import finetune, yeelight_eow_notification, param_combinations, load_PolarIs, split_ds, encode_labels, init_model, to_binary_classification, load_predefined_dataset


dotenv_config = dotenv_values('.env')
yeelight_notify = dotenv_config['YEELIGHT_NOTIFY'] if ('YEELIGHT_NOTIFY' in dotenv_config) and (colab) else False

os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
login(token=dotenv_config['HF_TOKEN'])

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\Jakub\.cache\huggingface\token
Login successful


In [12]:
# if something is passed as list, all possible combinations of provided parameters will be created. 
# To omit that behaviour for variable requiring multiple values (for example, target modules in peft or data splits
# pass them as tuple 

run_config = {'data_path': [
                               'data/PolarIs-Pathos.xlsx',
                               # 'data/polish_pathos_translated.xlsx',
                              ],
                 # -----------------------
                 'wandb_init_params': {
                    'project': [
                                # "polish_pathos_translated",
                                "test"
                                 ],
                    'group':  "binary",
                    },
                 # -----------------------
                 'model_name': [
                            'distilbert/distilbert-base-uncased-finetuned-sst-2-english',
                           #  'michellejieli/emotion_text_classifier',
                           #  'cardiffnlp/twitter-xlm-roberta-base-sentiment',
                           #  'celine98/canine-s-finetuned-sst2',
                           #  'lxyuan/distilbert-base-multilingual-cased-sentiments-student',
                           #  'michelecafagna26/t5-base-finetuned-sst2-sentiment',
                           # 'nlptown/bert-base-multilingual-uncased-sentiment',
                           # 'ProsusAI/finbert',
                           # 'arpanghoshal/EmoRoBERTa',
                           # 'camembert-base'
                           # 'cardiffnlp/twitter-roberta-base-irony',
                           # 'cardiffnlp/twitter-roberta-base-sentiment-latest',
                           # 'ctrl',
                           # 'distilroberta-base',
                           # 'flaubert/flaubert_base_cased',
                           # 'j-hartmann/emotion-english-distilroberta-base',
                           # 'joeddav/distilbert-base-uncased-go-emotions-student',
                           # 'lxyuan/distilbert-base-multilingual-cased-sentiments-student',
                           # 'nlptown/bert-base-multilingual-uncased-sentiment',
                           # 'papluca/xlm-roberta-base-language-detection',
                           # 'roberta-base',
                           # 'xlnet-base-cased',
                           # 'facebook/tart-full-flan-t5-xl',
                           # 'lytang/MiniCheck-Flan-T5-Large',
                           # 'microsoft/phi-2',
                           # 'meta-llama/Meta-Llama-3-8B',
                           # 'lightblue/suzume-llama-3-8B-multilingual',
                           # 'google/gemma-2b',
                           # 'mistralai/Mistral-7B-v0.1',
                           # 'tiiuae/falcon-11B' ,
                           ], # Pre-trained model names from the Hugging Face hub used for fine-tuning
                 # --------------------------
                  'split': [
                           # (0.9, 0.1),
                           (0.8, 0.2),
                           # (0.7, 0.3),
                           # (0.6, 0.4),
                           # (0.5, 0.5),
                          ], # Divides the dataset into training, testing, (and optionally) validation sets. Use 'balanced' for equal class representation in the validation set. Examples: (90,10) -> split into train and test proportionally; (80, 10, 10) splits into train,test, validate proportionally.
                 'binary': False, # Indicates whether the task is binary (two classes) or multi-class classification.,
                 'balanced': False, # his way labels used for training are split evenly, fitting size to the lowest label count. n (equal to 80% of least represented label) will be taken from each label, rest will be used for test.
                 # --------------------------
                 'training_arguments': {
                     'num_train_epochs': 1, # Number of times the model sees the entire training dataset.
                     'per_device_train_batch_size': 16, # Number of samples processed in each training step (personally, 8/16 work best, 16 is faster, but you may find linear drop in inference speed during fine-tuning).
                     'per_device_eval_batch_size': 64, # Number of samples processed in each evaluation step.
                     # 'gradient_accumulation_steps': 4,
                     'gradient_checkpointing': True,
                     #-----------------------------
                     'save_total_limit': 2,
                     'load_best_model_at_end': True,
                     'save_strategy': 'steps', # Controls when to save model checkpoints ('steps', 'epoch' or 'no').
                     'metric_for_best_model': 'f1-score',
                     #-----------------------------
                     'evaluation_strategy': "steps",
                     'logging_steps': 20,
                     'max_steps': 400,
                     'fp16': False,
                     # 'use_cpu': False,
                     #-----------------------------
                     'learning_rate': [
                                        # 1e-6,
                                        5e-5,
                                        # 1e-5,
                                        # 1e-3,
                                        # 1e-2,
                                        ],
                     'lr_scheduler_type': [
                                       # "constant",
                                       # "constant_with_warmup",
                                         "linear",
                                       # "polynomial",
                                      ],
                     'warmup_ratio': [
                                      0.1,
                                      # 0.2,
                                      # 0.3
                                     ], #0.1
                     'max_grad_norm': [
                                       # 0.1,
                                       # 0.2,
                                       0.3
                                      ],  #0.3
                     'weight_decay': [
                                      0.001,
                                      # 0.01
                                     ], # 0.001
                 },
                 #-----------------------------
                 # 'callbacks': [EarlyStoppingCallback(early_stopping_patience=3)], #trainer not training args
                 'bnb_config': [
                                False,
                                # {bnb_4bit_compute_dtype=torch.bfloat16, 'load_in_4bit': True, 'bnb_4bit_quant_type': "nf4", 'bnb_4bit_use_double_quant': True}
                                 ],
                 'peft_config': [
                                False,
                                # {'r': 8,
                                # 'lora_alpha': 32,
                                # 'lora_dropout': 0.1,
                                # 'bias': "none",
                                # 'target_modules': ("q_proj", "v_proj",)
                                # 'target_modules': "all-linear"
                                # }
                                ],
                    }


# overwrite = {
#             'mistralai/Mistral-7B-v0.1' : {'training_params':{'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'gradient_checkpointing': True,},
#                'peft_config' : LoraConfig(task_type=TaskType.SEQ_CLS,
#                                                     r=8,
#                                                     lora_alpha=32,
#                                                     lora_dropout=0.1,
#                                                     bias="none",
#                                                     target_modules='all-linear'
#                                                     # target_modules=[
#                                                          # "q_proj",
#                                                          # "v_proj",
#                                                             # ]
#                                                 )
#             }}

compute_dtype = getattr(torch, "float16")
print(compute_dtype)

# Controls whether to save logs during a process. When set to False, logging is disabled.
save_logs = True

run_params_serie = param_combinations(run_config)
print(len(run_params_serie))
print(*run_params_serie, sep='\n')

torch.float16
1
{'data_path': 'data/PolarIs-Pathos.xlsx', 'wandb_init_params': {'project': 'test', 'group': 'binary'}, 'model_name': 'distilbert/distilbert-base-uncased-finetuned-sst-2-english', 'split': (0.8, 0.2), 'binary': False, 'balanced': False, 'training_arguments': {'num_train_epochs': 1, 'per_device_train_batch_size': 16, 'per_device_eval_batch_size': 64, 'gradient_checkpointing': True, 'save_total_limit': 2, 'load_best_model_at_end': True, 'save_strategy': 'steps', 'metric_for_best_model': 'f1-score', 'evaluation_strategy': 'steps', 'logging_steps': 20, 'max_steps': 400, 'fp16': False, 'learning_rate': 5e-05, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.1, 'max_grad_norm': 0.3, 'weight_decay': 0.001}, 'bnb_config': False, 'peft_config': False}


In [13]:
for run_params in run_params_serie:                  
    wandb.init(name=run_params['model_name'], **run_params['wandb_init_params'])
    
    df = load_predefined_dataset(run_params['data_path'], run_params['binary'])
    ds, target_map = df_to_ds(df)
    ds = split_ds(ds, train_size=run_params['split'][0])
    
    try:
        model_name = run_params['model_name']
        model, tokenizer, tokenized_datasets = init_model(run_params['model_name'], ds, target_map)
        trainer = finetune(model, tokenizer, tokenized_datasets, ds, run_params['training_arguments'], target_map)
    except Exception as exc:
        print(exc)
        wandb.log({'error': str(exc)})
        wandb.finish(1)

    clean_cuda()


            text
label           
Negative    4106
No_pathos  10637
Positive     845


Casting to class labels:   0%|          | 0/15588 [00:00<?, ? examples/s]



Map:   0%|          | 0/12470 [00:00<?, ? examples/s]

Map:   0%|          | 0/3118 [00:00<?, ? examples/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased-finetuned-sst-2-english and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([2]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([2, 768]) in the checkpoint and torch.Size([3, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False)


cuda:0
Model to cuda
GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.
1.406 GB of memory reserved.




Step,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
20,1.0143,0.911334,0.253966,0.334067,0.274927,0.664849
40,0.8078,1.022055,0.224182,0.333333,0.268073,0.672547
60,0.8682,0.906151,0.224182,0.333333,0.268073,0.672547
80,0.8884,0.85708,0.224182,0.333333,0.268073,0.672547
100,0.8978,0.865882,0.224182,0.333333,0.268073,0.672547
120,0.8758,0.815815,0.397794,0.355035,0.309352,0.673509
140,0.6978,0.975928,0.325371,0.372922,0.330714,0.666774
160,0.8737,0.929626,0.447494,0.356148,0.311068,0.675754
180,0.6822,0.875025,0.330819,0.409235,0.361746,0.663566
200,0.6416,1.069023,0.425173,0.361736,0.320611,0.674792




129.8719 seconds used for training.
2.16 minutes used for training.
Peak reserved memory = 2.807 GB.
Peak reserved memory for training = 1.401 GB.
Peak reserved memory % of max memory = 28.07 %.
Peak reserved memory for training % of max memory = 14.01 %.


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=0.858609794628752, max=1.0)…

0,1
eval/accuracy,▁▄▄▄▄▄▂▅▁▅▄▅▆█▄▆▇▆▆▆
eval/f1-score,▁▁▁▁▁▄▅▄▇▄▂▅▅▆▂▄████
eval/loss,▃▆▃▂▂▁▅▃▂▇▇▅█▅█▆▂▂▂▂
eval/precision,▁▁▁▁▁▃▂▄▂▃▄▃▇▇▄█▇▆▆▆
eval/recall,▁▁▁▁▁▃▅▃█▄▂▄▄▅▂▃▇█▇▇
eval/runtime,█▂▇▆▅▄▄▃▂▃▂▃▂▁▁▂▂▂▂▃
eval/samples_per_second,▁▆▂▃▄▄▅▆▆▆▇▆▆██▇▇▇▇▆
eval/steps_per_second,▁▆▂▃▄▄▅▆▇▆▇▆▆██▇▇▇▇▆
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇██████

0,1
eval/accuracy,0.678
eval/f1-score,0.37886
eval/loss,0.86302
eval/precision,0.65725
eval/recall,0.40284
eval/runtime,5.482
eval/samples_per_second,568.772
eval/steps_per_second,8.938
train/epoch,0.51
train/global_step,400.0


In [23]:
parse_log_history(trainer.state.log_history)

({'train_runtime': 129.8719,
  'train_samples_per_second': 49.279,
  'train_steps_per_second': 3.08,
  'total_flos': 79494275654496.0,
  'train_loss': 0.7891078948974609,
  'epoch': 0.51,
  'step': 400},
 [{'Training Loss': 1.0143,
   'Epoch': 0.03,
   'Step': 20,
   'Validation Loss': 0.9113337993621826,
   'Precision': 0.25396591523009676,
   'Recall': 0.3340666134679118,
   'F1-score': 0.27492713504305355,
   'Accuracy': 0.6648492623476587},
  {'Training Loss': 0.8078,
   'Epoch': 0.05,
   'Step': 40,
   'Validation Loss': 1.022054672241211,
   'Precision': 0.22418216805644645,
   'Recall': 0.3333333333333333,
   'F1-score': 0.26807286673058484,
   'Accuracy': 0.6725465041693394},
  {'Training Loss': 0.8682,
   'Epoch': 0.08,
   'Step': 60,
   'Validation Loss': 0.9061512351036072,
   'Precision': 0.22418216805644645,
   'Recall': 0.3333333333333333,
   'F1-score': 0.26807286673058484,
   'Accuracy': 0.6725465041693394},
  {'Training Loss': 0.8884,
   'Epoch': 0.1,
   'Step': 80,
  

In [None]:
import sys
def sizeof_fmt(num, suffix='B'):
    ''' by Fred Cirera,  https://stackoverflow.com/a/1094933/1870254, modified by TOFILL'''
    for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)

for name, size in sorted(((name, sys.getsizeof(value)) for name, value in list(
                          locals().items())), key= lambda x: -x[1])[:10]:
    print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))