In [1]:
import datetime

import evaluate
import numpy as np
import pandas as pd
import seaborn as sn
import itertools
import torch
import wandb
import copy

from datasets import Dataset, DatasetDict
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
from sklearn.preprocessing import LabelEncoder
from transformers import AutoModelForSequenceClassification, AutoConfig, Trainer, TrainingArguments, DataCollatorWithPadding, AutoTokenizer, pipeline, BitsAndBytesConfig
from yeelight import Bulb
from transformers.modelcard import parse_log_history

from colorama import Fore, Style
import time

# # # Dataset-specific loaders, each returning a 2-column df ['text', 'label']

def load_predefined_dataset(data_path, binarize):
    """Load one of the known datasets as a two-column frame ('text', 'label').

    The loader is chosen from the file name at the end of ``data_path``.

    Args:
        data_path (str): Path to the dataset spreadsheet.
        binarize (bool): Forwarded to the selected loader; when truthy the
            labels are collapsed to 'Pathos' / 'No_pathos'.

    Returns:
        pandas.DataFrame: Frame with the columns 'text' and 'label'.

    Raises:
        ValueError: If ``data_path`` does not end with a known file name.
    """
    if data_path.endswith('polish_pathos_translated.xlsx'):
        return load_polish_pathos_translated(data_path, binarize)
    if data_path.endswith('PolarIs-Pathos.xlsx'):
        return load_PolarIs(data_path, binarize)
    # Falling through used to return None, which only surfaced later as an
    # unrelated AttributeError; fail here with the offending path instead.
    raise ValueError(f'No predefined loader for dataset path: {data_path!r}')

def load_PolarIs(path, binarize=False):
    """Read the PolarIs spreadsheet and return a ('text', 'label') frame.

    The label of a row is the name of whichever of the columns 'No_pathos',
    'Positive' and 'Negative' holds the largest value; the text comes from
    the 'Sentence' column.

    Args:
        path (str): Location of the Excel file.
        binarize (bool): When truthy, every label other than 'No_pathos' is
            mapped to 'Pathos'.

    Returns:
        pandas.DataFrame: Frame with the columns 'text' and 'label'.
    """
    raw = pd.read_excel(path)
    dominant_column = raw[['No_pathos', 'Positive', 'Negative']].idxmax(axis=1)
    frame = raw.assign(label=dominant_column).rename(columns={'Sentence': 'text'})
    frame = frame[['text', 'label']]

    if not binarize:
        return frame

    def _collapse(label):
        return to_binary_classification(label, {"*": 'Pathos', 'No_pathos': 'No_pathos'})

    frame['label'] = frame['label'].apply(_collapse)
    return frame

def load_polish_pathos_translated(data_path, binarize=False):
    """Read the translated Polish pathos spreadsheet as a ('text', 'label') frame.

    The text is taken from the 'English' column and the label from the
    'cleaned_pathos' column.

    Args:
        data_path (str): Location of the Excel file.
        binarize (bool): When truthy, every label other than 'no pathos' is
            mapped to 'Pathos' and 'no pathos' becomes 'No_pathos'.

    Returns:
        pandas.DataFrame: Frame with the columns 'text' and 'label'.
    """
    raw = pd.read_excel(data_path)
    frame = raw.assign(text=raw['English'], label=raw['cleaned_pathos'])
    frame = frame[['text', 'label']]

    if not binarize:
        return frame

    def _collapse(label):
        return to_binary_classification(label, {"*": 'Pathos', 'no pathos': 'No_pathos'})

    frame['label'] = frame['label'].apply(_collapse)
    return frame




# Cleaning memory

def get_var_from_name(passed):
    """Return the module-level object bound to the name ``passed``.

    The lookup uses ``globals()`` of the module (or notebook namespace) in
    which this function is defined. The previous ``locals()`` lookup could
    only ever see this function's own parameter, so it failed for every
    real variable name.

    Args:
        passed (str): Name of a module-level variable.

    Returns:
        object: The object currently bound to that name.

    Raises:
        TypeError: If ``passed`` is not a string.
        NameError: If no module-level variable with that name exists.
    """
    if not isinstance(passed, str):
        raise TypeError(f'{passed} is not a string')
    try:
        return globals()[passed]
    except KeyError as err:
        raise NameError(f'{passed} not in globals()') from err

def del_obj(el):
    """Release one reference as part of memory clean-up.

    When ``el`` is a string it is treated as the name of a module-level
    variable, and that binding is removed from ``globals()`` of the module
    (or notebook namespace) defining this function. When ``el`` is an object,
    nothing can be unbound from here: Python cannot remove the caller's own
    reference, so the caller has to drop it (``del name`` / rebinding) for
    the object to become collectable.

    Args:
        el (str | object): Variable name to unbind, or an object.

    Returns:
        None. A warning is printed when a name cannot be found.
    """
    if not isinstance(el, str):
        # The only reference held here is the parameter, which disappears
        # when the function returns.
        return
    try:
        del globals()[el]
    except KeyError:
        print(Fore.RED + f"Warning: {el} cannot be deleted by clean_memory(), skipping..." + Style.RESET_ALL)

def clean_memory(to_del=False):
    """Drop the given references, run the garbage collector and free CUDA caches.

    Args:
        to_del: ``False``/``None`` (nothing to delete), a single item, or a
            list/tuple/set of items. Each item is handed to ``del_obj`` and
            may be a variable name or an object.

    Returns:
        None.
    """
    # Imported here because `gc` is not among the imports that accompany
    # this definition.
    import gc

    # Compare against the sentinels explicitly: the truth value of an
    # arbitrary object (e.g. a DataFrame or a tensor) may be undefined.
    if to_del is not False and to_del is not None:
        if not isinstance(to_del, (list, tuple, set)):
            to_del = [to_del]
        for el in to_del:
            del_obj(el)
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        # Restart peak tracking so later measurements only cover the next run.
        torch.cuda.reset_peak_memory_stats()

def get_cuda_memory(device_no=0):
    """Report name, capacity and peak reserved memory of one CUDA device.

    Args:
        device_no (int): Index of the CUDA device to query.

    Returns:
        dict: ``name`` (device name), ``device_no``, ``total_memory`` and
        ``reserved`` (peak memory reserved by the caching allocator), the
        last two in GiB rounded to three decimals.
    """
    gpu_stats = torch.cuda.get_device_properties(device_no)
    bytes_per_gib = 1024 ** 3

    return {
        'name': gpu_stats.name,
        'device_no': device_no,
        'total_memory': round(gpu_stats.total_memory / bytes_per_gib, 3),
        # Query the same device as above; without the argument the current
        # device is used, which may differ from `device_no`.
        'reserved': round(torch.cuda.max_memory_reserved(device_no) / bytes_per_gib, 3),
    }

# Dataset preparation

def df_to_ds(df):
    """Convert a ('text', 'label') frame into a Dataset with class-encoded labels.

    Prints the per-label row counts first.

    Args:
        df (pandas.DataFrame): Frame containing a 'label' column.

    Returns:
        tuple: ``(dataset, target_map)`` where ``target_map`` maps each label
        name to the integer id assigned by the class encoding.
    """
    print(df.groupby('label').count())
    encoded = Dataset.from_pandas(df).class_encode_column('label')
    label_feature = encoded.features["label"]
    target_map = {}
    for class_name in label_feature.names:
        target_map[class_name] = label_feature.str2int(class_name)
    return encoded, target_map

def to_binary_classification(x, convert_dict=None):
    """
    Converts a label according to a mapping, typically to 'Pathos' / 'No_pathos'.

    Args:
        x (str): The original label.
        convert_dict (dict, optional): Mapping from original labels to their
            replacement. The special key ``"*"`` is a wildcard applied to
            every label that has no entry of its own. Defaults to
            ``{"*": 'Pathos', 'No_pathos': 'No_pathos'}``, i.e. everything
            except 'No_pathos' becomes 'Pathos'.

    Returns:
        str: The mapped label; ``x`` itself when it has no entry and the
        mapping has no wildcard.
    """
    if convert_dict is None:
        # Built per call so that no mutable default is shared between calls.
        convert_dict = {"*": 'Pathos', 'No_pathos': 'No_pathos'}
    if x in convert_dict:
        return convert_dict[x]
    return convert_dict.get('*', x)

def split_dataset(df, class_column, random_state=None):
    """Split a frame into a class-balanced sample and the remaining rows.

    From every class the same number of rows is sampled, equal to the size
    of the least represented class. Rows are identified by index label, so
    the index of ``df`` should be unique.

    Args:
        df (pandas.DataFrame): Source frame.
        class_column (str): Name of the column holding the class labels.
        random_state (int | numpy.random.Generator | None): Passed to
            ``DataFrame.sample``; set it to make the split reproducible.
            ``None`` (the default) keeps the draw random.

    Returns:
        tuple: ``(balanced_df, remaining_df)``.
    """
    class_counts = df[class_column].value_counts()
    if class_counts.empty:
        # No labelled rows: nothing can be sampled.
        return df.iloc[0:0].copy(), df.copy()

    min_samples = int(class_counts.min())
    # NaN labels are skipped; they can never match an equality filter.
    per_class = [
        df[df[class_column] == cls].sample(n=min_samples, random_state=random_state)
        for cls in df[class_column].dropna().unique()
    ]
    # Concatenate once instead of growing a frame inside the loop.
    balanced_df = pd.concat(per_class)
    remaining_df = df.drop(balanced_df.index)
    return balanced_df, remaining_df

def ratio_split_tuple(split):
    """Normalise the numeric entries of a split so that they sum to one.

    Non-numeric entries (for example a string marker) are passed through
    unchanged and do not take part in the sum.

    Args:
        split (tuple): Split specification such as ``(90, 10)``.

    Returns:
        tuple: The same entries, with every numeric one divided by the sum
        of all numeric entries.
    """
    numeric_total = 0
    for part in split:
        if isinstance(part, (int, float)):
            numeric_total += part

    normalised = []
    for part in split:
        if isinstance(part, (int, float)):
            normalised.append(part / numeric_total)
        else:
            normalised.append(part)
    return tuple(normalised)

def encode_labels(dataframe):
    """Replace the 'label' column with integer codes from a LabelEncoder.

    The passed frame is modified in place and also returned.

    Args:
        dataframe (pandas.DataFrame): Frame with a 'label' column.

    Returns:
        tuple: ``(dataframe, target_map)`` where ``target_map`` maps each
        original label to its integer code.
    """
    label_encoder = LabelEncoder()
    dataframe['label'] = label_encoder.fit_transform(dataframe['label'])

    class_names = label_encoder.classes_
    class_codes = label_encoder.transform(class_names)
    target_map = {name: int(code) for name, code in zip(class_names, class_codes)}

    return dataframe, target_map


def split_ds(dataset, train_size=0.8, val_size=None, seed=42):
    """Split a dataset into 'train' and 'test', optionally also 'val'.

    Args:
        dataset: Object exposing ``train_test_split(train_size=..., seed=...)``
            (a Hugging Face ``Dataset``).
        train_size (float): Fraction of all rows used for training.
        val_size (float | None): Fraction of all rows used for validation.
            It is carved out of the part left over after the training split,
            so it must be smaller than ``1 - train_size``. ``None`` produces
            only 'train' and 'test'.
        seed (int): Seed used for both splits. Defaults to 42, the value
            that used to be hard-coded.

    Returns:
        The split dataset with the keys 'train', 'test' and, when
        ``val_size`` is given, 'val'.

    Raises:
        ValueError: If ``val_size`` does not leave any rows for the test set.
    """
    if val_size is not None:
        holdout = 1 - train_size
        # Check up front: an invalid value would otherwise produce a
        # meaningless ratio for the second split below.
        if not 0 < val_size < holdout:
            raise ValueError(
                f'val_size must be greater than 0 and smaller than 1 - train_size '
                f'({holdout}), got {val_size}'
            )

    dataset = dataset.train_test_split(train_size=train_size, seed=seed)
    if val_size is not None:
        # Share of the held-out part that stays in the test set.
        test_share = 1 - (val_size / holdout)
        holdout_split = dataset['test'].train_test_split(train_size=test_share, seed=seed)

        dataset['test'] = holdout_split['train']
        dataset['val'] = holdout_split['test']
    return dataset

# # Create run configuration dicts

def param_combinations(param_dict):
    """Expand a parameter grid into one dict per combination.

    Every value that is a list is treated as a set of alternatives; any
    other value (including tuples) is a single fixed value. Nested dicts are
    expanded recursively and contribute their own combinations. The input
    dict is left unmodified.

    Args:
        param_dict (dict): Parameter grid.

    Returns:
        list[dict]: One independent (deep-copied) dict per combination, in
        the order produced by ``itertools.product``.
    """
    expanded = {}
    for key, value in param_dict.items():
        if isinstance(value, dict):
            # Expand into a local mapping rather than writing back into the
            # caller's dict.
            value = param_combinations(value)
        expanded[key] = value if isinstance(value, list) else [value]

    keys = list(expanded)
    # Deep copies keep runs from sharing (and mutating) nested dicts.
    return [
        copy.deepcopy(dict(zip(keys, combination)))
        for combination in itertools.product(*expanded.values())
    ]

# Trainer and metrics

# Metric objects already loaded through `evaluate`, keyed by metric name.
_METRIC_CACHE = {}


def _get_metric(name):
    """Return the `evaluate` metric called ``name``, loading it only once."""
    if name not in _METRIC_CACHE:
        _METRIC_CACHE[name] = evaluate.load(name)
    return _METRIC_CACHE[name]


def compute_metrics(eval_pred):
    """Compute macro precision/recall/F1 and accuracy for a Trainer evaluation.

    Args:
        eval_pred: Tuple ``(logits, labels)`` as passed by the Trainer.

    Returns:
        dict: Scores under the keys 'precision', 'recall', 'f1-score' and
        'accuracy'.
    """
    # This function runs on every evaluation step, so the metrics are cached
    # instead of being reloaded on each call.
    precision_metric = _get_metric("precision")
    recall_metric = _get_metric("recall")
    f1_metric = _get_metric("f1")
    accuracy_metric = _get_metric("accuracy")

    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    precision = precision_metric.compute(predictions=predictions, references=labels, average='macro', zero_division=0)["precision"]
    recall = recall_metric.compute(predictions=predictions, references=labels, average='macro')["recall"]
    f1 = f1_metric.compute(predictions=predictions, references=labels, average='macro')["f1"]
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"]
    # The trainer expects a dictionary mapping metric names to scores.
    return {"precision": precision, "recall": recall, "f1-score": f1, 'accuracy': accuracy}

class WeightedCELossTrainer(Trainer):
    """Trainer whose loss is a class-weighted cross entropy.

    Call ``add_weights`` before training to supply one weight per class id.
    When no weights have been set, a plain unweighted cross entropy is used.
    """

    def add_weights(self, weights):
        """Store the per-class loss weights (a sequence indexed by class id)."""
        self.weights = weights

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute the weighted cross-entropy loss for one batch.

        Args:
            model: The model being trained.
            inputs (dict): Batch; must contain "labels", which is removed
                from the dict before the forward pass.
            return_outputs (bool): Also return the model outputs.
            num_items_in_batch: Accepted because newer Trainer versions pass
                it to ``compute_loss``; not used here.

        Returns:
            The loss, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")

        class_weights = getattr(self, "weights", None)
        if class_weights is not None:
            # Build the weights on the logits' device and dtype so that the
            # loss also works when the model is spread over several devices.
            class_weights = torch.tensor(class_weights, device=logits.device, dtype=logits.dtype)
        loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights)
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss


# Run

def single_run(run_params, df):
    """Run one fine-tuning experiment described by ``run_params``.

    Args:
        run_params (dict): One run configuration. Keys read here:
            'model_name', 'split' (its first element is the train fraction),
            'training_arguments', and optionally 'report_to',
            'wandb_init_params', 'bnb_config' and 'peft_config'.
        df (pandas.DataFrame): Data with the columns 'text' and 'label'.

    Returns:
        tuple | None: ``(run_params, trainer)`` on success. ``None`` when
        model initialisation or training raised; the error is printed and,
        if W&B logging is on, logged before the run is marked as failed.
    """
    # Always bind the flag; it used to be undefined whenever 'report_to'
    # was missing or set to something other than 'wandb'.
    wandb_log = run_params.get('report_to') == 'wandb'

    if wandb_log:
        wandb.init(name=run_params['model_name'], **run_params.get('wandb_init_params', {}))

    ds, target_map = df_to_ds(df)
    ds = split_ds(ds, train_size=run_params['split'][0])
    try:
        # Forward the quantisation / LoRA settings of this run; they used to
        # be dropped, so init_model always fell back to its defaults.
        model, tokenizer, tokenized_datasets = init_model(
            run_params['model_name'],
            ds,
            target_map,
            bnb_config=run_params.get('bnb_config', False),
            peft_config=run_params.get('peft_config', False),
        )
        trainer = finetune(model, tokenizer, tokenized_datasets, ds, run_params['training_arguments'], target_map)
        return run_params, trainer

    except Exception as exc:
        print(exc)

        if wandb_log:
            wandb.log({'error': str(exc)})
            wandb.finish(1)
        # Explicit: callers must check for None before unpacking.
        return None

def finetune(model, tokenizer, tokenized_datasets, ds, params, target_map, log_memory=True, trainer=Trainer):
    """
    Fine-tunes a sequence-classification model with a class-weighted loss.

    Builds the training arguments, trains with ``WeightedCELossTrainer``,
    optionally reports GPU memory usage, predicts on the test split and, when
    a W&B run is active, logs a confusion matrix and finishes the run.

    Args:
        model: Model to train (already initialised).
        tokenizer: Tokenizer used for padding.
        tokenized_datasets: Tokenized dataset with 'train' and 'test' splits.
        ds: The untokenized dataset; its 'label' columns are used for the
            class weights and as ground truth for the confusion matrix.
        params (dict): Keyword arguments forwarded to ``TrainingArguments``.
        target_map (dict): Label name -> integer class id.
        log_memory (bool): Print GPU memory statistics (and log them to W&B
            when a run is active). Disabled automatically when CUDA is
            unavailable.
        trainer: Unused; ``WeightedCELossTrainer`` is always used. Kept so
            that existing calls remain valid.

    Returns:
        WeightedCELossTrainer: The trainer after training.
    """
    if log_memory and not torch.cuda.is_available():
        print("Log memory set to True, but CUDA is unavailable. Setting to False")
        log_memory = False

    # wandb.log() fails when no run has been initialised, so only log when
    # the caller actually started one.
    wandb_active = wandb.run is not None

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
    trained_model_path = f"output/models/{model.config._name_or_path}_{timestamp}"

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")

    training_args = TrainingArguments(
        output_dir=f'{trained_model_path}/checkpoints',
        run_name=model.config._name_or_path,
        **params
    )

    weighted_trainer = WeightedCELossTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets['train'],
        eval_dataset=tokenized_datasets["test"],
        data_collator=data_collator,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
    )

    # Balanced class weights: n_samples / (n_classes_present * class_count).
    # The counts are put in class-id order because the loss indexes the
    # weights by class id; value_counts() alone orders by frequency, which
    # attaches the weights to the wrong classes whenever the two orders differ.
    train_labels = ds['train'].to_pandas()['label']
    class_counts = train_labels.value_counts().reindex(range(len(target_map)), fill_value=0)
    n_present = int((class_counts > 0).sum())
    weights = [
        float(len(train_labels) / (n_present * count)) if count else 0.0
        for count in class_counts
    ]
    weighted_trainer.add_weights(weights)

    if log_memory:
        cuda_prestats = get_cuda_memory(0)
        start_gpu_memory = cuda_prestats['reserved']
        max_memory = cuda_prestats['total_memory']

        print(f"GPU = {cuda_prestats['name']}. Max memory = {max_memory} GB.")
        print(f"{start_gpu_memory} GB of memory reserved.")

        if wandb_active:
            wandb.log({'pre_gpu': {'model': cuda_prestats['name'], 'max_memory': max_memory, 'memory_reserved': start_gpu_memory}})

    trainer_stats = weighted_trainer.train()

    if log_memory:
        cuda_poststats = get_cuda_memory(0)

        used_memory = cuda_poststats['reserved']
        used_memory_for_training = round(used_memory - start_gpu_memory, 3)
        used_percentage = round(used_memory / max_memory * 100, 3)
        lora_percentage = round(used_memory_for_training / max_memory * 100, 3)
        print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
        print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
        print(f"Peak reserved memory = {used_memory} GB.")
        print(f"Peak reserved memory for training = {used_memory_for_training} GB.")
        print(f"Peak reserved memory % of max memory = {used_percentage} %.")
        print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

        if wandb_active:
            wandb.log({'post_gpu': {'peak_memory': used_memory, 'training_memory': used_memory_for_training}})

    predicted = weighted_trainer.predict(tokenized_datasets['test'])
    predicted_labels = [int(i.argmax()) for i in predicted[0]]
    true_labels = [int(i) for i in ds['test']['label']]

    if wandb_active:
        # Class names ordered by class id so they line up with the integer labels.
        class_names = [name for name, _ in sorted(target_map.items(), key=lambda item: item[1])]
        # Ground truth goes to y_true and model output to preds (they used to
        # be swapped, which transposed the logged matrix).
        wandb.log({"cm_test": wandb.plot.confusion_matrix(probs=None,
                                                          y_true=true_labels,
                                                          preds=predicted_labels,
                                                          class_names=class_names)
                   })
        wandb.finish()

    return weighted_trainer



def init_model(model_checkpoint, ds, target_map, bnb_config=False, peft_config=False):
    """Load tokenizer and classification model and tokenize the dataset.

    Args:
        model_checkpoint (str): Hugging Face model name or path.
        ds: Dataset (or DatasetDict) with a 'text' column.
        target_map (dict): Label name -> integer class id; written into the
            model config as ``label2id`` / ``id2label``.
        bnb_config (dict | BitsAndBytesConfig | False): Quantisation
            settings. A dict is expanded into ``BitsAndBytesConfig``. Falsy
            loads the model without quantisation.
        peft_config (dict | LoraConfig | False): LoRA settings. A dict is
            expanded into ``LoraConfig``; tuple values are converted to lists
            and ``task_type`` defaults to sequence classification. Falsy
            skips PEFT.

    Returns:
        tuple: ``(model, tokenizer, tokenized_datasets)``.
    """
    cuda_flag = torch.cuda.is_available()

    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)

    # Tokenizers without a pad token reuse the end-of-sequence token.
    add_pad_token = tokenizer.pad_token is None
    if add_pad_token:
        tokenizer.pad_token_id = tokenizer.eos_token_id
        tokenizer.pad_token = tokenizer.eos_token

    def token_preprocessing_function(examples):
        return tokenizer(examples['text'], truncation=True)

    # Tokenize; the original columns are kept alongside the new ones.
    tokenized_datasets = ds.map(token_preprocessing_function, batched=True)

    # Set to torch format
    tokenized_datasets.set_format("torch")

    # NOTE: trust_remote_code=True executes code shipped with the checkpoint
    # repository; only use checkpoints you trust.
    config = AutoConfig.from_pretrained(model_checkpoint, trust_remote_code=True)
    config.id2label = {v: k for k, v in target_map.items()}
    config.label2id = target_map

    load_kwargs = {
        'config': config,
        'ignore_mismatched_sizes': True,
        'trust_remote_code': True,
    }
    quantized = bool(bnb_config)
    if quantized:
        # Use the argument itself; the global `run_params` was read here
        # before, which only worked inside the notebook loop.
        if not isinstance(bnb_config, BitsAndBytesConfig):
            bnb_config = BitsAndBytesConfig(**bnb_config)
        load_kwargs['device_map'] = 'auto'
        load_kwargs['quantization_config'] = bnb_config

    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, **load_kwargs)

    model.tie_weights()

    if peft_config:
        if not isinstance(peft_config, LoraConfig):
            # The run grid passes multi-valued options as tuples (lists mean
            # "alternatives" there); turn them back into lists here.
            lora_kwargs = {
                key: list(value) if isinstance(value, tuple) else value
                for key, value in dict(peft_config).items()
            }
            lora_kwargs.setdefault('task_type', TaskType.SEQ_CLS)
            peft_config = LoraConfig(**lora_kwargs)

        model = prepare_model_for_kbit_training(model)
        print('Model prepared')
        model = get_peft_model(model, peft_config)
        print('Model perfed')
        model.print_trainable_parameters()

    # A quantized model has already been placed by device_map='auto' and
    # must not be moved again.
    if cuda_flag and not quantized:
        model = model.cuda()
        print(model.device)
        print('Model to cuda')

    if add_pad_token:
        model.resize_token_embeddings(len(tokenizer))
        # Keep the model's pad id identical to the id the tokenizer pads with.
        model.config.pad_token_id = tokenizer.pad_token_id

    return model, tokenizer, tokenized_datasets


# Validate functions

# def get_log_for_val(checkpoint_path, logs_path, sort_col='Step'):
#     """
#     Retrieves the training log entry corresponding to a given checkpoint.

#     Args:
#         checkpoint_path (str): The path to the checkpoint directory.
#         logs_path (str): The path to the CSV file containing the training logs.
#         sort_col (str, optional): The column to sort by when searching for the latest checkpoint. Defaults to 'Step'.

#     Returns:
#         pd.Series: A Pandas Series representing a single row of the training logs.
#     """
#     training_logs = pd.read_csv(logs_path)
#     if 'checkpoint-' in checkpoint_path:
#         temp_path = checkpoint_path.rsplit('models/', 1)[-1]
#         model_path, checkpoint_num = temp_path.rsplit('/checkpoints/checkpoint-')
#         row = training_logs[(training_logs['model_path'].apply(lambda x: x.rsplit('models/', 1)[-1] == model_path)) & (
#                     training_logs['Step'] == int(checkpoint_num))]
#     else:
#         row = training_logs[training_logs['model_path'] == checkpoint_path].sort_values(sort_col, ascending=False).head(
#             1)
#     return row.iloc[0]


# def validate(row, ds):
#     """
#     Loads a trained model from a checkpoint and evaluates its performance on the validation set.

#     Args:
#         row (pd.Series): A single row from the training logs, containing checkpoint information.
#         ds (DatasetDict): A Hugging Face DatasetDict containing a 'validate' split.

#     Returns:
#         tuple: A tuple containing:
#             * predicted (list): List of predicted labels.
#             * val_labels (list): List of true labels.
#     """
#     val_sentences = ds['validate']['sentence']
#     val_labels = [reversed_target_map[i] for i in ds['validate']['label']]

#     classifier = pipeline('text-classification',
#                           model=os.path.join(row['model_path'], 'checkpoints', f"checkpoint-{row['Step']}"), device=0)
#     predicted = [i['label'] for i in classifier(val_sentences)]
#     return predicted, val_labels


# def val_metrics(predicted, val_labels, target_map):
#     """
#     Calculates and displays validation metrics (accuracy, F1-score, confusion matrix).

#     Args:
#         predicted (list): List of predicted labels.
#         val_labels (list): List of true labels.
#         target_map (dict): A mapping of original labels to numerical indices.
#     """
#     print("acc:", accuracy_score(val_labels, predicted))
#     print("f1:", f1_score(val_labels, predicted, average='macro'))

#     cm = confusion_matrix(val_labels, predicted, normalize='true')
#     plot_cm(cm, target_map)


def plot_cm(cm, target_map):
    """Draw a confusion matrix as an annotated seaborn heatmap.

    Args:
        cm: Square matrix of values, one row and one column per class.
        target_map (dict): Its keys, in order, are used as row and column
            labels.
    """
    class_names = list(target_map.keys())
    labelled_matrix = pd.DataFrame(cm, index=class_names, columns=class_names)
    heatmap_axes = sn.heatmap(labelled_matrix, annot=True, fmt='.2g')
    # Columns are labelled "Predicted" and rows "Target".
    heatmap_axes.set_xlabel("Predicted")
    heatmap_axes.set_ylabel("Target")


def yeelight_eow_notification(bulb_ip):
    """Switch a Yeelight bulb on and set it to green at brightness 100.

    Args:
        bulb_ip (str): Network address of the bulb.
    """
    lamp = Bulb(bulb_ip)
    lamp.turn_on()
    green_rgb = (0, 255, 0)
    lamp.set_rgb(*green_rgb)
    lamp.set_brightness(100)

In [2]:
# set run environment (local/colab), if colab move proper dir
import os
from pathlib import Path

# Detect the runtime from the COLAB_RELEASE_TAG environment variable and
# record the result in `colab`.
if os.getenv("COLAB_RELEASE_TAG"):
    colab = True

    # Mount Google Drive and change into the project directory stored there.
    from google.colab import drive
    drive.mount('/content/drive')
    %cd /content/drive/Othercomputers/My computer/EQILLM/

    # Install the project requirements quietly.
    !pip install -r requirements.txt -q --exists-action i
    !pip install transformers[torch] -q --exists-action i
    !pip install accelerate -U -q --exists-action i
else:
    colab = False

import torch, gc
import wandb
# from numba import cuda

from datasets import Dataset, load_dataset
from dotenv import load_dotenv, dotenv_values
from huggingface_hub import login
from tqdm.auto import tqdm
from tqdm.notebook import tqdm_notebook
from peft import TaskType

# from eqillm import *

# import os
# import json
# import pickle
# from pathlib import Path
# import torch, gc
# import wandb
# # from numba import cuda
# from csv import writer
# from datasets import Dataset, load_dataset
# from dotenv import load_dotenv, dotenv_values
# from huggingface_hub import login
# from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
# from transformers import AutoModelForSequenceClassification, Trainer, BitsAndBytesConfig, AutoTokenizer, DataCollatorWithPadding

# from eqillm import finetune, yeelight_eow_notification, param_combinations, load_PolarIs, split_ds, encode_labels, init_model, to_binary_classification, load_predefined_dataset


# Settings are read from the file named 'env'.
dotenv_config = dotenv_values('env')

# The Yeelight notification setting is only taken over when running on Colab
# and the key is present; otherwise it stays off.
yeelight_notify = False
if colab and 'YEELIGHT_NOTIFY' in dotenv_config:
    yeelight_notify = dotenv_config['YEELIGHT_NOTIFY']

os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
# Authenticate against the Hugging Face Hub with the token from the env file.
login(token=dotenv_config['HF_TOKEN'])

# If a value is passed as a list, one run is created for every combination of the listed values.
# To avoid that for a parameter that itself needs several values (for example, target modules in peft or data splits),
# pass it as a tuple.

# Parameter grid for the experiment series. param_combinations() turns it
# into one flat run configuration per combination of the list-valued entries.
run_config = {'data_path': [
                               'data/PolarIs-Pathos.xlsx',
                               # 'data/polish_pathos_translated.xlsx',
                              ],
                 # -----------------------
                 'report_to': "wandb",
                 'wandb_init_params': {
                    'project': [
                                # "polish_pathos_translated",
                                "test"
                                 ],
                    'group':  "binary",
                    },
                 # -----------------------
                 'model_name': [
                            'distilbert/distilbert-base-uncased-finetuned-sst-2-english',
                            'michellejieli/emotion_text_classifier',
                           #  'cardiffnlp/twitter-xlm-roberta-base-sentiment',
                           #  'celine98/canine-s-finetuned-sst2',
                           #  'lxyuan/distilbert-base-multilingual-cased-sentiments-student',
                           #  'michelecafagna26/t5-base-finetuned-sst2-sentiment',
                           # 'nlptown/bert-base-multilingual-uncased-sentiment',
                           # 'ProsusAI/finbert',
                           # 'arpanghoshal/EmoRoBERTa',
                           # 'camembert-base'
                           # 'cardiffnlp/twitter-roberta-base-irony',
                           # 'cardiffnlp/twitter-roberta-base-sentiment-latest',
                           # 'ctrl',
                           # 'distilroberta-base',
                           # 'flaubert/flaubert_base_cased',
                           # 'j-hartmann/emotion-english-distilroberta-base',
                           # 'joeddav/distilbert-base-uncased-go-emotions-student',
                           # 'lxyuan/distilbert-base-multilingual-cased-sentiments-student',
                           # 'nlptown/bert-base-multilingual-uncased-sentiment',
                           # 'papluca/xlm-roberta-base-language-detection',
                           # 'roberta-base',
                           # 'xlnet-base-cased',
                           # 'facebook/tart-full-flan-t5-xl',
                           # 'lytang/MiniCheck-Flan-T5-Large',
                           # 'microsoft/phi-2',
                           # 'meta-llama/Meta-Llama-3-8B',
                           # 'lightblue/suzume-llama-3-8B-multilingual',
                           # 'google/gemma-2b',
                           # 'mistralai/Mistral-7B-v0.1',
                           # 'tiiuae/falcon-11B' ,
                           ], # Pre-trained model names from the Hugging Face hub used for fine-tuning
                 # --------------------------
                  'split': [
                           # (0.9, 0.1),
                           (0.98, 0.02),
                           # (0.7, 0.3),
                           # (0.6, 0.4),
                           # (0.5, 0.5),
                          ], # Dataset split, given as a tuple so it is not expanded into separate runs. NOTE(review): single_run only reads the first element (the train fraction); the rest becomes the test set.
                 'binary': False, # Intended to select binary vs. multi-class classification. NOTE(review): not read by the visible run loop, which always passes binarize=True.
                 'balanced': False, # Intended to sample the same number of training rows from every label (sized by the least represented label), using the rest for testing. NOTE(review): not read by the visible code.
                 # --------------------------
                 'training_arguments': {
                     'num_train_epochs': 1, # Number of times the model sees the entire training dataset.
                     'per_device_train_batch_size': 256, # Number of samples processed in each training step; the run loop doubles this value after every successful run.
                     'per_device_eval_batch_size': 256, # Number of samples processed in each evaluation step; doubled together with the training batch size.
                     # 'gradient_accumulation_steps': 4,
                     'gradient_checkpointing': True,
                     #-----------------------------
                     'save_total_limit': 2,
                     'load_best_model_at_end': True,
                     'save_strategy': 'steps', # Controls when to save model checkpoints ('steps', 'epoch' or 'no').
                     'metric_for_best_model': 'f1-score',
                     #-----------------------------
                     'evaluation_strategy': "steps",
                     'logging_steps': 20,
                     'max_steps': 20,
                     'fp16': False,
                     # 'use_cpu': False,
                     #-----------------------------
                     'learning_rate': [
                                        # 1e-6,
                                        5e-5,
                                        # 1e-5,
                                        # 1e-3,
                                        # 1e-2,
                                        ],
                     'lr_scheduler_type': [
                                       # "constant",
                                       # "constant_with_warmup",
                                         "linear",
                                       # "polynomial",
                                      ],
                     'warmup_ratio': [
                                      0.1,
                                      # 0.2,
                                      # 0.3
                                     ], #0.1
                     'max_grad_norm': [
                                       # 0.1,
                                       # 0.2,
                                       0.3
                                      ],  #0.3
                     'weight_decay': [
                                      0.001,
                                      # 0.01
                                     ], # 0.001
                 },
                 #-----------------------------
                 # 'callbacks': [EarlyStoppingCallback(early_stopping_patience=3)], #trainer not training args
                 'bnb_config': [
                                False,
                                # {bnb_4bit_compute_dtype=torch.bfloat16, 'load_in_4bit': True, 'bnb_4bit_quant_type': "nf4", 'bnb_4bit_use_double_quant': True}
                                 ],
                 'peft_config': [
                                False,
                                # {'r': 8,
                                # 'lora_alpha': 32,
                                # 'lora_dropout': 0.1,
                                # 'bias': "none",
                                # 'target_modules': ("q_proj", "v_proj",)
                                # 'target_modules': "all-linear"
                                # }
                                ],
                    }


# overwrite = {
#             'mistralai/Mistral-7B-v0.1' : {'training_params':{'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'gradient_checkpointing': True,},
#                'peft_config' : LoraConfig(task_type=TaskType.SEQ_CLS,
#                                                     r=8,
#                                                     lora_alpha=32,
#                                                     lora_dropout=0.1,
#                                                     bias="none",
#                                                     target_modules='all-linear'
#                                                     # target_modules=[
#                                                          # "q_proj",
#                                                          # "v_proj",
#                                                             # ]
#                                                 )
#             }}

# Half-precision dtype, taken directly from the torch module.
compute_dtype = torch.float16
print(compute_dtype)

# Flag meant to control whether logs are saved.
# NOTE(review): not read anywhere in the visible code.
save_logs = True

# Expand the grid into one parameter dict per run, report how many runs
# there are and display them as the cell result.
run_params_serie = param_combinations(run_config)
print(len(run_params_serie))
run_params_serie

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\Jakub\.cache\huggingface\token
Login successful
torch.float16
2


[{'data_path': 'data/PolarIs-Pathos.xlsx',
  'report_to': 'wandb',
  'wandb_init_params': {'project': 'test', 'group': 'binary'},
  'model_name': 'distilbert/distilbert-base-uncased-finetuned-sst-2-english',
  'split': (0.98, 0.02),
  'binary': False,
  'balanced': False,
  'training_arguments': {'num_train_epochs': 1,
   'per_device_train_batch_size': 256,
   'per_device_eval_batch_size': 256,
   'gradient_checkpointing': True,
   'save_total_limit': 2,
   'load_best_model_at_end': True,
   'save_strategy': 'steps',
   'metric_for_best_model': 'f1-score',
   'evaluation_strategy': 'steps',
   'logging_steps': 20,
   'max_steps': 20,
   'fp16': False,
   'learning_rate': 5e-05,
   'lr_scheduler_type': 'linear',
   'warmup_ratio': 0.1,
   'max_grad_norm': 0.3,
   'weight_decay': 0.001},
  'bnb_config': False,
  'peft_config': False},
 {'data_path': 'data/PolarIs-Pathos.xlsx',
  'report_to': 'wandb',
  'wandb_init_params': {'project': 'test', 'group': 'binary'},
  'model_name': 'michelle

In [3]:
# run_params_serie[0] = copy.deepcopy(run_params_serie[0])

# run_params_serie[0]['training_arguments']['per_device_train_batch_size'] = 2056
# run_params_serie

In [5]:
from csv import writer

# Batch-size sweep.
#
# For every parameter set in run_params_serie: train via single_run, append
# one row (the returned parameters plus the measured training throughput) to
# testing_batchsizes.csv, double both per-device batch sizes and train again.
# The sweep for a model stops when the reserved CUDA memory reported by
# get_cuda_memory comes within 1 of the device total, or when a run fails.
for run_params in run_params_serie:
    print('new_model')
    looped = True
    while looped:
        try:
            print(run_params['training_arguments']['per_device_train_batch_size'])

            # The dataset is always loaded with binarize=True here.
            df = load_predefined_dataset(run_params['data_path'], True)
            p, tr = single_run(run_params, df)

            print(p['training_arguments']['per_device_train_batch_size'])

            # The first item returned by parse_log_history carries the
            # 'train_samples_per_second' entry that is logged with the params.
            to_log = p | {'train_samples_per_second': parse_log_history(tr.state.log_history)[0]['train_samples_per_second']}
            print(to_log.values())

            # Append-only: rows are written without a header, in the
            # insertion order of to_log.
            with open('testing_batchsizes.csv', 'a+', newline='') as f_object:
                writer_object = writer(f_object)
                writer_object.writerow(to_log.values())

            # NOTE: this doubles the batch sizes in place, so the entries of
            # run_params_serie stay modified after the sweep finishes.
            run_params['training_arguments']['per_device_train_batch_size'] *= 2
            run_params['training_arguments']['per_device_eval_batch_size'] *= 2

            cuda_poststats = get_cuda_memory(0)
            print(cuda_poststats['reserved'])
            print(cuda_poststats['total_memory'])
            # Stop once reserved memory is within 1 unit of the device total
            # (presumably GB -- confirm against get_cuda_memory).
            if cuda_poststats['reserved'] > cuda_poststats['total_memory'] - 1:
                print('reserved higher than available, exiting loop for this model')
                looped = False
            clean_memory(tr)
        except Exception as err:
            # Catch Exception rather than using a bare `except:` so that
            # KeyboardInterrupt/SystemExit still abort the whole sweep, and
            # report the error so an out-of-memory failure can be told apart
            # from an ordinary bug.
            print('broke')
            print(repr(err))
            looped = False
            clean_memory()

new_model
256


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mjakubpart[0m ([33mjpartyka[0m). Use [1m`wandb login --relogin`[0m to force relogin


            text
label           
No_pathos  10637
Pathos      4951


Casting to class labels:   0%|          | 0/15588 [00:00<?, ? examples/s]



Map:   0%|          | 0/15276 [00:00<?, ? examples/s]

Map:   0%|          | 0/312 [00:00<?, ? examples/s]

cuda:0
Model to cuda


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False)


GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.
0.285 GB of memory reserved.




Step,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
20,1.2576,0.691579,0.555833,0.556168,0.555993,0.625


25.4422 seconds used for training.
0.42 minutes used for training.
Peak reserved memory = 10.359 GB.
Peak reserved memory for training = 10.074 GB.
Peak reserved memory % of max memory = 103.59 %.
Peak reserved memory for training % of max memory = 100.74 %.


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▁
eval/f1-score,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁
train/global_step,▁▁▁▁▁

0,1
eval/accuracy,0.625
eval/f1-score,0.55599
eval/loss,0.69158
eval/precision,0.55583
eval/recall,0.55617
eval/runtime,3.8706
eval/samples_per_second,80.608
eval/steps_per_second,0.517
train/epoch,0.33
train/global_step,20.0


256
dict_values(['data/PolarIs-Pathos.xlsx', 'wandb', {'project': 'test', 'group': 'binary'}, 'distilbert/distilbert-base-uncased-finetuned-sst-2-english', (0.98, 0.02), False, False, {'num_train_epochs': 1, 'per_device_train_batch_size': 256, 'per_device_eval_batch_size': 256, 'gradient_checkpointing': True, 'save_total_limit': 2, 'load_best_model_at_end': True, 'save_strategy': 'steps', 'metric_for_best_model': 'f1-score', 'evaluation_strategy': 'steps', 'logging_steps': 20, 'max_steps': 20, 'fp16': False, 'learning_rate': 5e-05, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.1, 'max_grad_norm': 0.3, 'weight_decay': 0.001}, False, False, 201.241])
10.359
10.0
reserved higher than available, exiting loop for this model
new_model
256


            text
label           
No_pathos  10637
Pathos      4951


Casting to class labels:   0%|          | 0/15588 [00:00<?, ? examples/s]



Map:   0%|          | 0/15276 [00:00<?, ? examples/s]

Map:   0%|          | 0/312 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at michellejieli/emotion_text_classifier and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([7, 768]) in the checkpoint and torch.Size([2, 768]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([7]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda:0
Model to cuda


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False)


GPU = NVIDIA GeForce RTX 3080. Max memory = 10.0 GB.
1.389 GB of memory reserved.




Step,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
20,0.6613,0.589637,0.676068,0.696028,0.681351,0.714744


25.5537 seconds used for training.
0.43 minutes used for training.
Peak reserved memory = 9.344 GB.
Peak reserved memory for training = 7.955 GB.
Peak reserved memory % of max memory = 93.44 %.
Peak reserved memory for training % of max memory = 79.55 %.


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=0.9146118721461187, max=1.0…

0,1
eval/accuracy,▁
eval/f1-score,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁
train/global_step,▁▁▁▁▁

0,1
eval/accuracy,0.71474
eval/f1-score,0.68135
eval/loss,0.58964
eval/precision,0.67607
eval/recall,0.69603
eval/runtime,4.0083
eval/samples_per_second,77.839
eval/steps_per_second,0.499
train/epoch,0.33
train/global_step,20.0


256
dict_values(['data/PolarIs-Pathos.xlsx', 'wandb', {'project': 'test', 'group': 'binary'}, 'michellejieli/emotion_text_classifier', (0.98, 0.02), False, False, {'num_train_epochs': 1, 'per_device_train_batch_size': 256, 'per_device_eval_batch_size': 256, 'gradient_checkpointing': True, 'save_total_limit': 2, 'load_best_model_at_end': True, 'save_strategy': 'steps', 'metric_for_best_model': 'f1-score', 'evaluation_strategy': 'steps', 'logging_steps': 20, 'max_steps': 20, 'fp16': False, 'learning_rate': 5e-05, 'lr_scheduler_type': 'linear', 'warmup_ratio': 0.1, 'max_grad_norm': 0.3, 'weight_decay': 0.001}, False, False, 200.363])
9.344
10.0
reserved higher than available, exiting loop for this model
