In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
my_drive = GoogleDrive(gauth)

In [3]:
def clear_trash():
  for a_file in my_drive.ListFile({'q': "trashed = true"}).GetList():
    # print the name of the file being deleted.
    try:
      print(f"the file {a_file['title']}, is about to get deleted permanently.")
      # delete the file permanently.
      a_file.Delete()
    except:
      pass

In [4]:
# clear_trash()

In [5]:
# !cp /content/drive/MyDrive/TalkLikeMom/conversations-main.zip /content
# !unzip conversations-main.zip

In [6]:
cd /content/conversations-main/Classifier/

/content/conversations-main/Classifier


In [7]:
# import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [8]:
import torch
torch.cuda.is_available()

True

In [9]:
# !pip uninstall --yes torchaudio torchdata torchtext torchvision

In [10]:
# !pip install -q torch==1.13.0

In [11]:
torch.__version__

'2.0.1+cu118'

In [12]:
import sys
sys.path.append('..')

In [13]:
from utils import load_jsonl, dump_jsonl, set_random_seed

In [14]:
from data_loader import get_task1_conver, get_task2_conver, preprocess

In [15]:
import pandas as pd

In [16]:
# df = get_task1_conver("../Task1//annotated_conersations.jsonl", "closeness", skips = ["4. Don't like each other"], only_user=False)
# # print(df[0]["text"][0])
# pd.concat(df).groupby("label").count()

In [17]:
# df = get_task2_conver("../Task2/annotated/annotated.jsonl", "closeness", skips = [], only_user=False)
# # print(df[0]["text"][0])
# pd.concat(df).groupby("label").count()

In [18]:
# df = get_task1_conver("../Task3/annotated/annotated.jsonl", "closeness", skips = [], only_user=False)
# # print(df[0]["text"][0])
# pd.concat(df).groupby("label").count()

In [19]:
# "DONE"

In [20]:
# !pip install -q transformers pythainlp datasets evaluate sentencepiece
# # !pip install -q accelerate -U
# !pip install -q transformers[torch]
# # !pip install -q
# !pip install -q shap nlp

In [21]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# import wandb
# from sklearn.metrics import precision_recall_fscore_support, classification_report
# from pythainlp.tokenize import word_tokenize
import torch
import datasets
from datasets import Dataset, DatasetDict
from transformers import DataCollatorWithPadding
import numpy as np
import evaluate
from transformers import TrainingArguments, Trainer



In [22]:
import random
def set_random_seed(seed=42):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    np.random.RandomState(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed) #seed all gpus
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.benchmark = False

In [60]:
from sklearn.utils import compute_class_weight
import torch.nn as nn
import os, shutil
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import mean_squared_error, r2_score

def run_exp(out_dir, df, seed=42, report="none", regressor_configs=None):

    set_random_seed(seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)
    # device = torch.device("cpu")
    print("START")
    print("step 1: load data")
    train, val, test = df

#     train = train.head(100)
#     val = val.head(100)
#     test = test.head(100)

    print("step 2: load tokenizer")
    model_name = "airesearch/wangchanberta-base-att-spm-uncased"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    num_added_toks = tokenizer.add_special_tokens({"additional_special_tokens": ["usr", "sys", "rep"]})

    print("step 3: init data")
    ds = DatasetDict()
    ds['train'] = Dataset.from_pandas(train)
    ds['val'] = Dataset.from_pandas(val)
    ds['test'] = Dataset.from_pandas(test)

    if regressor_configs is None:
        labels = train["label"].unique()
        num_labels = len(labels)
        print(labels)

        class_weights = compute_class_weight("balanced", classes=labels, y=train["label"].values)
        class_weights = torch.tensor(class_weights).float().to(device)

        id2label = {i:l for i, l in enumerate(labels)}
        label2id = {l:i for i, l in enumerate(labels)}

        def word_tokenize(d, tokenizer=None, label2id=None, max_length=256):
            texts = [preprocess(t) for t in d["text"]]
    #         print(texts)
            tokens = tokenizer(texts, truncation=True, max_length=max_length)
            num = [len(t) for t in tokens["input_ids"]]
    #         print(num)
    #         print("AVG", len(num), sum(num)/len(num))
            tokens["label"] = [label2id[label] for label in d["label"]]
            return tokens
    else:
#         labels = train["label"].unique()
        num_labels = 1

        id2label = {1: regressor_configs["label"]}
        label2id = [regressor_configs["label"]]

        def word_tokenize(d, tokenizer=None, label2id=None, max_length=256):
            texts = [preprocess(t) for t in d["text"]]
    #         print(texts)
            tokens = tokenizer(texts, padding='max_length', truncation=True, max_length=max_length)
            # num = [len(t) for t in tokens["input_ids"]]
    #         print(num)
    #         print("AVG", len(num), sum(num)/len(num))
            tokens["label"] = [regressor_configs["label_fn"](label) for label in d["label"]]
            return tokens



    tokenized_ds = ds.map(word_tokenize, batched=True, fn_kwargs={"tokenizer":tokenizer, "label2id": label2id, "max_length":max_length})
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    print("step 4: load model")
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels, id2label=id2label, label2id=label2id);
    model.resize_token_embeddings(len(tokenizer)+1)
    model = model.to(device)

    # print(model)

    if regressor_configs is None:
        metrics = evaluate.combine(["accuracy", "f1", "precision", "recall"])
        def compute_metrics(eval_pred):
            predictions, labels = eval_pred
            predictions = np.argmax(predictions, axis=1)
            return metrics.compute(predictions=predictions, references=labels, average="macro")
    else:
        label_fn = regressor_configs["label_fn"]
        def compute_metrics(eval_pred):
            predictions, actual = eval_pred
            predictions = predictions.reshape(-1)

            predicted_labels = [label_fn(p) for p in predictions]
            actual_labels = [label_fn(p) for p in actual]
            p, r, f1, _ = precision_recall_fscore_support(actual_labels, predicted_labels, average='macro')

            return {
                "r2_score": r2_score(actual, predictions),
                "mean_squared_error": np.sqrt(mean_squared_error(actual, predictions)),
                "accuracy": accuracy_score(actual_labels, predicted_labels),
                "f1": f1,
                "precision": p,
                "recall": r,
            }


    print("step 5: fine-tune")

    training_args = TrainingArguments(
        output_dir=out_dir,
        learning_rate=2e-5,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=num_epochs,
        weight_decay=0.01,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        report_to=report,
        metric_for_best_model="f1",
        save_total_limit=2,
        load_best_model_at_end=True,
        push_to_hub=False,
        run_name=out_dir,
    )

    # print(training_args)

    if regressor_configs is None:
        class CustomTrainer(Trainer):
            def compute_loss(self, model, inputs, return_outputs=False):
                labels = inputs.get("labels")
                # forward pass
                outputs = model(**inputs)

                logits = outputs.get("logits")

                loss_fct = nn.CrossEntropyLoss(weight=class_weights)
                loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
                return (loss, outputs) if return_outputs else loss
    else:
        CustomTrainer = Trainer

    trainer = CustomTrainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_ds["train"],
        eval_dataset=tokenized_ds["val"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    best_ckpt_path = trainer.state.best_model_checkpoint
    print(best_ckpt_path)

    modle_out_path = out_dir+"/best_model"
    if os.path.exists(modle_out_path):
        shutil.rmtree(modle_out_path)

    os.rename(best_ckpt_path, modle_out_path)
    best_ckpt_path = modle_out_path

    print("step 6: evaluate")
    eval_test = trainer.evaluate(tokenized_ds["test"])
    eval_val = trainer.evaluate(tokenized_ds["val"])
    print(eval_test)

    print("DONE")

    return trainer, eval_val, eval_test

## Task1: Train Model

In [61]:
report = "none"
batch_size = 16
max_length = 128
num_epochs = 20

In [62]:
# import os
# stream = os.popen('nohup python3 run_train_task_classifier.py > train2.out &')
# output = stream.read()
# output

In [63]:
# df[0]["label"].value_counts().loc[['1. Close', '2. Know each other', "3. Don't know each other"]].plot.bar()

In [64]:
# df[1]["label"].value_counts().loc[['1. Close', '2. Know each other', "3. Don't know each other"]].plot.bar()

In [65]:
# df[2]["label"].value_counts().loc[['1. Close', '2. Know each other', "3. Don't know each other"]].plot.bar()

In [67]:
def closeness_label_fn(label):
    if label == '1. Close':
        return 1
    elif label =='2. Know each other':
        return 0.5
    elif label == "3. Don't know each other":
        return 0
    elif type(label)==str:
        assert(False)

    # [0, 0.33) =>
    # [0.33, 0.66) =>
    # [0.66, 1] =>

    if label > 0.66:
        return '1. Close'
    elif label > 0.33:
        return '2. Know each other'
    else:
        return "3. Don't know each other"


best_trainer = None
best_eval_val = None
best_eval_test = None

for i in range(5):
  df = get_task1_conver("../Task1/annotated_conersations.jsonl", "closeness", skips = ["4. Don't like each other"], only_user=True)
  trainer, eval_test, eval_val = run_exp(f"./Regressors/task1_clse_usr{i}", df, seed=i, report=report, regressor_configs={
      "label": "close",
      "not_label": "not_close",
      "label_fn": closeness_label_fn,
  })

  print(f"==== round {i+1} ====")
  print("Result[val]", eval_val)
  print("Result[test]", eval_test)

  if (best_eval_val is None) or (best_eval_val["eval_f1"] < eval_val["eval_f1"]):
    best_eval_val = eval_val
    best_eval_test = eval_test
    best_trainer = trainer

  del trainer

print()
print()
print("===================================")
print()
print("BEST MODEL")
print("Result[val]", best_eval_val)
print("Result[test]", best_eval_test)

Loaded 1234 records from ../Task1/annotated_conersations.jsonl
N 1096 60 60
cuda
START
step 1: load data
step 2: load tokenizer
step 3: init data


Map:   0%|          | 0/1096 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.212616,-0.008788,0.461103,0.3,0.306777,0.421308,0.355475
2,No log,0.185213,0.121228,0.430364,0.516667,0.437445,0.478788,0.440016
3,No log,0.126377,0.400386,0.355496,0.616667,0.52397,0.572095,0.519209
4,No log,0.117333,0.443299,0.342538,0.683333,0.559495,0.575758,0.554635
5,No log,0.1047,0.503237,0.323573,0.65,0.527708,0.560317,0.523062
6,No log,0.088889,0.578251,0.298143,0.733333,0.639416,0.656744,0.638026
7,No log,0.099324,0.528745,0.315156,0.733333,0.621497,0.634104,0.61807
8,0.107900,0.094639,0.550969,0.307635,0.733333,0.66936,0.66835,0.67351
9,0.107900,0.08966,0.574597,0.299432,0.733333,0.668412,0.667633,0.676098
10,0.107900,0.084346,0.599809,0.290423,0.766667,0.697236,0.699288,0.69732


./Regressors/task1_clse_usr0/checkpoint-1311
step 6: evaluate


{'eval_loss': 0.052886586636304855, 'eval_r2_score': 0.7408755737689448, 'eval_mean_squared_error': 0.22997082769870758, 'eval_accuracy': 0.8666666666666667, 'eval_f1': 0.7854166666666668, 'eval_precision': 0.80007215007215, 'eval_recall': 0.7766427718040622, 'eval_runtime': 0.157, 'eval_samples_per_second': 382.094, 'eval_steps_per_second': 25.473, 'epoch': 20.0}
DONE
==== round 1 ====
Result[val] {'eval_loss': 0.052886586636304855, 'eval_r2_score': 0.7408755737689448, 'eval_mean_squared_error': 0.22997082769870758, 'eval_accuracy': 0.8666666666666667, 'eval_f1': 0.7854166666666668, 'eval_precision': 0.80007215007215, 'eval_recall': 0.7766427718040622, 'eval_runtime': 0.157, 'eval_samples_per_second': 382.094, 'eval_steps_per_second': 25.473, 'epoch': 20.0}
Result[test] {'eval_loss': 0.07594796270132065, 'eval_r2_score': 0.6396538098734016, 'eval_mean_squared_error': 0.2755865752696991, 'eval_accuracy': 0.8, 'eval_f1': 0.7272274086830417, 'eval_precision': 0.7370711725550435, 'eval_re

Map:   0%|          | 0/1096 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.131163,0.37768,0.362164,0.6,0.525005,0.566667,0.51248
2,No log,0.113559,0.461203,0.336985,0.766667,0.713675,0.709848,0.72504
3,No log,0.099308,0.528817,0.315132,0.733333,0.683551,0.689504,0.701231
4,No log,0.107138,0.491669,0.327319,0.75,0.661367,0.669176,0.657695
5,No log,0.122352,0.419484,0.349788,0.683333,0.62776,0.64441,0.635208
6,No log,0.096656,0.5414,0.310896,0.683333,0.601412,0.626654,0.599724
7,No log,0.111144,0.472663,0.333382,0.683333,0.61075,0.625169,0.607488
8,0.084200,0.162957,0.226828,0.403679,0.583333,0.529228,0.564103,0.538647
9,0.084200,0.107154,0.491591,0.327344,0.7,0.624308,0.633333,0.621981
10,0.084200,0.122184,0.420282,0.349548,0.65,0.582571,0.606926,0.578502


./Regressors/task1_clse_usr1/checkpoint-138
step 6: evaluate


{'eval_loss': 0.10785043984651566, 'eval_r2_score': 0.471573306773154, 'eval_mean_squared_error': 0.32840588688850403, 'eval_accuracy': 0.6666666666666666, 'eval_f1': 0.5727753727753728, 'eval_precision': 0.6005876068376068, 'eval_recall': 0.559557945041816, 'eval_runtime': 0.1553, 'eval_samples_per_second': 386.454, 'eval_steps_per_second': 25.764, 'epoch': 20.0}
DONE
==== round 2 ====
Result[val] {'eval_loss': 0.10785043984651566, 'eval_r2_score': 0.471573306773154, 'eval_mean_squared_error': 0.32840588688850403, 'eval_accuracy': 0.6666666666666666, 'eval_f1': 0.5727753727753728, 'eval_precision': 0.6005876068376068, 'eval_recall': 0.559557945041816, 'eval_runtime': 0.1553, 'eval_samples_per_second': 386.454, 'eval_steps_per_second': 25.764, 'epoch': 20.0}
Result[test] {'eval_loss': 0.11355898529291153, 'eval_r2_score': 0.4612028992375019, 'eval_mean_squared_error': 0.33698511123657227, 'eval_accuracy': 0.7666666666666667, 'eval_f1': 0.7136752136752137, 'eval_precision': 0.7098484848

Map:   0%|          | 0/1096 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.100818,0.521653,0.317519,0.666667,0.611212,0.63671,0.615539
2,No log,0.113767,0.460214,0.337294,0.733333,0.69453,0.714286,0.723775
3,No log,0.099885,0.526083,0.316045,0.666667,0.611212,0.63671,0.615539
4,No log,0.14389,0.317294,0.379328,0.65,0.486532,0.527665,0.492754
5,No log,0.105577,0.499074,0.324926,0.683333,0.612471,0.630159,0.610076
6,No log,0.094615,0.551086,0.307595,0.666667,0.615995,0.640566,0.620715
7,No log,0.110686,0.474836,0.332694,0.75,0.594961,0.615215,0.602254
8,0.084100,0.096624,0.541553,0.310844,0.666667,0.616928,0.639301,0.623303
9,0.084100,0.10511,0.501288,0.324207,0.783333,0.710857,0.727539,0.704049
10,0.084100,0.104518,0.504097,0.323293,0.7,0.622655,0.637073,0.619393


./Regressors/task1_clse_usr2/checkpoint-621
step 6: evaluate


{'eval_loss': 0.07296253740787506, 'eval_r2_score': 0.6425109350831879, 'eval_mean_squared_error': 0.27011576294898987, 'eval_accuracy': 0.75, 'eval_f1': 0.6369772998805256, 'eval_precision': 0.6369772998805256, 'eval_recall': 0.6369772998805256, 'eval_runtime': 0.1549, 'eval_samples_per_second': 387.256, 'eval_steps_per_second': 25.817, 'epoch': 20.0}
DONE
==== round 3 ====
Result[val] {'eval_loss': 0.07296253740787506, 'eval_r2_score': 0.6425109350831879, 'eval_mean_squared_error': 0.27011576294898987, 'eval_accuracy': 0.75, 'eval_f1': 0.6369772998805256, 'eval_precision': 0.6369772998805256, 'eval_recall': 0.6369772998805256, 'eval_runtime': 0.1549, 'eval_samples_per_second': 387.256, 'eval_steps_per_second': 25.817, 'epoch': 20.0}
Result[test] {'eval_loss': 0.10511039197444916, 'eval_r2_score': 0.5012884665536235, 'eval_mean_squared_error': 0.3242073357105255, 'eval_accuracy': 0.7833333333333333, 'eval_f1': 0.71085702040379, 'eval_precision': 0.7275385433280169, 'eval_recall': 0.70

Map:   0%|          | 0/1096 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.120452,0.428498,0.347062,0.6,0.537449,0.57549,0.535024
2,No log,0.097547,0.537176,0.312324,0.75,0.682084,0.683415,0.682827
3,No log,0.101529,0.518281,0.318636,0.733333,0.684874,0.680918,0.701231
4,No log,0.115184,0.453494,0.339387,0.7,0.534392,0.536341,0.543996
5,No log,0.087298,0.585804,0.295462,0.733333,0.667959,0.673016,0.670922
6,No log,0.102683,0.512806,0.320442,0.683333,0.579207,0.604798,0.574592
7,No log,0.096054,0.54426,0.309925,0.716667,0.634209,0.64471,0.631297
8,0.081200,0.09132,0.566719,0.302192,0.716667,0.636213,0.645238,0.633885
9,0.081200,0.099446,0.528163,0.315351,0.666667,0.599222,0.616588,0.595583
10,0.081200,0.09674,0.541001,0.311031,0.733333,0.648701,0.655556,0.64579


./Regressors/task1_clse_usr3/checkpoint-207
step 6: evaluate


{'eval_loss': 0.09156220406293869, 'eval_r2_score': 0.5513794379116762, 'eval_mean_squared_error': 0.30259251594543457, 'eval_accuracy': 0.7333333333333333, 'eval_f1': 0.6466709760827408, 'eval_precision': 0.6813725490196078, 'eval_recall': 0.6347670250896057, 'eval_runtime': 0.1732, 'eval_samples_per_second': 346.364, 'eval_steps_per_second': 23.091, 'epoch': 20.0}
DONE
==== round 4 ====
Result[val] {'eval_loss': 0.09156220406293869, 'eval_r2_score': 0.5513794379116762, 'eval_mean_squared_error': 0.30259251594543457, 'eval_accuracy': 0.7333333333333333, 'eval_f1': 0.6466709760827408, 'eval_precision': 0.6813725490196078, 'eval_recall': 0.6347670250896057, 'eval_runtime': 0.1732, 'eval_samples_per_second': 346.364, 'eval_steps_per_second': 23.091, 'epoch': 20.0}
Result[test] {'eval_loss': 0.10152889788150787, 'eval_r2_score': 0.5182813134667994, 'eval_mean_squared_error': 0.3186360001564026, 'eval_accuracy': 0.7333333333333333, 'eval_f1': 0.6848739495798318, 'eval_precision': 0.6809178

Map:   0%|          | 0/1096 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.138625,0.342276,0.372323,0.55,0.51877,0.57365,0.524442
2,No log,0.252261,-0.196891,0.502256,0.266667,0.235317,0.343569,0.248505
3,No log,0.21195,-0.005627,0.46038,0.183333,0.152778,0.548485,0.334599
4,No log,0.219781,-0.042781,0.468808,0.183333,0.141197,0.386905,0.359731
5,No log,0.209669,0.005194,0.457896,0.183333,0.142886,0.385057,0.362319
6,No log,0.272995,-0.295265,0.522489,0.383333,0.184739,0.127778,0.333333
7,No log,0.237655,-0.127588,0.487499,0.166667,0.116013,0.384181,0.347826
8,0.207400,0.25482,-0.209031,0.504797,0.383333,0.191667,0.134503,0.333333
9,0.207400,0.277451,-0.316407,0.526736,0.383333,0.184739,0.127778,0.333333
10,0.207400,0.236625,-0.122702,0.486441,0.183333,0.142886,0.385057,0.362319


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted s

./Regressors/task1_clse_usr4/checkpoint-69
step 6: evaluate


{'eval_loss': 0.10695171356201172, 'eval_r2_score': 0.4759766765699701, 'eval_mean_squared_error': 0.32703471183776855, 'eval_accuracy': 0.6333333333333333, 'eval_f1': 0.6041083099906629, 'eval_precision': 0.6555555555555556, 'eval_recall': 0.616905615292712, 'eval_runtime': 0.154, 'eval_samples_per_second': 389.652, 'eval_steps_per_second': 25.977, 'epoch': 20.0}
DONE
==== round 5 ====
Result[val] {'eval_loss': 0.10695171356201172, 'eval_r2_score': 0.4759766765699701, 'eval_mean_squared_error': 0.32703471183776855, 'eval_accuracy': 0.6333333333333333, 'eval_f1': 0.6041083099906629, 'eval_precision': 0.6555555555555556, 'eval_recall': 0.616905615292712, 'eval_runtime': 0.154, 'eval_samples_per_second': 389.652, 'eval_steps_per_second': 25.977, 'epoch': 20.0}
Result[test] {'eval_loss': 0.13862453401088715, 'eval_r2_score': 0.34227571271991475, 'eval_mean_squared_error': 0.3723231256008148, 'eval_accuracy': 0.55, 'eval_f1': 0.5187700433880399, 'eval_precision': 0.5736495388669302, 'eval_

In [45]:
# !zip -r task1.zip Regressors/task1_*

In [46]:
# !cp -r Regressors/task1_* /content/drive/MyDrive/TalkLikeMom/src/Classifier/Regressors

In [68]:
# !mkdir ShapleyValuesV2

In [69]:
import shap
import scipy as sp

import pickle

def load_shap_values(filepath):
    with open(filepath, 'rb') as fin:
        obj = pickle.load(fin)
    return obj

def save_shap_values(filepath, obj):
    with open(filepath, 'wb') as fin:
        pickle.dump(obj, fin)


def get_shapley(df, trainer, shap_path):
    train, val, test = df
    train["split"] = "train"
    val["split"] = "val"
    test["split"] = "test"

    df = pd.concat([train, test, val])
    print("DATA SIZE", len(df))
    def f(x):
        tokenizer = trainer.tokenizer
        model = trainer.model

        # print(type(x.tolist()))
        tv = tokenizer(x.tolist(), padding='max_length', truncation=True, max_length=max_length, return_tensors="pt")
        input_ids = tv["input_ids"].cuda()
        attention_mask = tv["attention_mask"].cuda()
        # tv = tokenizer(x, truncation=True, max_length=max_length).cuda()
        outputs = model(input_ids, attention_mask=attention_mask)[0].detach().cpu().numpy()

        # scores = (np.exp(outputs).T / np.exp(outputs).sum(-1)).T
        # val = sp.special.logit(scores[:,1]) # use one vs rest logit units

        val = outputs.reshape(-1)
        return val

    explainer = shap.Explainer(f, trainer.tokenizer)
    shap_values = explainer({
        "label": df["label"].values,
        "text": [preprocess(t) for t in df["text"].values],
    })

    save_shap_values(shap_path, shap_values)

    return shap_values

get_shapley(df, best_trainer, "./ShapleyValuesV2/task1_clse_regressor.pkl");

DATA SIZE 1216


PartitionExplainer explainer: 1217it [38:08,  1.89s/it]


In [70]:
cp ./ShapleyValuesV2/task1_clse_regressor.pkl /content/drive/MyDrive/TalkLikeMom/src/Classifier/ShapleyValuesV2/task1_clse_regressor.pkl

In [91]:
def authority1_label_fn(label):
    if label == '0. Very respect':
        return 1
    elif label =='1. Respect':
        return 0.5
    elif label == "2. Normal":
        return 0
    elif type(label)==str:
        assert(False)

    # [0, 0.33) =>
    # [0.33, 0.66) =>
    # [0.66, 1] =>

    if label > 0.66:
        return '0. Very respect'
    elif label > 0.33:
        return '1. Respect'
    else:
        return "2. Normal"

In [92]:
best_trainer = None
best_eval_val = None
best_eval_test = None

for i in range(5):
  df = get_task1_conver("../Task1/annotated_conersations.jsonl", "authority", skips = ["3. Not respect"], only_user=True)
  trainer, eval_test, eval_val = run_exp(f"./Regressors/task1_auth_usr{i}", df, seed=i, report=report, regressor_configs={
      "label": "respect",
      "not_label": "not_respect",
      "label_fn": authority1_label_fn,
  })

  print(f"==== round {i+1} ====")
  print("Result[val]", eval_val)
  print("Result[test]", eval_test)

  if (best_eval_val is None) or (best_eval_val["eval_f1"] < eval_val["eval_f1"]):
    best_eval_val = eval_val
    best_eval_test = eval_test
    best_trainer = trainer

  del trainer

print()
print()
print("===================================")
print()
print("BEST MODEL")
print("Result[val]", best_eval_val)
print("Result[test]", best_eval_test)

Loaded 1234 records from ../Task1/annotated_conersations.jsonl
N 1098 61 61
cuda
START
step 1: load data
step 2: load tokenizer
step 3: init data


Map:   0%|          | 0/1098 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.159298,-0.0211,0.399122,0.442623,0.324934,0.321321,0.401307
2,No log,0.140629,0.09857,0.375005,0.442623,0.309783,0.297491,0.351634
3,No log,0.143445,0.080517,0.378742,0.42623,0.359148,0.681259,0.421895
4,No log,0.157433,-0.009145,0.396778,0.42623,0.428374,0.593434,0.488235
5,No log,0.140397,0.100058,0.374696,0.377049,0.323164,0.659259,0.380065
6,No log,0.138957,0.109286,0.37277,0.442623,0.369777,0.455128,0.419281
7,No log,0.145278,0.068768,0.381154,0.360656,0.350769,0.492857,0.406209
8,0.160100,0.126253,0.190716,0.355322,0.52459,0.401199,0.662177,0.418627
9,0.160100,0.142071,0.089323,0.376924,0.508197,0.371168,0.463675,0.383987
10,0.160100,0.13819,0.114205,0.371739,0.442623,0.35873,0.478244,0.382026


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task1_auth_usr0/checkpoint-759
step 6: evaluate


{'eval_loss': 0.1666111797094345, 'eval_r2_score': 0.03807558555836943, 'eval_mean_squared_error': 0.4081803858280182, 'eval_accuracy': 0.47540983606557374, 'eval_f1': 0.4588304093567251, 'eval_precision': 0.5035897435897435, 'eval_recall': 0.4532738095238095, 'eval_runtime': 0.153, 'eval_samples_per_second': 398.664, 'eval_steps_per_second': 26.142, 'epoch': 20.0}
DONE
==== round 1 ====
Result[val] {'eval_loss': 0.1666111797094345, 'eval_r2_score': 0.03807558555836943, 'eval_mean_squared_error': 0.4081803858280182, 'eval_accuracy': 0.47540983606557374, 'eval_f1': 0.4588304093567251, 'eval_precision': 0.5035897435897435, 'eval_recall': 0.4532738095238095, 'eval_runtime': 0.153, 'eval_samples_per_second': 398.664, 'eval_steps_per_second': 26.142, 'epoch': 20.0}
Result[test] {'eval_loss': 0.13774709403514862, 'eval_r2_score': 0.11704220119995146, 'eval_mean_squared_error': 0.3711429834365845, 'eval_accuracy': 0.5081967213114754, 'eval_f1': 0.4569872958257713, 'eval_precision': 0.50674603

Map:   0%|          | 0/1098 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.150808,0.03332,0.388341,0.409836,0.305342,0.36087,0.406536
2,No log,0.139326,0.106919,0.373265,0.52459,0.322308,0.290741,0.363399
3,No log,0.138079,0.114912,0.37159,0.590164,0.39961,0.361556,0.452288
4,No log,0.156115,-0.000693,0.395113,0.360656,0.35197,0.454927,0.393791
5,No log,0.139374,0.106615,0.373328,0.491803,0.458969,0.568519,0.484641
6,No log,0.127253,0.184308,0.356726,0.557377,0.387955,0.356209,0.445098
7,No log,0.143982,0.077078,0.379449,0.590164,0.39961,0.361556,0.452288
8,0.160300,0.135788,0.129598,0.368495,0.557377,0.372873,0.336111,0.420261
9,0.160300,0.135018,0.134538,0.367447,0.442623,0.316494,0.312999,0.364052
10,0.160300,0.156906,-0.005765,0.396113,0.52459,0.362104,0.327327,0.413072


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted s

./Regressors/task1_auth_usr1/checkpoint-966
step 6: evaluate


{'eval_loss': 0.1506463885307312, 'eval_r2_score': 0.1302482446528449, 'eval_mean_squared_error': 0.3881318271160126, 'eval_accuracy': 0.45901639344262296, 'eval_f1': 0.41323249472655244, 'eval_precision': 0.47103174603174597, 'eval_recall': 0.4058531746031746, 'eval_runtime': 0.1568, 'eval_samples_per_second': 389.071, 'eval_steps_per_second': 25.513, 'epoch': 20.0}
DONE
==== round 2 ====
Result[val] {'eval_loss': 0.1506463885307312, 'eval_r2_score': 0.1302482446528449, 'eval_mean_squared_error': 0.3881318271160126, 'eval_accuracy': 0.45901639344262296, 'eval_f1': 0.41323249472655244, 'eval_precision': 0.47103174603174597, 'eval_recall': 0.4058531746031746, 'eval_runtime': 0.1568, 'eval_samples_per_second': 389.071, 'eval_steps_per_second': 25.513, 'epoch': 20.0}
Result[test] {'eval_loss': 0.11435497552156448, 'eval_r2_score': 0.26698556000331464, 'eval_mean_squared_error': 0.33816415071487427, 'eval_accuracy': 0.6229508196721312, 'eval_f1': 0.5375526215078453, 'eval_precision': 0.637

Map:   0%|          | 0/1098 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.150062,0.038107,0.387378,0.360656,0.268889,0.310648,0.352288
2,No log,0.1228,0.212855,0.350428,0.52459,0.391287,0.645515,0.406209
3,No log,0.127119,0.185168,0.356538,0.557377,0.507017,0.63125,0.523856
4,No log,0.140073,0.102134,0.374263,0.47541,0.488485,0.554762,0.528758
5,No log,0.12125,0.222791,0.348209,0.590164,0.528038,0.615873,0.511765
6,No log,0.112334,0.279938,0.335163,0.540984,0.502108,0.554029,0.50719
7,No log,0.121838,0.219021,0.349053,0.590164,0.575195,0.612346,0.584967
8,0.141600,0.118978,0.237355,0.344931,0.557377,0.54361,0.582475,0.565359
9,0.141600,0.119901,0.231436,0.346267,0.622951,0.551198,0.604108,0.543791
10,0.141600,0.124819,0.199914,0.353297,0.557377,0.530167,0.54744,0.528105


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task1_auth_usr2/checkpoint-483
step 6: evaluate


{'eval_loss': 0.13839861750602722, 'eval_r2_score': 0.20095987380031177, 'eval_mean_squared_error': 0.3720197081565857, 'eval_accuracy': 0.4918032786885246, 'eval_f1': 0.4762020905923345, 'eval_precision': 0.5225308641975309, 'eval_recall': 0.4770833333333333, 'eval_runtime': 0.1573, 'eval_samples_per_second': 387.825, 'eval_steps_per_second': 25.431, 'epoch': 20.0}
DONE
==== round 3 ====
Result[val] {'eval_loss': 0.13839861750602722, 'eval_r2_score': 0.20095987380031177, 'eval_mean_squared_error': 0.3720197081565857, 'eval_accuracy': 0.4918032786885246, 'eval_f1': 0.4762020905923345, 'eval_precision': 0.5225308641975309, 'eval_recall': 0.4770833333333333, 'eval_runtime': 0.1573, 'eval_samples_per_second': 387.825, 'eval_steps_per_second': 25.431, 'epoch': 20.0}
Result[test] {'eval_loss': 0.12183768302202225, 'eval_r2_score': 0.21902146221485086, 'eval_mean_squared_error': 0.3490526080131531, 'eval_accuracy': 0.5901639344262295, 'eval_f1': 0.5751950497713211, 'eval_precision': 0.612345

Map:   0%|          | 0/1098 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.152022,0.025537,0.389901,0.409836,0.344795,0.456349,0.412092
2,No log,0.149617,0.040956,0.386804,0.52459,0.308333,0.276812,0.35098
3,No log,0.129953,0.167002,0.36049,0.491803,0.492877,0.682432,0.502614
4,No log,0.134474,0.138023,0.366707,0.508197,0.497469,0.625397,0.512418
5,No log,0.121357,0.222104,0.348363,0.540984,0.5067,0.611905,0.494771
6,No log,0.116038,0.256197,0.340644,0.557377,0.514964,0.660273,0.492157
7,No log,0.119474,0.234175,0.34565,0.590164,0.537876,0.601975,0.536601
8,0.131000,0.131959,0.154147,0.363261,0.508197,0.49026,0.527691,0.505556
9,0.131000,0.127897,0.180181,0.357627,0.57377,0.48921,0.553975,0.483987
10,0.131000,0.146609,0.060239,0.382895,0.508197,0.487329,0.495652,0.511111


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task1_auth_usr3/checkpoint-759
step 6: evaluate


{'eval_loss': 0.19690397381782532, 'eval_r2_score': -0.13681861944585783, 'eval_mean_squared_error': 0.4437386095523834, 'eval_accuracy': 0.5081967213114754, 'eval_f1': 0.44272741990080505, 'eval_precision': 0.4896214896214896, 'eval_recall': 0.4371031746031746, 'eval_runtime': 0.1592, 'eval_samples_per_second': 383.247, 'eval_steps_per_second': 25.131, 'epoch': 20.0}
DONE
==== round 4 ====
Result[val] {'eval_loss': 0.19690397381782532, 'eval_r2_score': -0.13681861944585783, 'eval_mean_squared_error': 0.4437386095523834, 'eval_accuracy': 0.5081967213114754, 'eval_f1': 0.44272741990080505, 'eval_precision': 0.4896214896214896, 'eval_recall': 0.4371031746031746, 'eval_runtime': 0.1592, 'eval_samples_per_second': 383.247, 'eval_steps_per_second': 25.131, 'epoch': 20.0}
Result[test] {'eval_loss': 0.1255347728729248, 'eval_r2_score': 0.19532327975803943, 'eval_mean_squared_error': 0.3543088436126709, 'eval_accuracy': 0.6065573770491803, 'eval_f1': 0.5410242778663831, 'eval_precision': 0.598

Map:   0%|          | 0/1098 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.156547,-0.003463,0.39566,0.508197,0.326349,0.294715,0.366013
2,No log,0.173132,-0.109776,0.416092,0.508197,0.234848,0.191358,0.303922
3,No log,0.133511,0.144198,0.365391,0.47541,0.405749,0.527778,0.40719
4,No log,0.121028,0.224212,0.347891,0.590164,0.522876,0.578431,0.511765
5,No log,0.128388,0.177036,0.358312,0.622951,0.60837,0.650285,0.616993
6,No log,0.136734,0.123533,0.369776,0.590164,0.486869,0.661499,0.468954
7,No log,0.124425,0.202439,0.352739,0.557377,0.486722,0.550694,0.486601
8,0.140200,0.125406,0.196148,0.354127,0.590164,0.511111,0.553333,0.518627
9,0.140200,0.121857,0.218901,0.34908,0.704918,0.623529,0.701852,0.617647
10,0.140200,0.131749,0.155488,0.362973,0.639344,0.534942,0.635714,0.523203


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task1_auth_usr4/checkpoint-621
step 6: evaluate


{'eval_loss': 0.2092660665512085, 'eval_r2_score': -0.20819062150641, 'eval_mean_squared_error': 0.4574560225009918, 'eval_accuracy': 0.5081967213114754, 'eval_f1': 0.45588235294117646, 'eval_precision': 0.4837962962962963, 'eval_recall': 0.4489087301587302, 'eval_runtime': 0.1587, 'eval_samples_per_second': 384.288, 'eval_steps_per_second': 25.199, 'epoch': 20.0}
DONE
==== round 5 ====
Result[val] {'eval_loss': 0.2092660665512085, 'eval_r2_score': -0.20819062150641, 'eval_mean_squared_error': 0.4574560225009918, 'eval_accuracy': 0.5081967213114754, 'eval_f1': 0.45588235294117646, 'eval_precision': 0.4837962962962963, 'eval_recall': 0.4489087301587302, 'eval_runtime': 0.1587, 'eval_samples_per_second': 384.288, 'eval_steps_per_second': 25.199, 'epoch': 20.0}
Result[test] {'eval_loss': 0.12185657024383545, 'eval_r2_score': 0.21890051071793581, 'eval_mean_squared_error': 0.34907957911491394, 'eval_accuracy': 0.7049180327868853, 'eval_f1': 0.6235294117647059, 'eval_precision': 0.701851851

In [93]:
get_shapley(df, best_trainer, "./ShapleyValuesV2/task1_auth_regressor.pkl");

DATA SIZE 1220


PartitionExplainer explainer: 1221it [38:23,  1.90s/it]


In [94]:
cp ./ShapleyValuesV2/task1_auth_regressor.pkl /content/drive/MyDrive/TalkLikeMom/src/Classifier/ShapleyValuesV2/task1_auth_regressor.pkl

## Task2: Train Model

In [78]:
best_trainer = None
best_eval_val = None
best_eval_test = None

for i in range(5):
  df = get_task2_conver("../Task2/annotated/annotated.jsonl", "closeness", seed=i, skips = ["4. Don't like each other"], only_user=True)
  trainer, eval_test, eval_val = run_exp(f"./Regressors/task2_clse_usr{i}", df, seed=i, report=report, regressor_configs={
      "label": "close",
      "not_label": "not_close",
      "label_fn": closeness_label_fn,
  })

  print(f"==== round {i+1} ====")
  print("Result[val]", eval_val)
  print("Result[test]", eval_test)

  if (best_eval_val is None) or (best_eval_val["eval_f1"] < eval_val["eval_f1"]):
    best_eval_val = eval_val
    best_eval_test = eval_test
    best_trainer = trainer

  del trainer

print()
print()
print("===================================")
print()
print("BEST MODEL")
print("Result[val]", best_eval_val)
print("Result[test]", best_eval_test)

Loaded 2486 records from ../Task2/annotated/annotated.jsonl
N 1495 186 186
cuda
START
step 1: load data
step 2: load tokenizer
step 3: init data


Map:   0%|          | 0/1495 [00:00<?, ? examples/s]

Map:   0%|          | 0/186 [00:00<?, ? examples/s]

Map:   0%|          | 0/186 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.129758,-0.097314,0.36022,0.543011,0.298918,0.310866,0.361941
2,No log,0.116695,0.013152,0.341607,0.763441,0.291282,0.262963,0.326437
3,No log,0.131329,-0.110602,0.362394,0.462366,0.278195,0.324393,0.376117
4,No log,0.126454,-0.069369,0.355603,0.774194,0.290909,0.259459,0.331034
5,No log,0.205223,-0.735494,0.453016,0.102151,0.065394,0.253188,0.319413
6,0.133000,0.124442,-0.052359,0.352764,0.774194,0.291793,0.26087,0.331034
7,0.133000,0.11841,-0.001351,0.344108,0.77957,0.292044,0.259857,0.333333
8,0.133000,0.122742,-0.037981,0.350345,0.77957,0.292044,0.259857,0.333333
9,0.133000,0.167896,-0.419834,0.409752,0.188172,0.133971,0.334174,0.372414
10,0.133000,0.121236,-0.025247,0.34819,0.774194,0.292683,0.262295,0.331034


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted s

./Regressors/task2_clse_usr0/checkpoint-1880
step 6: evaluate


{'eval_loss': 0.11331980675458908, 'eval_r2_score': 0.06617948870413892, 'eval_mean_squared_error': 0.33663007616996765, 'eval_accuracy': 0.7365591397849462, 'eval_f1': 0.36540137422490365, 'eval_precision': 0.6534701857282502, 'eval_recall': 0.4161111111111111, 'eval_runtime': 0.472, 'eval_samples_per_second': 394.063, 'eval_steps_per_second': 25.423, 'epoch': 20.0}
DONE
==== round 1 ====
Result[val] {'eval_loss': 0.11331980675458908, 'eval_r2_score': 0.06617948870413892, 'eval_mean_squared_error': 0.33663007616996765, 'eval_accuracy': 0.7365591397849462, 'eval_f1': 0.36540137422490365, 'eval_precision': 0.6534701857282502, 'eval_recall': 0.4161111111111111, 'eval_runtime': 0.472, 'eval_samples_per_second': 394.063, 'eval_steps_per_second': 25.423, 'epoch': 20.0}
Result[test] {'eval_loss': 0.0990162342786789, 'eval_r2_score': 0.16265809441666368, 'eval_mean_squared_error': 0.3146684765815735, 'eval_accuracy': 0.7419354838709677, 'eval_f1': 0.46869830064260887, 'eval_precision': 0.5521

Map:   0%|          | 0/1495 [00:00<?, ? examples/s]

Map:   0%|          | 0/186 [00:00<?, ? examples/s]

Map:   0%|          | 0/186 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.095819,0.037816,0.309547,0.752688,0.312198,0.304225,0.327312
2,No log,0.122386,-0.228959,0.349837,0.672043,0.311732,0.316425,0.351593
3,No log,0.106147,-0.065895,0.325803,0.752688,0.291667,0.284553,0.299145
4,No log,0.109111,-0.095657,0.33032,0.822581,0.338481,0.326311,0.355089
5,No log,0.110597,-0.110578,0.332562,0.790323,0.29697,0.281609,0.314103
6,0.124400,0.097452,0.021419,0.312173,0.817204,0.300692,0.279926,0.324786
7,0.124400,0.132308,-0.32859,0.363742,0.666667,0.297173,0.301536,0.32129
8,0.124400,0.111867,-0.123328,0.334465,0.66129,0.346676,0.4134,0.390893
9,0.124400,0.096859,0.027373,0.311222,0.795699,0.356897,0.477692,0.359813
10,0.124400,0.089378,0.102495,0.298962,0.784946,0.321245,0.305604,0.340132


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task2_clse_usr1/checkpoint-1222
step 6: evaluate


{'eval_loss': 0.11269693821668625, 'eval_r2_score': 0.2802873518210338, 'eval_mean_squared_error': 0.33570367097854614, 'eval_accuracy': 0.6935483870967742, 'eval_f1': 0.5622378093260759, 'eval_precision': 0.6540650406504066, 'eval_recall': 0.568555900621118, 'eval_runtime': 0.4868, 'eval_samples_per_second': 382.114, 'eval_steps_per_second': 24.652, 'epoch': 20.0}
DONE
==== round 2 ====
Result[val] {'eval_loss': 0.11269693821668625, 'eval_r2_score': 0.2802873518210338, 'eval_mean_squared_error': 0.33570367097854614, 'eval_accuracy': 0.6935483870967742, 'eval_f1': 0.5622378093260759, 'eval_precision': 0.6540650406504066, 'eval_recall': 0.568555900621118, 'eval_runtime': 0.4868, 'eval_samples_per_second': 382.114, 'eval_steps_per_second': 24.652, 'epoch': 20.0}
Result[test] {'eval_loss': 0.10994257777929306, 'eval_r2_score': -0.10400504168267655, 'eval_mean_squared_error': 0.3315759003162384, 'eval_accuracy': 0.6774193548387096, 'eval_f1': 0.36485072773732563, 'eval_precision': 0.416730

Map:   0%|          | 0/1495 [00:00<?, ? examples/s]

Map:   0%|          | 0/186 [00:00<?, ? examples/s]

Map:   0%|          | 0/186 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.116984,0.113042,0.34203,0.709677,0.37495,0.343038,0.425138
2,No log,0.119985,0.090291,0.346389,0.731183,0.413046,0.524849,0.444576
3,No log,0.120446,0.086798,0.347053,0.731183,0.367594,0.338992,0.402285
4,No log,0.14424,-0.09361,0.37979,0.537634,0.402811,0.472083,0.457841
5,No log,0.121606,0.078004,0.34872,0.677419,0.397752,0.449275,0.430917
6,0.115100,0.108537,0.177091,0.329449,0.693548,0.425663,0.50591,0.41949
7,0.115100,0.104782,0.20556,0.3237,0.677419,0.413263,0.491989,0.418571
8,0.115100,0.1019,0.227411,0.319217,0.698925,0.448641,0.500403,0.447991
9,0.115100,0.109617,0.168898,0.331085,0.709677,0.467021,0.515258,0.4627
10,0.115100,0.111238,0.156611,0.333523,0.736559,0.517072,0.567337,0.510638


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task2_clse_usr2/checkpoint-1880
step 6: evaluate


{'eval_loss': 0.10126261413097382, 'eval_r2_score': -0.031818350866210876, 'eval_mean_squared_error': 0.3182178735733032, 'eval_accuracy': 0.7580645161290323, 'eval_f1': 0.4398086720068144, 'eval_precision': 0.46875059202424935, 'eval_recall': 0.4326797385620915, 'eval_runtime': 0.4867, 'eval_samples_per_second': 382.168, 'eval_steps_per_second': 24.656, 'epoch': 20.0}
DONE
==== round 3 ====
Result[val] {'eval_loss': 0.10126261413097382, 'eval_r2_score': -0.031818350866210876, 'eval_mean_squared_error': 0.3182178735733032, 'eval_accuracy': 0.7580645161290323, 'eval_f1': 0.4398086720068144, 'eval_precision': 0.46875059202424935, 'eval_recall': 0.4326797385620915, 'eval_runtime': 0.4867, 'eval_samples_per_second': 382.168, 'eval_steps_per_second': 24.656, 'epoch': 20.0}
Result[test] {'eval_loss': 0.10225020349025726, 'eval_r2_score': 0.22475392828634355, 'eval_mean_squared_error': 0.3197658360004425, 'eval_accuracy': 0.7473118279569892, 'eval_f1': 0.5359546165884194, 'eval_precision': 0.

Map:   0%|          | 0/1495 [00:00<?, ? examples/s]

Map:   0%|          | 0/186 [00:00<?, ? examples/s]

Map:   0%|          | 0/186 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.099944,0.101264,0.316139,0.768817,0.319466,0.304491,0.339671
2,No log,0.107233,0.035716,0.327465,0.704301,0.326353,0.319894,0.369776
3,No log,0.103191,0.072064,0.321234,0.688172,0.319993,0.315766,0.36324
4,No log,0.148241,-0.333044,0.385021,0.61828,0.337061,0.487455,0.404139
5,No log,0.12383,-0.113533,0.351896,0.435484,0.250509,0.323249,0.373341
6,0.130000,0.114261,-0.027481,0.338025,0.811828,0.300498,0.276557,0.328976
7,0.130000,0.122628,-0.102719,0.350183,0.806452,0.365657,0.352793,0.383046
8,0.130000,0.122068,-0.097681,0.349382,0.77957,0.321905,0.303186,0.344029
9,0.130000,0.158402,-0.424411,0.397997,0.489247,0.296018,0.659187,0.379976
10,0.130000,0.113128,-0.017292,0.336345,0.795699,0.356944,0.64333,0.363537


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task2_clse_usr3/checkpoint-1692
step 6: evaluate


{'eval_loss': 0.09752047061920166, 'eval_r2_score': 0.07363592556350851, 'eval_mean_squared_error': 0.31228265166282654, 'eval_accuracy': 0.7634408602150538, 'eval_f1': 0.36296296296296293, 'eval_precision': 0.64788994236847, 'eval_recall': 0.36911027568922306, 'eval_runtime': 0.4791, 'eval_samples_per_second': 388.268, 'eval_steps_per_second': 25.05, 'epoch': 20.0}
DONE
==== round 4 ====
Result[val] {'eval_loss': 0.09752047061920166, 'eval_r2_score': 0.07363592556350851, 'eval_mean_squared_error': 0.31228265166282654, 'eval_accuracy': 0.7634408602150538, 'eval_f1': 0.36296296296296293, 'eval_precision': 0.64788994236847, 'eval_recall': 0.36911027568922306, 'eval_runtime': 0.4791, 'eval_samples_per_second': 388.268, 'eval_steps_per_second': 25.05, 'epoch': 20.0}
Result[test] {'eval_loss': 0.090037040412426, 'eval_r2_score': 0.19035120983730713, 'eval_mean_squared_error': 0.30006173253059387, 'eval_accuracy': 0.7741935483870968, 'eval_f1': 0.39432339049956955, 'eval_precision': 0.677209

Map:   0%|          | 0/1495 [00:00<?, ? examples/s]

Map:   0%|          | 0/186 [00:00<?, ? examples/s]

Map:   0%|          | 0/186 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.103519,0.104216,0.321744,0.725806,0.402179,0.517516,0.4258
2,No log,0.091967,0.204178,0.303261,0.688172,0.374359,0.417371,0.395244
3,No log,0.09385,0.187889,0.306349,0.725806,0.508274,0.608937,0.504335
4,No log,0.092707,0.19778,0.304478,0.741935,0.483328,0.588647,0.469739
5,No log,0.09014,0.219993,0.300233,0.736559,0.446706,0.494025,0.440236
6,0.112800,0.081157,0.297722,0.284881,0.790323,0.551066,0.607942,0.530598
7,0.112800,0.090962,0.212872,0.3016,0.774194,0.476119,0.627282,0.456439
8,0.112800,0.098495,0.147694,0.313839,0.801075,0.530198,0.67209,0.493687
9,0.112800,0.08599,0.255901,0.29324,0.811828,0.565477,0.676543,0.525505
10,0.112800,0.093868,0.187727,0.306379,0.795699,0.486656,0.557437,0.462668


./Regressors/task2_clse_usr4/checkpoint-846
step 6: evaluate


{'eval_loss': 0.09288256615400314, 'eval_r2_score': 0.16017895090620626, 'eval_mean_squared_error': 0.3047664165496826, 'eval_accuracy': 0.7688172043010753, 'eval_f1': 0.403699540112417, 'eval_precision': 0.5, 'eval_recall': 0.40138037196860726, 'eval_runtime': 0.4876, 'eval_samples_per_second': 381.462, 'eval_steps_per_second': 24.61, 'epoch': 20.0}
DONE
==== round 5 ====
Result[val] {'eval_loss': 0.09288256615400314, 'eval_r2_score': 0.16017895090620626, 'eval_mean_squared_error': 0.3047664165496826, 'eval_accuracy': 0.7688172043010753, 'eval_f1': 0.403699540112417, 'eval_precision': 0.5, 'eval_recall': 0.40138037196860726, 'eval_runtime': 0.4876, 'eval_samples_per_second': 381.462, 'eval_steps_per_second': 24.61, 'epoch': 20.0}
Result[test] {'eval_loss': 0.08598995208740234, 'eval_r2_score': 0.25590090592993253, 'eval_mean_squared_error': 0.29324042797088623, 'eval_accuracy': 0.8118279569892473, 'eval_f1': 0.5654769434655583, 'eval_precision': 0.6765432098765433, 'eval_recall': 0.52

In [80]:
report = "none"
batch_size = 16
max_length = 128
num_epochs = 100

In [82]:
df = get_task2_conver("../Task2/annotated/annotated.jsonl", "closeness", seed=42, skips = ["4. Don't like each other"], only_user=True)
trainer, eval_test, eval_val = run_exp(f"./Regressors/task2_clse_usr{i}", df, seed=42, report=report, regressor_configs={
    "label": "close",
    "not_label": "not_close",
    "label_fn": closeness_label_fn,
})

print("Result[val]", eval_val)
print("Result[test]", eval_test)

Loaded 2486 records from ../Task2/annotated/annotated.jsonl
N 1495 186 186
cuda
START
step 1: load data
step 2: load tokenizer
step 3: init data


Map:   0%|          | 0/1495 [00:00<?, ? examples/s]

Map:   0%|          | 0/186 [00:00<?, ? examples/s]

Map:   0%|          | 0/186 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.114168,0.093053,0.337888,0.61828,0.431471,0.674068,0.458771
2,No log,0.120355,0.0439,0.346923,0.747312,0.312443,0.294697,0.342908
3,No log,0.088404,0.297725,0.297327,0.731183,0.481485,0.666948,0.458842
4,No log,0.08733,0.306251,0.295517,0.736559,0.449241,0.686036,0.456903
5,No log,0.112012,0.110176,0.334683,0.752688,0.434903,0.657949,0.417754
6,0.106600,0.085138,0.323665,0.291784,0.768817,0.547919,0.709732,0.529267
7,0.106600,0.086346,0.314067,0.293848,0.736559,0.50593,0.63733,0.486478
8,0.106600,0.094282,0.251026,0.307054,0.774194,0.453008,0.694785,0.444846
9,0.106600,0.08413,0.331673,0.290052,0.752688,0.492542,0.638983,0.485934
10,0.106600,0.101224,0.195878,0.318157,0.77957,0.485453,0.643155,0.472482


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task2_clse_usr4/checkpoint-4324
step 6: evaluate


{'eval_loss': 0.1276169717311859, 'eval_r2_score': -0.013727498563620877, 'eval_mean_squared_error': 0.3572351634502411, 'eval_accuracy': 0.7634408602150538, 'eval_f1': 0.39485936832790564, 'eval_precision': 0.4395663956639566, 'eval_recall': 0.3880727773345223, 'eval_runtime': 0.4885, 'eval_samples_per_second': 380.795, 'eval_steps_per_second': 24.567, 'epoch': 100.0}
DONE
Result[val] {'eval_loss': 0.1276169717311859, 'eval_r2_score': -0.013727498563620877, 'eval_mean_squared_error': 0.3572351634502411, 'eval_accuracy': 0.7634408602150538, 'eval_f1': 0.39485936832790564, 'eval_precision': 0.4395663956639566, 'eval_recall': 0.3880727773345223, 'eval_runtime': 0.4885, 'eval_samples_per_second': 380.795, 'eval_steps_per_second': 24.567, 'epoch': 100.0}
Result[test] {'eval_loss': 0.08259650319814682, 'eval_r2_score': 0.34385560339569476, 'eval_mean_squared_error': 0.28739607334136963, 'eval_accuracy': 0.7795698924731183, 'eval_f1': 0.5721034839214372, 'eval_precision': 0.6136111111111111,

In [83]:
report = "none"
batch_size = 16
max_length = 128
num_epochs = 20

In [84]:
get_shapley(df, best_trainer, "./ShapleyValuesV2/task2_clse_regressor.pkl");

DATA SIZE 1867


PartitionExplainer explainer: 1868it [41:25,  1.34s/it]


In [85]:
cp ./ShapleyValuesV2/task2_clse_regressor.pkl /content/drive/MyDrive/TalkLikeMom/src/Classifier/ShapleyValuesV2/task2_clse_regressor.pkl

In [86]:
def authority2_label_fn(label):
    if label == '1. Respect':
        return 1
    elif label =='2. Normal':
        return 0.5
    elif label == "3. Not respect":
        return 0
    elif type(label)==str:
        assert(False)

    if label > 0.66:
        return '1. Respect'
    elif label > 0.33:
        return '2. Normal'
    else:
        return "3. Not respect"

In [88]:
best_trainer = None
best_eval_val = None
best_eval_test = None

for i in range(5):
  df = get_task2_conver("../Task2/annotated/annotated.jsonl", "authority", seed=i, skips = [], only_user=True)
  trainer, eval_test, eval_val = run_exp(f"./Regressors/task2_auth_usr{i}", df, seed=i, report=report, regressor_configs={
      "label": "respect",
      "not_label": "not_respect",
      "label_fn": authority2_label_fn,
  })

  print(f"==== round {i+1} ====")
  print("Result[val]", eval_val)
  print("Result[test]", eval_test)

  if (best_eval_val is None) or (best_eval_val["eval_f1"] < eval_val["eval_f1"]):
    best_eval_val = eval_val
    best_eval_test = eval_test
    best_trainer = trainer

  del trainer

print()
print()
print("===================================")
print()
print("BEST MODEL")
print("Result[val]", best_eval_val)
print("Result[test]", best_eval_test)

Loaded 2486 records from ../Task2/annotated/annotated.jsonl
N 1876 234 234
cuda
START
step 1: load data
step 2: load tokenizer
step 3: init data


Map:   0%|          | 0/1876 [00:00<?, ? examples/s]

Map:   0%|          | 0/234 [00:00<?, ? examples/s]

Map:   0%|          | 0/234 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.062488,0.086107,0.249977,0.568376,0.54792,0.575329,0.624387
2,No log,0.045758,0.330782,0.213912,0.692308,0.597263,0.580506,0.655882
3,No log,0.054328,0.205458,0.233083,0.619658,0.531139,0.512231,0.597181
4,No log,0.061347,0.102803,0.247683,0.598291,0.528382,0.560374,0.629657
5,0.066800,0.055914,0.182264,0.23646,0.769231,0.623798,0.662262,0.598162
6,0.066800,0.059143,0.135032,0.243194,0.683761,0.575848,0.561674,0.609681
7,0.066800,0.061162,0.105501,0.24731,0.713675,0.58528,0.581218,0.598039
8,0.066800,0.053637,0.21556,0.231596,0.74359,0.608825,0.626928,0.594853
9,0.057300,0.05137,0.248714,0.22665,0.739316,0.613634,0.625454,0.61826
10,0.057300,0.041984,0.385979,0.204901,0.769231,0.705564,0.674093,0.758824


./Regressors/task2_auth_usr0/checkpoint-2360
step 6: evaluate


{'eval_loss': 0.0347767136991024, 'eval_r2_score': 0.4736777907133505, 'eval_mean_squared_error': 0.186485156416893, 'eval_accuracy': 0.8076923076923077, 'eval_f1': 0.7300322859315348, 'eval_precision': 0.7169197147458016, 'eval_recall': 0.7748680867695915, 'eval_runtime': 0.5787, 'eval_samples_per_second': 404.329, 'eval_steps_per_second': 25.919, 'epoch': 20.0}
DONE
==== round 1 ====
Result[val] {'eval_loss': 0.0347767136991024, 'eval_r2_score': 0.4736777907133505, 'eval_mean_squared_error': 0.186485156416893, 'eval_accuracy': 0.8076923076923077, 'eval_f1': 0.7300322859315348, 'eval_precision': 0.7169197147458016, 'eval_recall': 0.7748680867695915, 'eval_runtime': 0.5787, 'eval_samples_per_second': 404.329, 'eval_steps_per_second': 25.919, 'epoch': 20.0}
Result[test] {'eval_loss': 0.033283431082963943, 'eval_r2_score': 0.5132298474651797, 'eval_mean_squared_error': 0.182437464594841, 'eval_accuracy': 0.8034188034188035, 'eval_f1': 0.7534862798020693, 'eval_precision': 0.7099338454972

Map:   0%|          | 0/1876 [00:00<?, ? examples/s]

Map:   0%|          | 0/234 [00:00<?, ? examples/s]

Map:   0%|          | 0/234 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.069885,0.161201,0.264357,0.636752,0.473797,0.507407,0.459087
2,No log,0.064438,0.22657,0.253847,0.692308,0.516044,0.673203,0.558153
3,No log,0.056112,0.326503,0.236881,0.598291,0.57233,0.637824,0.616374
4,No log,0.053625,0.356365,0.23157,0.666667,0.60792,0.604255,0.644422
5,0.074600,0.050551,0.393261,0.224834,0.709402,0.684281,0.692696,0.734773
6,0.074600,0.044503,0.465847,0.210957,0.74359,0.696357,0.676124,0.740846
7,0.074600,0.040458,0.514395,0.201142,0.769231,0.723014,0.700754,0.754105
8,0.074600,0.042881,0.485318,0.207077,0.739316,0.687737,0.672235,0.719681
9,0.047600,0.039147,0.530128,0.197857,0.769231,0.715909,0.701853,0.734638
10,0.047600,0.040377,0.515374,0.200939,0.713675,0.66956,0.653477,0.713495


./Regressors/task2_auth_usr1/checkpoint-1298
step 6: evaluate


{'eval_loss': 0.03297119215130806, 'eval_r2_score': 0.4427869007038694, 'eval_mean_squared_error': 0.18157970905303955, 'eval_accuracy': 0.7863247863247863, 'eval_f1': 0.7062366009734431, 'eval_precision': 0.6625192012288786, 'eval_recall': 0.7846341805363616, 'eval_runtime': 0.5656, 'eval_samples_per_second': 413.739, 'eval_steps_per_second': 26.522, 'epoch': 20.0}
DONE
==== round 2 ====
Result[val] {'eval_loss': 0.03297119215130806, 'eval_r2_score': 0.4427869007038694, 'eval_mean_squared_error': 0.18157970905303955, 'eval_accuracy': 0.7863247863247863, 'eval_f1': 0.7062366009734431, 'eval_precision': 0.6625192012288786, 'eval_recall': 0.7846341805363616, 'eval_runtime': 0.5656, 'eval_samples_per_second': 413.739, 'eval_steps_per_second': 26.522, 'epoch': 20.0}
Result[test] {'eval_loss': 0.03633056953549385, 'eval_r2_score': 0.5639376669873667, 'eval_mean_squared_error': 0.1906057894229889, 'eval_accuracy': 0.7863247863247863, 'eval_f1': 0.7530484853117277, 'eval_precision': 0.7295443

Map:   0%|          | 0/1876 [00:00<?, ? examples/s]

Map:   0%|          | 0/234 [00:00<?, ? examples/s]

Map:   0%|          | 0/234 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.070743,-0.052615,0.265976,0.581197,0.458563,0.659475,0.532353
2,No log,0.052901,0.212868,0.230002,0.641026,0.532264,0.597442,0.626634
3,No log,0.038007,0.434483,0.194953,0.74359,0.701657,0.662457,0.789216
4,No log,0.037574,0.440917,0.193841,0.713675,0.718723,0.714489,0.807353
5,0.062900,0.033267,0.505016,0.182391,0.782051,0.740412,0.69573,0.824346
6,0.062900,0.046473,0.308508,0.215577,0.662393,0.612609,0.63443,0.730392
7,0.062900,0.039222,0.416405,0.198045,0.747863,0.686329,0.687435,0.782353
8,0.062900,0.035164,0.476783,0.187521,0.705128,0.707524,0.701453,0.784314
9,0.032100,0.039784,0.408041,0.199459,0.747863,0.679751,0.690015,0.769608
10,0.032100,0.032366,0.518411,0.179906,0.794872,0.731503,0.699704,0.803922


./Regressors/task2_auth_usr2/checkpoint-590
step 6: evaluate


{'eval_loss': 0.041273921728134155, 'eval_r2_score': 0.44007566400115916, 'eval_mean_squared_error': 0.20315982401371002, 'eval_accuracy': 0.7649572649572649, 'eval_f1': 0.7076677294680107, 'eval_precision': 0.6765525939438982, 'eval_recall': 0.7582463288345641, 'eval_runtime': 0.5718, 'eval_samples_per_second': 409.247, 'eval_steps_per_second': 26.234, 'epoch': 20.0}
DONE
==== round 3 ====
Result[val] {'eval_loss': 0.041273921728134155, 'eval_r2_score': 0.44007566400115916, 'eval_mean_squared_error': 0.20315982401371002, 'eval_accuracy': 0.7649572649572649, 'eval_f1': 0.7076677294680107, 'eval_precision': 0.6765525939438982, 'eval_recall': 0.7582463288345641, 'eval_runtime': 0.5718, 'eval_samples_per_second': 409.247, 'eval_steps_per_second': 26.234, 'epoch': 20.0}
Result[test] {'eval_loss': 0.03326653316617012, 'eval_r2_score': 0.5050156564908929, 'eval_mean_squared_error': 0.18239116668701172, 'eval_accuracy': 0.782051282051282, 'eval_f1': 0.7404120764779764, 'eval_precision': 0.695

Map:   0%|          | 0/1876 [00:00<?, ? examples/s]

Map:   0%|          | 0/234 [00:00<?, ? examples/s]

Map:   0%|          | 0/234 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.040179,0.503382,0.200446,0.769231,0.727368,0.703469,0.772081
2,No log,0.039898,0.506853,0.199744,0.75641,0.715471,0.685297,0.767844
3,No log,0.041601,0.485798,0.203964,0.75641,0.69832,0.675,0.7352
4,No log,0.039413,0.51285,0.198526,0.764957,0.710385,0.696572,0.751073
5,0.057300,0.037208,0.540101,0.192893,0.760684,0.709831,0.689906,0.756657
6,0.057300,0.037908,0.531447,0.1947,0.773504,0.721618,0.703911,0.757159
7,0.057300,0.05755,0.288665,0.239896,0.623932,0.590125,0.633749,0.656502
8,0.057300,0.033953,0.58033,0.184264,0.82906,0.794146,0.766382,0.836578
9,0.026700,0.043495,0.462392,0.208554,0.700855,0.667069,0.6651,0.732948
10,0.026700,0.031894,0.605785,0.178588,0.82906,0.784939,0.76001,0.819322


./Regressors/task2_auth_usr3/checkpoint-944
step 6: evaluate


{'eval_loss': 0.03425753489136696, 'eval_r2_score': 0.5414799030795114, 'eval_mean_squared_error': 0.1850879043340683, 'eval_accuracy': 0.7735042735042735, 'eval_f1': 0.7663545511306395, 'eval_precision': 0.7408824987135069, 'eval_recall': 0.823083793012871, 'eval_runtime': 0.5669, 'eval_samples_per_second': 412.786, 'eval_steps_per_second': 26.461, 'epoch': 20.0}
DONE
==== round 4 ====
Result[val] {'eval_loss': 0.03425753489136696, 'eval_r2_score': 0.5414799030795114, 'eval_mean_squared_error': 0.1850879043340683, 'eval_accuracy': 0.7735042735042735, 'eval_f1': 0.7663545511306395, 'eval_precision': 0.7408824987135069, 'eval_recall': 0.823083793012871, 'eval_runtime': 0.5669, 'eval_samples_per_second': 412.786, 'eval_steps_per_second': 26.461, 'epoch': 20.0}
Result[test] {'eval_loss': 0.03395313769578934, 'eval_r2_score': 0.5803300506299593, 'eval_mean_squared_error': 0.18426376581192017, 'eval_accuracy': 0.8290598290598291, 'eval_f1': 0.7941461595824011, 'eval_precision': 0.7663819875

Map:   0%|          | 0/1876 [00:00<?, ? examples/s]

Map:   0%|          | 0/234 [00:00<?, ? examples/s]

Map:   0%|          | 0/234 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.050687,0.393914,0.225138,0.735043,0.683315,0.681266,0.686472
2,No log,0.040865,0.511363,0.20215,0.764957,0.704537,0.723504,0.730473
3,No log,0.056501,0.324393,0.2377,0.675214,0.639127,0.650983,0.713646
4,No log,0.042851,0.487609,0.207006,0.696581,0.707016,0.73901,0.745183
5,0.066400,0.035469,0.575888,0.188331,0.794872,0.764414,0.742192,0.793631
6,0.066400,0.03513,0.579933,0.187431,0.807692,0.783992,0.794841,0.781327
7,0.066400,0.039281,0.530304,0.198194,0.75641,0.723291,0.694079,0.782226
8,0.066400,0.037386,0.552955,0.193356,0.747863,0.741656,0.739884,0.769361
9,0.032200,0.035372,0.577043,0.188074,0.794872,0.752811,0.737672,0.780584
10,0.032200,0.040446,0.516367,0.201113,0.786325,0.735651,0.757681,0.758754


./Regressors/task2_auth_usr4/checkpoint-708
step 6: evaluate


{'eval_loss': 0.03569203242659569, 'eval_r2_score': 0.571319862367159, 'eval_mean_squared_error': 0.18892334401607513, 'eval_accuracy': 0.7692307692307693, 'eval_f1': 0.7466489832007074, 'eval_precision': 0.7442687890449085, 'eval_recall': 0.7816906963248428, 'eval_runtime': 0.5726, 'eval_samples_per_second': 408.655, 'eval_steps_per_second': 26.196, 'epoch': 20.0}
DONE
==== round 5 ====
Result[val] {'eval_loss': 0.03569203242659569, 'eval_r2_score': 0.571319862367159, 'eval_mean_squared_error': 0.18892334401607513, 'eval_accuracy': 0.7692307692307693, 'eval_f1': 0.7466489832007074, 'eval_precision': 0.7442687890449085, 'eval_recall': 0.7816906963248428, 'eval_runtime': 0.5726, 'eval_samples_per_second': 408.655, 'eval_steps_per_second': 26.196, 'epoch': 20.0}
Result[test] {'eval_loss': 0.03513020649552345, 'eval_r2_score': 0.5799334888197065, 'eval_mean_squared_error': 0.18743053078651428, 'eval_accuracy': 0.8076923076923077, 'eval_f1': 0.7839922854387656, 'eval_precision': 0.79484126

In [89]:
get_shapley(df, best_trainer, "./ShapleyValuesV2/task2_auth_regressor.pkl");

DATA SIZE 2344


PartitionExplainer explainer: 2345it [53:15,  1.37s/it]


In [90]:
cp ./ShapleyValuesV2/task2_auth_regressor.pkl /content/drive/MyDrive/TalkLikeMom/src/Classifier/ShapleyValuesV2/task2_auth_regressor.pkl

## Task3: Train Model

In [72]:
best_trainer = None
best_eval_val = None
best_eval_test = None

for i in range(5):
  df = get_task1_conver("../Task3/annotated/annotated.jsonl", "closeness", skips = ["4. Don't like each other"], only_user=True)
  trainer, eval_test, eval_val = run_exp(f"./Regressors/task3_clse_usr{i}", df, seed=i, report=report, regressor_configs={
      "label": "close",
      "not_label": "not_close",
      "label_fn": closeness_label_fn,
  })

  print(f"==== round {i+1} ====")
  print("Result[val]", eval_val)
  print("Result[test]", eval_test)

  if (best_eval_val is None) or (best_eval_val["eval_f1"] < eval_val["eval_f1"]):
    best_eval_val = eval_val
    best_eval_test = eval_test
    best_trainer = trainer

  del trainer

print()
print()
print("===================================")
print()
print("BEST MODEL")
print("Result[val]", best_eval_val)
print("Result[test]", best_eval_test)

Loaded 1221 records from ../Task3/annotated/annotated.jsonl
N 1090 60 60
cuda
START
step 1: load data
step 2: load tokenizer
step 3: init data


Map:   0%|          | 0/1090 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.047994,0.422146,0.219075,0.8,0.547563,0.532738,0.564167
2,No log,0.106828,-0.286224,0.326846,0.483333,0.416713,0.427676,0.423194
3,No log,0.059386,0.284983,0.243693,0.633333,0.556633,0.640873,0.563611
4,No log,0.077719,0.064252,0.278781,0.666667,0.463636,0.461111,0.472083
5,No log,0.04758,0.427132,0.218128,0.7,0.608832,0.641827,0.599444
6,No log,0.046779,0.436779,0.216284,0.816667,0.718529,0.874808,0.675278
7,No log,0.049283,0.40663,0.221997,0.8,0.67619,0.721053,0.653194
8,0.074100,0.077451,0.067485,0.278299,0.766667,0.617431,0.633734,0.629444
9,0.074100,0.058971,0.289986,0.242839,0.683333,0.479941,0.486147,0.476667
10,0.074100,0.044889,0.459535,0.211869,0.833333,0.702165,0.729968,0.685694


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task3_clse_usr0/checkpoint-414
step 6: evaluate


{'eval_loss': 0.049443282186985016, 'eval_r2_score': 0.34740297471309256, 'eval_mean_squared_error': 0.22235846519470215, 'eval_accuracy': 0.7, 'eval_f1': 0.6074888074888075, 'eval_precision': 0.6490384615384616, 'eval_recall': 0.6183183183183183, 'eval_runtime': 0.1545, 'eval_samples_per_second': 388.442, 'eval_steps_per_second': 25.896, 'epoch': 20.0}
DONE
==== round 1 ====
Result[val] {'eval_loss': 0.049443282186985016, 'eval_r2_score': 0.34740297471309256, 'eval_mean_squared_error': 0.22235846519470215, 'eval_accuracy': 0.7, 'eval_f1': 0.6074888074888075, 'eval_precision': 0.6490384615384616, 'eval_recall': 0.6183183183183183, 'eval_runtime': 0.1545, 'eval_samples_per_second': 388.442, 'eval_steps_per_second': 25.896, 'epoch': 20.0}
Result[test] {'eval_loss': 0.046778663992881775, 'eval_r2_score': 0.43677853210041817, 'eval_mean_squared_error': 0.21628376841545105, 'eval_accuracy': 0.8166666666666667, 'eval_f1': 0.7185285015473695, 'eval_precision': 0.8748079877112135, 'eval_recall

Map:   0%|          | 0/1090 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.0637,0.233044,0.252388,0.75,0.599104,0.632143,0.619028
2,No log,0.041737,0.497479,0.204297,0.75,0.643315,0.67899,0.636528
3,No log,0.04164,0.498644,0.20406,0.783333,0.66703,0.697929,0.657361
4,No log,0.045381,0.453604,0.213029,0.8,0.651786,0.645833,0.661944
5,No log,0.040897,0.507599,0.202229,0.766667,0.655125,0.687176,0.644028
6,No log,0.055537,0.331328,0.235663,0.616667,0.542745,0.62004,0.553194
7,No log,0.044618,0.462795,0.211229,0.833333,0.702165,0.729968,0.685694
8,0.056000,0.044185,0.468011,0.210201,0.733333,0.596318,0.588771,0.623194
9,0.056000,0.050419,0.392947,0.224542,0.716667,0.601235,0.618038,0.615694
10,0.056000,0.042776,0.484968,0.206824,0.766667,0.637665,0.639506,0.644028


./Regressors/task3_clse_usr1/checkpoint-483
step 6: evaluate


{'eval_loss': 0.04462461546063423, 'eval_r2_score': 0.4110041741187326, 'eval_mean_squared_error': 0.21124538779258728, 'eval_accuracy': 0.7666666666666667, 'eval_f1': 0.5440190352584902, 'eval_precision': 0.5435897435897435, 'eval_recall': 0.5599099099099099, 'eval_runtime': 0.1584, 'eval_samples_per_second': 378.686, 'eval_steps_per_second': 25.246, 'epoch': 20.0}
DONE
==== round 2 ====
Result[val] {'eval_loss': 0.04462461546063423, 'eval_r2_score': 0.4110041741187326, 'eval_mean_squared_error': 0.21124538779258728, 'eval_accuracy': 0.7666666666666667, 'eval_f1': 0.5440190352584902, 'eval_precision': 0.5435897435897435, 'eval_recall': 0.5599099099099099, 'eval_runtime': 0.1584, 'eval_samples_per_second': 378.686, 'eval_steps_per_second': 25.246, 'epoch': 20.0}
Result[test] {'eval_loss': 0.044617846608161926, 'eval_r2_score': 0.4627951863502182, 'eval_mean_squared_error': 0.2112293541431427, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7021650326797385, 'eval_precision': 0.729967

Map:   0%|          | 0/1090 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.051149,0.384156,0.226162,0.733333,0.516543,0.514815,0.519583
2,No log,0.055279,0.334434,0.235115,0.716667,0.619639,0.675461,0.615694
3,No log,0.067804,0.183626,0.260393,0.533333,0.340317,0.426667,0.403333
4,No log,0.050274,0.394699,0.224218,0.733333,0.533688,0.55303,0.516667
5,No log,0.054878,0.339261,0.234261,0.716667,0.489967,0.484917,0.512083
6,No log,0.05762,0.306246,0.240042,0.633333,0.434295,0.454701,0.46
7,No log,0.050156,0.396113,0.223956,0.616667,0.415522,0.441799,0.4525
8,0.063900,0.038672,0.534378,0.196653,0.783333,0.667114,0.694743,0.651528
9,0.063900,0.036867,0.556112,0.192009,0.766667,0.655556,0.684921,0.644028
10,0.063900,0.041081,0.505375,0.202685,0.783333,0.540589,0.533951,0.559583


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task3_clse_usr2/checkpoint-1380
step 6: evaluate


{'eval_loss': 0.05227936804294586, 'eval_r2_score': 0.30996980424524345, 'eval_mean_squared_error': 0.22864681482315063, 'eval_accuracy': 0.75, 'eval_f1': 0.515032679738562, 'eval_precision': 0.5138248847926268, 'eval_recall': 0.5509009009009009, 'eval_runtime': 0.1544, 'eval_samples_per_second': 388.71, 'eval_steps_per_second': 25.914, 'epoch': 20.0}
DONE
==== round 3 ====
Result[val] {'eval_loss': 0.05227936804294586, 'eval_r2_score': 0.30996980424524345, 'eval_mean_squared_error': 0.22864681482315063, 'eval_accuracy': 0.75, 'eval_f1': 0.515032679738562, 'eval_precision': 0.5138248847926268, 'eval_recall': 0.5509009009009009, 'eval_runtime': 0.1544, 'eval_samples_per_second': 388.71, 'eval_steps_per_second': 25.914, 'epoch': 20.0}
Result[test] {'eval_loss': 0.036007609218358994, 'eval_r2_score': 0.5664635685525004, 'eval_mean_squared_error': 0.18975670635700226, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7301587301587302, 'eval_precision': 0.8890168970814133, 'eval_recall': 0.

Map:   0%|          | 0/1090 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.091715,-0.104255,0.302844,0.516667,0.477023,0.772894,0.493611
2,No log,0.060982,0.265766,0.246946,0.65,0.439376,0.480164,0.47625
3,No log,0.055185,0.33556,0.234916,0.716667,0.499193,0.495818,0.512083
4,No log,0.080499,0.030779,0.283724,0.533333,0.460718,0.527911,0.483611
5,No log,0.065762,0.208218,0.256441,0.716667,0.4903,0.504785,0.515
6,No log,0.045442,0.452868,0.213172,0.733333,0.501266,0.494721,0.525417
7,No log,0.046476,0.440422,0.215583,0.733333,0.631821,0.668889,0.623194
8,0.073800,0.045125,0.456687,0.212427,0.7,0.478473,0.472097,0.501667
9,0.073800,0.055431,0.332598,0.235439,0.683333,0.467372,0.45495,0.4825
10,0.073800,0.048246,0.419107,0.219651,0.733333,0.660892,0.818774,0.617361


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted s

./Regressors/task3_clse_usr3/checkpoint-1173
step 6: evaluate


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'eval_loss': 0.043520547449588776, 'eval_r2_score': 0.4255765935332467, 'eval_mean_squared_error': 0.20861580967903137, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.5662264905962385, 'eval_precision': 0.5524657026325547, 'eval_recall': 0.6036036036036037, 'eval_runtime': 0.1536, 'eval_samples_per_second': 390.67, 'eval_steps_per_second': 26.045, 'epoch': 20.0}
DONE
==== round 4 ====
Result[val] {'eval_loss': 0.043520547449588776, 'eval_r2_score': 0.4255765935332467, 'eval_mean_squared_error': 0.20861580967903137, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.5662264905962385, 'eval_precision': 0.5524657026325547, 'eval_recall': 0.6036036036036037, 'eval_runtime': 0.1536, 'eval_samples_per_second': 390.67, 'eval_steps_per_second': 26.045, 'epoch': 20.0}
Result[test] {'eval_loss': 0.038543637841939926, 'eval_r2_score': 0.5359293987043179, 'eval_mean_squared_error': 0.19632534682750702, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.7299679487179488, 'eval_precision': 0.88618

Map:   0%|          | 0/1090 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

Map:   0%|          | 0/60 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.079497,0.04285,0.281951,0.616667,0.539683,0.652137,0.556111
2,No log,0.060752,0.268537,0.246479,0.716667,0.489744,0.516667,0.517917
3,No log,0.042141,0.492622,0.205282,0.783333,0.69556,0.856867,0.657361
4,No log,0.073381,0.116486,0.270889,0.816667,0.563301,0.552855,0.574583
5,No log,0.039103,0.529199,0.197744,0.8,0.678788,0.707937,0.667778
6,No log,0.038289,0.538995,0.195676,0.8,0.547563,0.532738,0.564167
7,No log,0.038984,0.530628,0.197443,0.8,0.547605,0.533333,0.567083
8,0.059100,0.061923,0.254443,0.248843,0.683333,0.590423,0.677579,0.600694
9,0.059100,0.129602,-0.560431,0.360003,0.516667,0.425568,0.43448,0.541806
10,0.059100,0.062767,0.244281,0.250533,0.716667,0.601471,0.601669,0.604028


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task3_clse_usr4/checkpoint-897
step 6: evaluate


{'eval_loss': 0.05288446322083473, 'eval_r2_score': 0.3019832575886975, 'eval_mean_squared_error': 0.2299662083387375, 'eval_accuracy': 0.75, 'eval_f1': 0.6370370370370372, 'eval_precision': 0.6628282828282829, 'eval_recall': 0.63003003003003, 'eval_runtime': 0.1557, 'eval_samples_per_second': 385.378, 'eval_steps_per_second': 25.692, 'epoch': 20.0}
DONE
==== round 5 ====
Result[val] {'eval_loss': 0.05288446322083473, 'eval_r2_score': 0.3019832575886975, 'eval_mean_squared_error': 0.2299662083387375, 'eval_accuracy': 0.75, 'eval_f1': 0.6370370370370372, 'eval_precision': 0.6628282828282829, 'eval_recall': 0.63003003003003, 'eval_runtime': 0.1557, 'eval_samples_per_second': 385.378, 'eval_steps_per_second': 25.692, 'epoch': 20.0}
Result[test] {'eval_loss': 0.04255913197994232, 'eval_r2_score': 0.487582401406467, 'eval_mean_squared_error': 0.20629864931106567, 'eval_accuracy': 0.85, 'eval_f1': 0.7135572139303482, 'eval_precision': 0.7472049689440995, 'eval_recall': 0.6931944444444444, 'e

In [73]:
get_shapley(df, best_trainer, "./ShapleyValuesV2/task3_clse_regressor.pkl");

DATA SIZE 1210


PartitionExplainer explainer: 1211it [38:13,  1.90s/it]


In [74]:
cp ./ShapleyValuesV2/task3_clse_regressor.pkl /content/drive/MyDrive/TalkLikeMom/src/Classifier/ShapleyValuesV2/task3_clse_regressor.pkl


In [95]:
best_trainer = None
best_eval_val = None
best_eval_test = None

for i in range(5):
  df = get_task1_conver("../Task3/annotated/annotated.jsonl", "authority", skips = [], only_user=True)
  trainer, eval_test, eval_val = run_exp(f"./Regressors/task3_auth_usr{i}", df, seed=i, report=report, regressor_configs={
      "label": "respect",
      "not_label": "not_respect",
      "label_fn": authority2_label_fn,
  })

  print(f"==== round {i+1} ====")
  print("Result[val]", eval_val)
  print("Result[test]", eval_test)

  if (best_eval_val is None) or (best_eval_val["eval_f1"] < eval_val["eval_f1"]):
    best_eval_val = eval_val
    best_eval_test = eval_test
    best_trainer = trainer

  del trainer

print()
print()
print("===================================")
print()
print("BEST MODEL")
print("Result[val]", best_eval_val)
print("Result[test]", best_eval_test)

Loaded 1221 records from ../Task3/annotated/annotated.jsonl
N 1099 61 61
cuda
START
step 1: load data
step 2: load tokenizer
step 3: init data


Map:   0%|          | 0/1099 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.036916,0.093304,0.192135,0.819672,0.586624,0.578205,0.606209
2,No log,0.048823,-0.199155,0.22096,0.836066,0.605794,0.634615,0.584967
3,No log,0.028851,0.291387,0.169856,0.868852,0.679987,0.724109,0.647059
4,No log,0.073052,-0.794223,0.270281,0.819672,0.515944,0.50719,0.529412
5,No log,0.049163,-0.20749,0.221727,0.622951,0.530492,0.584955,0.598039
6,No log,0.047081,-0.156347,0.21698,0.754098,0.601148,0.621189,0.650327
7,No log,0.054137,-0.329671,0.232674,0.491803,0.467082,0.552381,0.545752
8,0.037000,0.046044,-0.130889,0.214579,0.803279,0.611772,0.627778,0.620915
9,0.037000,0.052537,-0.290368,0.22921,0.819672,0.609524,0.597789,0.627451
10,0.037000,0.039646,0.026244,0.199114,0.786885,0.60064,0.618096,0.614379


./Regressors/task3_auth_usr0/checkpoint-207
step 6: evaluate


{'eval_loss': 0.022138534113764763, 'eval_r2_score': 0.180323329889995, 'eval_mean_squared_error': 0.14879024028778076, 'eval_accuracy': 0.8360655737704918, 'eval_f1': 0.5564322469982846, 'eval_precision': 0.5243589743589744, 'eval_recall': 0.7407407407407408, 'eval_runtime': 0.1553, 'eval_samples_per_second': 392.763, 'eval_steps_per_second': 25.755, 'epoch': 20.0}
DONE
==== round 1 ====
Result[val] {'eval_loss': 0.022138534113764763, 'eval_r2_score': 0.180323329889995, 'eval_mean_squared_error': 0.14879024028778076, 'eval_accuracy': 0.8360655737704918, 'eval_f1': 0.5564322469982846, 'eval_precision': 0.5243589743589744, 'eval_recall': 0.7407407407407408, 'eval_runtime': 0.1553, 'eval_samples_per_second': 392.763, 'eval_steps_per_second': 25.755, 'epoch': 20.0}
Result[test] {'eval_loss': 0.028851088136434555, 'eval_r2_score': 0.291386782487999, 'eval_mean_squared_error': 0.16985608637332916, 'eval_accuracy': 0.8688524590163934, 'eval_f1': 0.6799866799866799, 'eval_precision': 0.724109

Map:   0%|          | 0/1099 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.063678,-0.563996,0.252345,0.770492,0.582447,0.567829,0.684641
2,No log,0.048455,-0.190103,0.220125,0.754098,0.580462,0.602849,0.601307
3,No log,0.098416,-1.417192,0.313713,0.311475,0.199595,0.295678,0.320261
4,No log,0.05872,-0.442221,0.242322,0.770492,0.3865,0.371795,0.405229
5,No log,0.058941,-0.447644,0.242777,0.57377,0.268913,0.282084,0.277778
6,No log,0.099731,-1.4495,0.315802,0.098361,0.059701,0.032787,0.333333
7,No log,0.040463,0.006196,0.201153,0.803279,0.431068,0.405983,0.46732
8,0.059700,0.04252,-0.04434,0.206204,0.836066,0.458608,0.439153,0.480392
9,0.059700,0.08279,-1.033419,0.287733,0.245902,0.168966,0.371795,0.392157
10,0.059700,0.251849,-5.18567,0.501845,0.098361,0.059701,0.032787,0.333333


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted s

./Regressors/task3_auth_usr1/checkpoint-69
step 6: evaluate


{'eval_loss': 0.031332265585660934, 'eval_r2_score': -0.16007334634328996, 'eval_mean_squared_error': 0.1770092397928238, 'eval_accuracy': 0.8032786885245902, 'eval_f1': 0.517094017094017, 'eval_precision': 0.5705555555555556, 'eval_recall': 0.7283950617283951, 'eval_runtime': 0.1577, 'eval_samples_per_second': 386.87, 'eval_steps_per_second': 25.369, 'epoch': 20.0}
DONE
==== round 2 ====
Result[val] {'eval_loss': 0.031332265585660934, 'eval_r2_score': -0.16007334634328996, 'eval_mean_squared_error': 0.1770092397928238, 'eval_accuracy': 0.8032786885245902, 'eval_f1': 0.517094017094017, 'eval_precision': 0.5705555555555556, 'eval_recall': 0.7283950617283951, 'eval_runtime': 0.1577, 'eval_samples_per_second': 386.87, 'eval_steps_per_second': 25.369, 'epoch': 20.0}
Result[test] {'eval_loss': 0.06367788463830948, 'eval_r2_score': -0.5639960478035975, 'eval_mean_squared_error': 0.2523447871208191, 'eval_accuracy': 0.7704918032786885, 'eval_f1': 0.5824468085106383, 'eval_precision': 0.567829

Map:   0%|          | 0/1099 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.035073,0.138571,0.187278,0.836066,0.647475,0.627315,0.683007
2,No log,0.04769,-0.171311,0.21838,0.868852,0.667822,0.696581,0.647059
3,No log,0.035151,0.136654,0.187486,0.852459,0.651512,0.672269,0.640523
4,No log,0.033257,0.183175,0.182365,0.868852,0.747143,0.729592,0.772876
5,No log,0.033799,0.16987,0.183844,0.852459,0.66135,0.690171,0.640523
6,No log,0.054199,-0.331194,0.232808,0.721311,0.525362,0.568202,0.609477
7,No log,0.049746,-0.221811,0.223038,0.655738,0.474188,0.528509,0.534314
8,0.037700,0.032189,0.209415,0.179412,0.836066,0.61297,0.651153,0.584967
9,0.037700,0.033982,0.165376,0.184341,0.836066,0.633987,0.633987,0.633987
10,0.037700,0.039251,0.03596,0.198118,0.803279,0.571895,0.571895,0.571895


./Regressors/task3_auth_usr2/checkpoint-276
step 6: evaluate


{'eval_loss': 0.025615084916353226, 'eval_r2_score': 0.05160451578855041, 'eval_mean_squared_error': 0.16004714369773865, 'eval_accuracy': 0.8360655737704918, 'eval_f1': 0.7304582210242588, 'eval_precision': 0.7243589743589745, 'eval_recall': 0.7407407407407408, 'eval_runtime': 0.1582, 'eval_samples_per_second': 385.506, 'eval_steps_per_second': 25.279, 'epoch': 20.0}
DONE
==== round 3 ====
Result[val] {'eval_loss': 0.025615084916353226, 'eval_r2_score': 0.05160451578855041, 'eval_mean_squared_error': 0.16004714369773865, 'eval_accuracy': 0.8360655737704918, 'eval_f1': 0.7304582210242588, 'eval_precision': 0.7243589743589745, 'eval_recall': 0.7407407407407408, 'eval_runtime': 0.1582, 'eval_samples_per_second': 385.506, 'eval_steps_per_second': 25.279, 'epoch': 20.0}
Result[test] {'eval_loss': 0.033256907016038895, 'eval_r2_score': 0.18317523361232346, 'eval_mean_squared_error': 0.18236477673053741, 'eval_accuracy': 0.8688524590163934, 'eval_f1': 0.7471428571428572, 'eval_precision': 0.

Map:   0%|          | 0/1099 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.040801,-0.002114,0.201992,0.852459,0.619577,0.685185,0.591503
2,No log,0.051473,-0.26423,0.226876,0.57377,0.311129,0.326156,0.375817
3,No log,0.056236,-0.381225,0.237142,0.754098,0.580462,0.602849,0.601307
4,No log,0.060331,-0.481783,0.245623,0.672131,0.331533,0.327053,0.366013
5,No log,0.048703,-0.196202,0.220688,0.852459,0.58547,0.63522,0.563725
6,No log,0.061554,-0.511821,0.2481,0.590164,0.315873,0.327506,0.382353
7,No log,0.043998,-0.080629,0.209756,0.852459,0.640523,0.640523,0.640523
8,0.041900,0.039345,0.033651,0.198355,0.819672,0.62381,0.639456,0.627451
9,0.041900,0.035683,0.123589,0.188899,0.819672,0.62381,0.639456,0.627451
10,0.041900,0.041745,-0.025306,0.204316,0.786885,0.60064,0.618096,0.614379


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task3_auth_usr3/checkpoint-483
step 6: evaluate


{'eval_loss': 0.026028012856841087, 'eval_r2_score': 0.03631590659528117, 'eval_mean_squared_error': 0.16133202612400055, 'eval_accuracy': 0.8688524590163934, 'eval_f1': 0.5901234567901233, 'eval_precision': 0.6141975308641975, 'eval_recall': 0.7530864197530865, 'eval_runtime': 0.1531, 'eval_samples_per_second': 398.326, 'eval_steps_per_second': 26.12, 'epoch': 20.0}
DONE
==== round 4 ====
Result[val] {'eval_loss': 0.026028012856841087, 'eval_r2_score': 0.03631590659528117, 'eval_mean_squared_error': 0.16133202612400055, 'eval_accuracy': 0.8688524590163934, 'eval_f1': 0.5901234567901233, 'eval_precision': 0.6141975308641975, 'eval_recall': 0.7530864197530865, 'eval_runtime': 0.1531, 'eval_samples_per_second': 398.326, 'eval_steps_per_second': 26.12, 'epoch': 20.0}
Result[test] {'eval_loss': 0.04399766027927399, 'eval_r2_score': -0.08062887596372437, 'eval_mean_squared_error': 0.20975618064403534, 'eval_accuracy': 0.8524590163934426, 'eval_f1': 0.6405228758169934, 'eval_precision': 0.64

Map:   0%|          | 0/1099 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

Map:   0%|          | 0/61 [00:00<?, ? examples/s]

step 4: load model


Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at airesearch/wangchanberta-base-att-spm-uncased and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


step 5: fine-tune


You're using a CamembertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,R2 Score,Mean Squared Error,Accuracy,F1,Precision,Recall
1,No log,0.048106,-0.181533,0.219331,0.868852,0.659164,0.674359,0.647059
2,No log,0.034104,0.162358,0.184674,0.819672,0.630363,0.647222,0.627451
3,No log,0.04378,-0.075277,0.209236,0.803279,0.617143,0.632653,0.620915
4,No log,0.034721,0.14722,0.186335,0.836066,0.644976,0.665733,0.633987
5,No log,0.055884,-0.372558,0.236397,0.508197,0.304386,0.350934,0.447712
6,No log,0.066454,-0.632192,0.257788,0.52459,0.494241,0.584628,0.607843
7,No log,0.042695,-0.048627,0.206627,0.770492,0.590247,0.609903,0.607843
8,0.041100,0.03997,0.018287,0.199926,0.737705,0.393939,0.375,0.441176
9,0.041100,0.034684,0.148136,0.186235,0.836066,0.644976,0.665733,0.633987
10,0.041100,0.035661,0.124134,0.188841,0.852459,0.652692,0.667949,0.640523


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


./Regressors/task3_auth_usr4/checkpoint-69
step 6: evaluate


{'eval_loss': 0.028756296262145042, 'eval_r2_score': -0.06469832403861187, 'eval_mean_squared_error': 0.16957682371139526, 'eval_accuracy': 0.8360655737704918, 'eval_f1': 0.43580246913580245, 'eval_precision': 0.46913580246913583, 'eval_recall': 0.41358024691358025, 'eval_runtime': 0.1531, 'eval_samples_per_second': 398.38, 'eval_steps_per_second': 26.123, 'epoch': 20.0}
DONE
==== round 5 ====
Result[val] {'eval_loss': 0.028756296262145042, 'eval_r2_score': -0.06469832403861187, 'eval_mean_squared_error': 0.16957682371139526, 'eval_accuracy': 0.8360655737704918, 'eval_f1': 0.43580246913580245, 'eval_precision': 0.46913580246913583, 'eval_recall': 0.41358024691358025, 'eval_runtime': 0.1531, 'eval_samples_per_second': 398.38, 'eval_steps_per_second': 26.123, 'epoch': 20.0}
Result[test] {'eval_loss': 0.04810596629977226, 'eval_r2_score': -0.18153345402199905, 'eval_mean_squared_error': 0.21933074295520782, 'eval_accuracy': 0.8688524590163934, 'eval_f1': 0.6591644601353339, 'eval_precisio

In [96]:
get_shapley(df, best_trainer, "./ShapleyValuesV2/task3_auth_regressor.pkl");

DATA SIZE 1221


PartitionExplainer explainer: 1222it [39:09,  1.93s/it]


In [97]:
cp ./ShapleyValuesV2/task3_auth_regressor.pkl /content/drive/MyDrive/TalkLikeMom/src/Classifier/ShapleyValuesV2/task3_auth_regressor.pkl

In [98]:
cp -r ./Regressors /content/drive/MyDrive/TalkLikeMom/src/Classifier/RegressorsV2

[0m[01;34msample_data[0m/
