## Imports

In [1]:
import os
import pathlib

from datasets import load_dataset, concatenate_datasets
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding
import wandb

from hf_wrapper import GPTForSequenceClassification
from tokenizer import load_character_tokenizer
from utils import load_random_from_pretrained_model, compute_metrics, flatten_multi_features

## Variables

In [2]:
# Local cache root; checkpoints and tokenizer files live beneath it.
hf_cache = pathlib.Path('./cache')
training_checkpoints = hf_cache / 'checkpoints'
# Checkpoint handed to load_random_from_pretrained_model() in train_model().
# NOTE(review): the directory name says "russian_polish" while the tokenizers
# and datasets below are Hindi/Urdu — confirm this is the intended checkpoint.
checkpoint = training_checkpoints / 'russian_polish_normal_12_5_50k' / 'ckpt.pt'
tokenizer_prefix = hf_cache / 'tokenizers'
# One tokenizer file per transcription variant (IPA vs. original script).
ipa_tokenizer_path = tokenizer_prefix / 'hindi-urdu-character-tokenizer-ipa.json'
normal_tokenizer_path = tokenizer_prefix / 'hindi-urdu-character-tokenizer-normal.json'

# Dataset identifier passed to load_dataset() for each language code.
dataset_name = dict(
    hin="krishnAbadikelA/hindi-xnli-ipa",
    urd="krishnAbadikelA/urdu-xnli-ipa",
)

# Fine-tuning hyperparameters consumed by the functions below.
epochs = 3            # num_train_epochs
context_size = 1024   # max_length used when tokenizing (longer inputs are truncated)
batch_size = 16       # per-device batch size for both training and evaluation
learning_rate = 2e-5

# Used as the W&B project name and as the prefix of the checkpoint output directory.
project_name = "debug-hindi-urdu-small-finetuning-xnli-random-initial-phonemizer"

## Functions

In [3]:
def load_and_preprocess(lang: str, ipa: bool, split: str, tokenizer):
    """Load one split of the dataset for ``lang`` and tokenize it.

    Args:
        lang: Key into ``dataset_name`` (``'hin'`` or ``'urd'``).
        ipa: When true, read the ``premise-phonemizer`` / ``hypothesis-phonemizer``
            columns; otherwise read ``premise`` / ``hypothesis``.
        split: Split name forwarded to ``load_dataset`` (e.g. ``'train'``).
        tokenizer: Callable tokenizer applied to the flattened text.

    Returns:
        The dataset returned by ``Dataset.map`` with the tokenizer output and a
        ``label`` entry added for every example.
    """
    text_columns = [
        base + '-phonemizer' if ipa else base
        for base in ('premise', 'hypothesis')
    ]
    raw_split = load_dataset(dataset_name[lang], split=split, cache_dir=str(hf_cache))

    def preprocess(examples):
        # flatten_multi_features presumably joins the two text columns into a
        # single sequence separated by the given token — confirm in utils.
        joined = flatten_multi_features(examples, text_columns, sequence_token='<ENDOFTEXT>')
        batch = tokenizer(joined, truncation=True, max_length=context_size)
        batch['label'] = examples['label']
        return batch

    # Batched map, parallelised over as many workers as os.cpu_count() reports.
    return raw_split.map(preprocess, batched=True, num_proc=os.cpu_count())

In [4]:
def train_model(lang: str, ipa: bool) -> Trainer:
    """Fine-tune a sequence classifier on XNLI and return the fitted ``Trainer``.

    Args:
        lang: ``'hin'``, ``'urd'``, or ``'both'`` (the two datasets concatenated,
            Hindi first).
        ipa: Use the IPA tokenizer and phonemized columns when true, the
            original-script tokenizer and columns otherwise.

    Returns:
        The ``Trainer`` after ``train()`` has completed. Because
        ``load_best_model_at_end=True`` it holds the checkpoint with the best
        ``metric_for_best_model``.

    Raises:
        ValueError: If ``lang`` is not one of the supported values. This is
            checked before any directory is created or model is loaded.
    """
    # Fail fast: reject a bad language code before touching disk or the GPU.
    if lang not in ('hin', 'urd', 'both'):
        raise ValueError(f'Unknown train language: {lang}')

    variant = 'ipa' if ipa else 'normal'
    temporary_output_dir = training_checkpoints / f"{project_name}-{lang}-{variant}"
    temporary_output_dir.mkdir(parents=True, exist_ok=True)

    vocab_path = ipa_tokenizer_path if ipa else normal_tokenizer_path
    tokenizer = load_character_tokenizer(vocab_path)

    base_model = load_random_from_pretrained_model(checkpoint, 'cuda')
    # Copy the padding settings from the tokenizer onto the model config.
    base_model.config.pad_token_id = tokenizer.pad_token_id
    base_model.config.padding_side = tokenizer.padding_side
    model = GPTForSequenceClassification(base_model, num_classes=3).to('cuda')

    source_langs = ['hin', 'urd'] if lang == 'both' else [lang]

    def load_split(split: str):
        """Load ``split`` for every source language, concatenating when several."""
        parts = [load_and_preprocess(code, ipa, split, tokenizer) for code in source_langs]
        return parts[0] if len(parts) == 1 else concatenate_datasets(parts)

    train_dataset = load_split('train')
    eval_dataset = load_split('validation')

    training_args = TrainingArguments(
        output_dir=str(temporary_output_dir),
        # Fractional values: evaluate, save and log every 1% of total training steps.
        eval_strategy="steps",
        eval_steps=0.01,
        save_strategy='steps',
        save_steps=0.01,
        logging_steps=0.01,
        save_total_limit=1,
        metric_for_best_model="precision",
        load_best_model_at_end=True,
        learning_rate=learning_rate,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=epochs,
        weight_decay=0.01,
        fp16=True,
        warmup_ratio=0.3,
        save_safetensors=False,
    )

    wrun = wandb.init(
        entity='aaronjencks-the-ohio-state-university',
        project=project_name,
        name=f'{lang}-{variant}',
    )
    try:
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            # `tokenizer=` is deprecated on Trainer; `processing_class` replaces it.
            processing_class=tokenizer,
            data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
            compute_metrics=compute_metrics,
        )

        print("Training model")
        trainer.train()
    finally:
        # Always close the W&B run, even when training raises, so a failed
        # cell does not leave a dangling run that later cells log into.
        wrun.finish()

    return trainer

In [5]:
def finetune_transcription(train_lang: str, eval_lang: str, ipa: bool, model: Trainer):
    """Evaluate an already-trained ``Trainer`` on a validation set and log it to W&B.

    Despite its name, this function performs no training: it only calls
    ``model.evaluate`` and prints the resulting metrics.

    Args:
        train_lang: Language the model was trained on; used only in the W&B run name.
        eval_lang: ``'hin'``, ``'urd'``, or ``'both'`` (both validation sets
            concatenated, Hindi first).
        ipa: Use the IPA tokenizer and phonemized columns when true.
        model: The ``Trainer`` returned by ``train_model``.

    Returns:
        None. The metrics dictionary is printed.
    """
    variant = 'ipa' if ipa else 'normal'
    print(f'finetuning on {eval_lang} {variant}')
    tokenizer = load_character_tokenizer(ipa_tokenizer_path if ipa else normal_tokenizer_path)

    eval_langs = ['hin', 'urd'] if eval_lang == 'both' else [eval_lang]
    eval_parts = [load_and_preprocess(code, ipa, 'validation', tokenizer) for code in eval_langs]
    eval_dataset = eval_parts[0] if len(eval_parts) == 1 else concatenate_datasets(eval_parts)

    wrun = wandb.init(
        entity='aaronjencks-the-ohio-state-university',
        project=project_name,
        name=f'{train_lang}-{eval_lang}-{variant}',
    )
    try:
        print(f"Final evaluation on {eval_lang}")
        results = model.evaluate(eval_dataset=eval_dataset)
        print(results)
    finally:
        # Close the W&B run even if evaluation fails, so it is not left open.
        wrun.finish()

## Finetuning

In [None]:
# Train on Hindi with the original-script ("normal") tokenizer; `model` holds the returned Trainer.
# NOTE(review): this cell's saved output looks stale (no execution count, and its step
# numbers do not match the global_step logged by the following evaluation) — re-run to confirm.
model = train_model(lang='hin', ipa=False)

number of parameters: 123.35M


[34m[1mwandb[0m: Currently logged in as: [33maaronjencks[0m ([33maaronjencks-the-ohio-state-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  trainer = Trainer(


Training model


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
737,1.1699,1.13883,0.335743,0.33624,0.335743,0.331196
1474,1.1313,1.123982,0.343775,0.344032,0.343775,0.333005


In [9]:
# Evaluate the Trainer currently held in `model` on the Hindi validation split (normal script).
finetune_transcription(train_lang='hin', eval_lang='hin', ipa=False, model=model)

finetuning on hin normal


Final evaluation on hin


{'eval_loss': 1.1103460788726807, 'eval_accuracy': 0.40555555555555556, 'eval_precision': 0.4109192134565269, 'eval_recall': 0.40555555555555556, 'eval_f1': 0.38466769140682183, 'eval_runtime': 0.9409, 'eval_samples_per_second': 191.306, 'eval_steps_per_second': 12.754, 'epoch': 3.0}


0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.40556
eval/f1,0.38467
eval/loss,1.11035
eval/precision,0.41092
eval/recall,0.40556
eval/runtime,0.9409
eval/samples_per_second,191.306
eval/steps_per_second,12.754
train/epoch,3.0
train/global_step,135.0


In [10]:
# Train on Hindi with the IPA tokenizer and phonemized columns; `model` holds the returned Trainer.
model = train_model(lang='hin', ipa=True)

number of parameters: 123.35M


Map (num_proc=16):   0%|          | 0/718 [00:00<?, ? examples/s]

Map (num_proc=16):   0%|          | 0/180 [00:00<?, ? examples/s]

  trainer = Trainer(


Training model


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
2,1.2067,1.168423,0.388889,0.418428,0.388889,0.366746
4,1.2047,1.157199,0.383333,0.414584,0.383333,0.360936
6,1.1014,1.148752,0.388889,0.411047,0.388889,0.369214
8,1.058,1.140294,0.383333,0.402689,0.383333,0.364855
10,0.9892,1.144377,0.388889,0.420409,0.388889,0.36291
12,1.277,1.145633,0.388889,0.426023,0.388889,0.356876
14,1.1131,1.141759,0.383333,0.409683,0.383333,0.355025
16,1.1793,1.135447,0.383333,0.410995,0.383333,0.358547
18,1.102,1.128776,0.388889,0.411137,0.388889,0.372985
20,1.0428,1.140373,0.383333,0.408202,0.383333,0.362273


0,1
eval/accuracy,██▇█▆▃▂▄▂▄▅▅▃▅▅▃▆▇▅▅▅█▄▄▅▃▃▃▁▁▅▂▃▃▄▄▃▃▄▄
eval/f1,▇▇▇▇▇▇▇▇▅▄▃▂▂▄▁▃▅▅▅▅▃▄█▇▅▅▅▆▄▃▃▃▅▄▅▅▅▅▅▆
eval/loss,▂▂▂▂▂▂▁▁▁▂▆▆▅▅█▅▅▄▃▃▅▄▂▁▂▃▃▃▃▃▄▅▄▄▃▃▃▃▃▃
eval/precision,█▇███▇▇▆▅▂▅▅▅▆▁▅▆▆██▇▅▆▇▇▆▆▅▅▅▇▅▅▇▅▅▅▅▅▆
eval/recall,██████▄▇▂▂▁▂▃▁▄▄▅▃▂▆▅▄▃▅▆▃▅▂▂▂▂▅▁▁▃▃▃▃▃▄
eval/runtime,▂▂▁▆▃▂▄▃▃▃▃▂▃▃▃▃▃▃▃▆▃▄▃▃▃▃▃▄▃▃▄▃▃▃▁█▄▄▇▃
eval/samples_per_second,▆██▂▆▅▇▇▄▆▆▆▆▆▆▆▆▆▆▆▆▅▁▆▄▅▅▆▅▅▄▅▆▅▅▄▄▄▄▅
eval/steps_per_second,▆▇█▇▅▇▇▇▇▅▆▆▆▆▆▆▆▆▆▆▆▃▅▅▆▆▆▆▆▅▆▆▅▆▆█▁▅▅▆
train/epoch,▁▁▁▁▁▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇██
train/global_step,▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██

0,1
eval/accuracy,0.33333
eval/f1,0.32621
eval/loss,1.16696
eval/precision,0.34994
eval/recall,0.33333
eval/runtime,0.9225
eval/samples_per_second,195.121
eval/steps_per_second,13.008
total_flos,0.0
train/epoch,3.0


In [11]:
# Evaluate the Trainer currently held in `model` on the Hindi validation split (IPA).
finetune_transcription(train_lang='hin', eval_lang='hin', ipa=True, model=model)

finetuning on hin ipa


Final evaluation on hin


{'eval_loss': 1.1456326246261597, 'eval_accuracy': 0.3888888888888889, 'eval_precision': 0.42602325083038683, 'eval_recall': 0.3888888888888889, 'eval_f1': 0.3568763059169542, 'eval_runtime': 0.9495, 'eval_samples_per_second': 189.573, 'eval_steps_per_second': 12.638, 'epoch': 3.0}


0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.38889
eval/f1,0.35688
eval/loss,1.14563
eval/precision,0.42602
eval/recall,0.38889
eval/runtime,0.9495
eval/samples_per_second,189.573
eval/steps_per_second,12.638
train/epoch,3.0
train/global_step,135.0


In [12]:
# Train on Urdu with the original-script ("normal") tokenizer; `model` holds the returned Trainer.
model = train_model(lang='urd', ipa=False)

number of parameters: 123.35M


Generating train split:   0%|          | 0/10949 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/2738 [00:00<?, ? examples/s]

Map (num_proc=16):   0%|          | 0/10949 [00:00<?, ? examples/s]

Map (num_proc=16):   0%|          | 0/2738 [00:00<?, ? examples/s]

  trainer = Trainer(


Training model


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
21,1.1274,1.117578,0.398466,0.386979,0.398466,0.388329
42,1.1685,1.063344,0.422571,0.407185,0.422571,0.409789
63,1.0483,1.016537,0.445946,0.417976,0.445946,0.4247
84,1.0245,0.993265,0.467495,0.441474,0.467495,0.443399
105,1.0148,0.972458,0.486852,0.479213,0.486852,0.471848
126,1.0056,0.959132,0.498904,0.488214,0.498904,0.486051
147,0.9736,0.952819,0.480643,0.498217,0.480643,0.48369
168,0.9525,0.931228,0.503287,0.482517,0.503287,0.483466
189,0.9962,0.929042,0.510957,0.505904,0.510957,0.507826
210,0.9855,0.942219,0.501096,0.492023,0.501096,0.447859


0,1
eval/accuracy,▁▃▃▄▃▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▇▆▆▇▇▇▇▇▇▇▇█▇███████
eval/f1,▁▃▄▄▃▄▄▅▅▆▆▆▅▆▄▆▆▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇███████
eval/loss,█▆▆▅▅▄▅▅▄▄▅▄▅▄▅█▅▄▃▄▃▃▃▂▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
eval/precision,▁▃▃▆▄▄▅▅▅▅▅█▆▆▆▆▆▆▆▆▇▇▇▇▇▆▇▇▇▇▇██▇▇█████
eval/recall,▁▂▂▃▃▂▃▄▃▄▄▅▄▄▅▄▆▆▆▇▇▆▆▇▇▇▇▇▇▇▇█▇▇▇█████
eval/runtime,▄▁▃▁▇▄▁▆▆▂▂▁▄█▂▁▃▂▁▃▅▃▁▂▁▃▇▂▂▃▂▇▁▂▃▄▂▃▂▂
eval/samples_per_second,▃▂█▆▆▂▆▅█▇▇▆▅█▇█▆██▆▄▄██▅█▂▇▃▇▇▆▇▂▇▅▇█▇▁
eval/steps_per_second,█▁▄▆▆█▄▄█▄▇▇▇█▆▇▆▇██▅▄▆█▆▇▂▇▅▇▇▇▅▇█▃▇▇▅▃
train/epoch,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇█
train/global_step,▁▁▂▂▂▂▂▂▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇█

0,1
eval/accuracy,0.63112
eval/f1,0.63073
eval/loss,0.8087
eval/precision,0.63202
eval/recall,0.63112
eval/runtime,1.8334
eval/samples_per_second,1493.425
eval/steps_per_second,93.816
total_flos,0.0
train/epoch,3.0


In [13]:
# Evaluate the Trainer currently held in `model` on the Urdu validation split (normal script).
finetune_transcription(train_lang='urd', eval_lang='urd', ipa=False, model=model)

finetuning on urd normal


Final evaluation on urd


{'eval_loss': 0.8098613619804382, 'eval_accuracy': 0.6289262235208181, 'eval_precision': 0.6321365985140542, 'eval_recall': 0.6289262235208181, 'eval_f1': 0.6297725883329489, 'eval_runtime': 1.9213, 'eval_samples_per_second': 1425.087, 'eval_steps_per_second': 89.523, 'epoch': 3.0}


0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.62893
eval/f1,0.62977
eval/loss,0.80986
eval/precision,0.63214
eval/recall,0.62893
eval/runtime,1.9213
eval/samples_per_second,1425.087
eval/steps_per_second,89.523
train/epoch,3.0
train/global_step,2055.0


In [14]:
# Train on Urdu with the IPA tokenizer and phonemized columns; `model` holds the returned Trainer.
model = train_model(lang='urd', ipa=True)

number of parameters: 123.35M


Map (num_proc=16):   0%|          | 0/10949 [00:00<?, ? examples/s]

Map (num_proc=16):   0%|          | 0/2738 [00:00<?, ? examples/s]

  trainer = Trainer(


Training model


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
21,1.1604,1.128288,0.3729,0.345556,0.3729,0.345868
42,1.1327,1.110017,0.40504,0.360176,0.40504,0.358687
63,1.1231,1.106823,0.394449,0.363025,0.394449,0.370235
84,1.1306,1.097492,0.407962,0.370738,0.407962,0.374601
105,1.0888,1.103557,0.385683,0.378156,0.385683,0.353834
126,1.1423,1.079785,0.432798,0.398251,0.432798,0.39508
147,1.125,1.076625,0.436815,0.420003,0.436815,0.375179
168,1.0728,1.0654,0.464573,0.447882,0.464573,0.356416
189,1.086,1.059854,0.445581,0.423039,0.445581,0.410816
210,1.0566,1.091164,0.454711,0.484145,0.454711,0.30697


0,1
eval/accuracy,▁▂▃▄▃▃▄▄▅▅▆▅▅▅▆▆▇▆▇▆▆▆▇▇▇▇▇▇▇▇█▇▇███████
eval/f1,▂▂▂▂▃▁▄▄▅▅▆▆▅▅▆▅▇▇▇▇▇▅▇▇▇███▇▇██████████
eval/loss,█▇▇█▇▆▆▆▆▄▄▅▄▅▃▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▂▂▂▂▂▁▁▁▁▁
eval/precision,▁▂▂▃▃▄▄▅▅▆▆▇▆▆▆▇▇▆▇▇▇▇▇▇▇█▇▇█▇██████████
eval/recall,▁▂▂▁▃▄▅▄▆▅▁▆▆▆▇▇▆▆▆▇▆▇▇▇▇▇▇▇████████████
eval/runtime,█▇▄▃▃▃▆▃█▃▃▃▆▃▃▃▄▃▁▁▂▄▂▄▁▅▄▅▃▂▁▂▂▃▂▃▁▂▂▄
eval/samples_per_second,▁▂▆▆▅▁▆▆▅▃▆▆▄▄▇▆██▄▇█▆▄▇▅▆█▂▇▆█▁▇▇▇▅▇█▇█
eval/steps_per_second,▂▆▆▆▃▁▆▂▆▅▃▃▃▆▄▇▆▇▅▇▇█▄█▆█▄█▆▇██▇▄███▇▇▂
train/epoch,▁▁▁▁▁▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
train/global_step,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇█

0,1
eval/accuracy,0.61286
eval/f1,0.60832
eval/loss,0.86318
eval/precision,0.60873
eval/recall,0.61286
eval/runtime,2.5222
eval/samples_per_second,1085.567
eval/steps_per_second,68.195
total_flos,0.0
train/epoch,3.0


In [15]:
# Evaluate the Trainer currently held in `model` on the Urdu validation split (IPA).
finetune_transcription(train_lang='urd', eval_lang='urd', ipa=True, model=model)

finetuning on urd ipa


Final evaluation on urd


{'eval_loss': 0.8738154768943787, 'eval_accuracy': 0.6106647187728269, 'eval_precision': 0.6109767155664939, 'eval_recall': 0.6106647187728269, 'eval_f1': 0.5984703363788032, 'eval_runtime': 2.6108, 'eval_samples_per_second': 1048.73, 'eval_steps_per_second': 65.881, 'epoch': 3.0}


0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.61066
eval/f1,0.59847
eval/loss,0.87382
eval/precision,0.61098
eval/recall,0.61066
eval/runtime,2.6108
eval/samples_per_second,1048.73
eval/steps_per_second,65.881
train/epoch,3.0
train/global_step,2055.0


In [16]:
# Train on the concatenated Hindi + Urdu data with the original-script ("normal") tokenizer.
model = train_model(lang='both', ipa=False)

number of parameters: 123.35M


  trainer = Trainer(


Training model


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
22,1.169,1.119362,0.412269,0.405745,0.412269,0.406917
44,1.0992,1.066482,0.427005,0.408457,0.427005,0.41261
66,1.0572,1.021486,0.449623,0.424308,0.449623,0.422563
88,1.0083,0.996762,0.469157,0.44906,0.469157,0.444644
110,1.0114,0.982128,0.476696,0.470299,0.476696,0.460171
132,1.0024,0.973767,0.477382,0.481078,0.477382,0.475393
154,1.0212,0.956164,0.502742,0.48436,0.502742,0.476162
176,0.9956,0.945102,0.506511,0.502047,0.506511,0.502893
198,0.9842,0.953856,0.50891,0.505217,0.50891,0.497495
220,1.0041,0.953585,0.50377,0.530816,0.50377,0.47745


0,1
eval/accuracy,▁▂▃▃▄▄▄▄▅▆▅▅▅▅▅▆▅▆▆▅▆▆▇▆▇▆▇▇▇▇▇█████████
eval/f1,▁▂▂▃▂▃▂▄▄▄▄▃▅▆▆▆▆▆▅▆▇▆▆▇▇▇▇▇▇▇█▇▇███████
eval/loss,█▇▅▅▅▆▄▄▄▄▄▅▄▃▄▄▄▄▃▄▃▃▂▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
eval/precision,▁▂▃▅▄▅▄▅▄▅▅▄▅▅▅▅▅▆▇█▇▇▇▇█▇█▇▇█▇█████████
eval/recall,▂▂▂▁▂▃▃▃▄▃▄▄▁▄▄▃▅▄▅▃▄▅▆▅▆▅▆▆▇▇▇▆▇█▇▇████
eval/runtime,▆▃▄▅▆▂▅▄▂▂█▂▂▅▅▄▁▂▂▂▁▃▄▁▃▂▄▇▂▂▅▄▅▂▃▅▂▆▂▃
eval/samples_per_second,▆▇█▇▅▂▇▁▇▇▅▇▇▇▇▅▁▆▆▆▄▇▇▆▆▇▇▄▅█▇▅▂▇▇▅▆▄▄▆
eval/steps_per_second,▆█▇▅▅▇▇▇▇▇▇▁▆▆▄▅▇▆▇▇▅▇▄▄█▇█▇▇▅▇▇▇▇▆▇▄▃▃▆
train/epoch,▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
train/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████

0,1
eval/accuracy,0.6172
eval/f1,0.61386
eval/loss,0.82347
eval/precision,0.61345
eval/recall,0.6172
eval/runtime,2.7631
eval/samples_per_second,1056.071
eval/steps_per_second,66.231
total_flos,0.0
train/epoch,3.0


In [17]:
# Evaluate the jointly trained Trainer in `model` on the Hindi validation split (normal script).
finetune_transcription(train_lang='both', eval_lang='hin', ipa=False, model=model)

finetuning on hin normal


Final evaluation on hin


{'eval_loss': 1.1249783039093018, 'eval_accuracy': 0.39444444444444443, 'eval_precision': 0.36400384227765176, 'eval_recall': 0.39444444444444443, 'eval_f1': 0.33342777777777777, 'eval_runtime': 0.9833, 'eval_samples_per_second': 183.056, 'eval_steps_per_second': 12.204, 'epoch': 3.0}


0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.39444
eval/f1,0.33343
eval/loss,1.12498
eval/precision,0.364
eval/recall,0.39444
eval/runtime,0.9833
eval/samples_per_second,183.056
eval/steps_per_second,12.204
train/epoch,3.0
train/global_step,2190.0


In [18]:
# Evaluate the jointly trained Trainer in `model` on the Urdu validation split (normal script).
finetune_transcription(train_lang='both', eval_lang='urd', ipa=False, model=model)

finetuning on urd normal


Final evaluation on urd
{'eval_loss': 0.8182193636894226, 'eval_accuracy': 0.6278305332359386, 'eval_precision': 0.6360188870198712, 'eval_recall': 0.6278305332359386, 'eval_f1': 0.6300529666597294, 'eval_runtime': 1.9257, 'eval_samples_per_second': 1421.846, 'eval_steps_per_second': 89.32, 'epoch': 3.0}


0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.62783
eval/f1,0.63005
eval/loss,0.81822
eval/precision,0.63602
eval/recall,0.62783
eval/runtime,1.9257
eval/samples_per_second,1421.846
eval/steps_per_second,89.32
train/epoch,3.0
train/global_step,2190.0


In [19]:
# Train on the concatenated Hindi + Urdu data with the IPA tokenizer and phonemized columns.
model = train_model(lang='both', ipa=True)

number of parameters: 123.35M


  trainer = Trainer(


Training model


Step,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
22,1.1841,1.15349,0.338245,0.374076,0.338245,0.340743
44,1.1602,1.104608,0.402673,0.367311,0.402673,0.360764
66,1.1142,1.098252,0.429061,0.376201,0.429061,0.348084
88,1.111,1.106913,0.395819,0.39001,0.395819,0.359434
110,1.0967,1.082427,0.421179,0.378773,0.421179,0.36956
132,1.1436,1.074089,0.434887,0.389975,0.434887,0.364891
154,1.1018,1.066502,0.433859,0.399181,0.433859,0.385221
176,1.1081,1.072123,0.44414,0.407529,0.44414,0.376222
198,1.1003,1.063484,0.436943,0.424928,0.436943,0.417459
220,1.0795,1.073272,0.442426,0.445634,0.442426,0.391857


0,1
eval/accuracy,▁▁▃▃▃▃▃▃▃▄▅▆▅▆▆▆▅▄▆▆▆▆▇▇▇▇▇▇▇▇▇▆▇▇█▇▇███
eval/f1,▁▁▁▂▂▂▂▃▃▃▄▄▅▆▆▆▆▆▇▅▆▆▇▇▇▇▆▇▇▇███▇▇██▇██
eval/loss,█▇▇▇▇▆▆▄▄▆▄▄▄▄▃▃▅▅▃▃▄▃▂▂▄▂▂▃▂▂▁▂▂▁▁▁▁▁▁▁
eval/precision,▁▁▃▃▄▄▅▅▄▅▆▄▆▆▆▆▇▇▇▇▇▇█▇▇▇██▇▇█▇█▇██████
eval/recall,▁▂▂▂▂▃▃▃▃▄▄▃▅▄▅▆▆▆▆▅▇▆▅▆▆▅▆▇▇▇▇▇█▇▇▇█▇██
eval/runtime,▁▂▁▅▅▁▂▄▇▃▇▆▃▃▇▅▁▂▁▃▂▅▂▅▄▅▂▂▅▂▁▁▄▃▂▁▃█▅▃
eval/samples_per_second,█▁██▇█▇█▆▇█▇▃▇▇█▇█▆▂▁█▄▆▅▇▇▂▇▇█▆▆▅█▂▇▄▇▆
eval/steps_per_second,▄█▁▇█▅▆▃▅▅▇█▆▂▇▆█▆▃▇▆▇▄▇▂▄▇▄█▇▇▇▆▆▅█▆▆▇▂
train/epoch,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇██

0,1
eval/accuracy,0.61172
eval/f1,0.60289
eval/loss,0.86702
eval/precision,0.60734
eval/recall,0.61172
eval/runtime,3.5867
eval/samples_per_second,813.572
eval/steps_per_second,51.022
total_flos,0.0
train/epoch,3.0


In [20]:
# Evaluate the jointly trained Trainer in `model` on the Hindi validation split (IPA).
finetune_transcription(train_lang='both', eval_lang='hin', ipa=True, model=model)

finetuning on hin ipa


Final evaluation on hin


{'eval_loss': 1.1346218585968018, 'eval_accuracy': 0.38333333333333336, 'eval_precision': 0.3872128265745287, 'eval_recall': 0.38333333333333336, 'eval_f1': 0.37299784555219334, 'eval_runtime': 0.9254, 'eval_samples_per_second': 194.503, 'eval_steps_per_second': 12.967, 'epoch': 3.0}


0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.38333
eval/f1,0.373
eval/loss,1.13462
eval/precision,0.38721
eval/recall,0.38333
eval/runtime,0.9254
eval/samples_per_second,194.503
eval/steps_per_second,12.967
train/epoch,3.0
train/global_step,2190.0


In [21]:
# Evaluate the jointly trained Trainer in `model` on the Urdu validation split (IPA).
finetune_transcription(train_lang='both', eval_lang='urd', ipa=True, model=model)

finetuning on urd ipa


Final evaluation on urd
{'eval_loss': 0.8494318127632141, 'eval_accuracy': 0.6267348429510592, 'eval_precision': 0.6224711021011972, 'eval_recall': 0.6267348429510592, 'eval_f1': 0.6170482667388592, 'eval_runtime': 2.6734, 'eval_samples_per_second': 1024.18, 'eval_steps_per_second': 64.339, 'epoch': 3.0}


0,1
eval/accuracy,▁
eval/f1,▁
eval/loss,▁
eval/precision,▁
eval/recall,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁
train/global_step,▁

0,1
eval/accuracy,0.62673
eval/f1,0.61705
eval/loss,0.84943
eval/precision,0.62247
eval/recall,0.62673
eval/runtime,2.6734
eval/samples_per_second,1024.18
eval/steps_per_second,64.339
train/epoch,3.0
train/global_step,2190.0
