In [1]:
import torch
import transformers

from datasets import load_dataset
import numpy as np

from project import CreditDefault, PII, SkinCancer, set_seed

# Seed value handed to the project's set_seed helper before any experiment
# object is created; kept as a named constant so it is easy to find and change.
SEED = 1234
set_seed(SEED)

## Credit-card default

In [3]:
# Create the credit-card-default experiment object and prepare its dataset.
# NOTE(review): "cuda" is presumably the device the prepared data is placed on
# (the same string is later passed as `device` to credit.train) — confirm.
credit = CreditDefault()
credit.prepare_dataset("cuda")

In [6]:
# Optimizer grid for the credit-default run.
# Each entry maps a short name to (optimizer class, constructor kwargs);
# the whole dict is passed to credit.train(optimizers=...).
optimizers = {
    "adamw": (torch.optim.AdamW, {"lr": 2e-5}),
    "rmsprop": (torch.optim.RMSprop, {"lr": 2e-5}),
    "adagrad": (torch.optim.Adagrad, {"lr": 2e-5}),
}

# LR-scheduler grid for the credit-default run.
# Each entry maps a short name to (scheduler class, constructor kwargs);
# the whole dict is passed to credit.train(schedulers=...).
schedulers = {
    "linear": (torch.optim.lr_scheduler.LinearLR, {"start_factor": 0.01, "end_factor": 0.0001, "total_iters": 2}),
    # The torch class is `CosineAnnealingLR`; `CosineAnnealing` is not an
    # attribute of torch.optim.lr_scheduler and raised AttributeError here.
    # NOTE(review): eta_min (0.01) is larger than the base lr configured above
    # (2e-5), so this schedule would move the lr *up* towards 0.01 — confirm
    # that this is intended.
    "cosine": (torch.optim.lr_scheduler.CosineAnnealingLR, {"T_max": 100, "eta_min": 0.01}),
    "polynomial": (torch.optim.lr_scheduler.PolynomialLR, {"total_iters": 100, "power": 2}),
}

In [7]:
# Train the credit-default model with the optimizer and scheduler grids
# defined above; the returned object is kept in `df`.
df = credit.train(
    result_csv="test.csv",
    optimizers=optimizers,
    schedulers=schedulers,
    epochs=5,
    batch_size=1000,
    device="cuda",
    # The loss *class* is passed here, not an instance.
    loss_func=torch.nn.MSELoss,
)

-------------------------------------
|linear, adamw|
Epoch 1
	batch 1 | loss: 0.4424096345901489 | accuracy: 0.537
	batch 2 | loss: 0.4221212863922119 | accuracy: 0.556
	batch 3 | loss: 0.41576284170150757 | accuracy: 0.563
	batch 4 | loss: 0.3999929428100586 | accuracy: 0.584
	batch 5 | loss: 0.3750000596046448 | accuracy: 0.605
	batch 6 | loss: 0.35946759581565857 | accuracy: 0.618
	batch 7 | loss: 0.3029974699020386 | accuracy: 0.673
	batch 8 | loss: 0.3039945960044861 | accuracy: 0.68
	batch 9 | loss: 0.24527829885482788 | accuracy: 0.725
	batch 10 | loss: 0.24620354175567627 | accuracy: 0.715
	batch 11 | loss: 0.2339823693037033 | accuracy: 0.742
	batch 12 | loss: 0.23700402677059174 | accuracy: 0.746
	batch 13 | loss: 0.22199484705924988 | accuracy: 0.767
	batch 14 | loss: 0.2247716784477234 | accuracy: 0.767
	batch 15 | loss: 0.23900000751018524 | accuracy: 0.755
	batch 16 | loss: 0.2389996200799942 | accuracy: 0.753
	batch 17 | loss: 0.2141554355621338 | accuracy: 0.78
	batch 

## PII

In [2]:
# Checkpoint identifier for this section; it is later passed to pii.train as
# both `tokenizer_path` and `model_path`.
model_path = "distilbert/distilroberta-base"

# Create the PII experiment object, load the tokenizer for `model_path`,
# then prepare the dataset.
pii = PII()
pii.load_tokenizer(model_path)
pii.prepare_dataset()

In [7]:
# Optimizer grid for the PII run: short name -> (optimizer class, constructor kwargs).
# NOTE(review): adagrad uses lr=3e-5 while the other two use 2e-5 — confirm
# the asymmetry is deliberate.
optimizers = dict(
    adamw=(torch.optim.AdamW, dict(lr=2e-5)),
    rmsprop=(torch.optim.RMSprop, dict(lr=2e-5, momentum=0)),
    adagrad=(torch.optim.Adagrad, dict(lr=3e-5)),
)

# Scheduler grid for the PII run: short name -> keyword arguments.
# Unlike the credit-default section, no scheduler class is given here, only
# the name and its options.
schedulers = dict(
    linear=dict(num_warmup_steps=0),
    cosine=dict(num_warmup_steps=0),
    polynomial=dict(num_warmup_steps=0, power=2),
)

In [8]:
# Train the PII model with the optimizer and scheduler grids defined above;
# the returned object is kept in `df`.
# NOTE(review): the recorded output of this cell warns that checkpoint
# directories under this output_dir (e.g. "test//adamw_linear\checkpoint-500")
# already exist and are non-empty, so saved results may mix with stale ones.
# Consider a fresh or experiment-specific output directory.
df = pii.train(
    output_dir="test/",
    optimizers=optimizers,
    schedulers=schedulers,
    tokenizer_path=model_path,
    model_path=model_path,
    strategy="epoch",
    epochs=10,
    batch_size=150,
    result_csv="test.csv",
    device="cuda",
)

----------------------------
Optimizer: adamw | Scheduler: linear


Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'eval_loss': 0.13120447099208832, 'eval_precision': 0.7934772788869816, 'eval_recall': 0.8295239894219871, 'eval_f1': 0.8111003370734635, 'eval_accuracy': 0.9619373446657052, 'eval_runtime': 3.9377, 'eval_samples_per_second': 825.357, 'eval_steps_per_second': 5.587, 'epoch': 1.0}
{'eval_loss': 0.06966577470302582, 'eval_precision': 0.9029126213592233, 'eval_recall': 0.9222704948998867, 'eval_f1': 0.9124889034247536, 'eval_accuracy': 0.980040501979074, 'eval_runtime': 4.2822, 'eval_samples_per_second': 758.952, 'eval_steps_per_second': 5.138, 'epoch': 2.0}
{'eval_loss': 0.05863739550113678, 'eval_precision': 0.9199440820130476, 'eval_recall': 0.9322818284850775, 'eval_f1': 0.9260718641523594, 'eval_accuracy': 0.9825565340124575, 'eval_runtime': 4.0709, 'eval_samples_per_second': 798.356, 'eval_steps_per_second': 5.404, 'epoch': 3.0}
{'eval_loss': 0.053025200963020325, 'eval_precision': 0.9270378287944976, 'eval_recall': 0.9420098224404987, 'eval_f1': 0.9344638590902703, 'eval_accuracy'

Checkpoint destination directory test//adamw_linear\checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'loss': 0.1533, 'grad_norm': 0.6746379733085632, 'learning_rate': 9.316239316239318e-06, 'epoch': 5.32}
{'eval_loss': 0.05085839331150055, 'eval_precision': 0.9328066914498141, 'eval_recall': 0.9479599546656592, 'eval_f1': 0.940322278433577, 'eval_accuracy': 0.9856402074192262, 'eval_runtime': 3.9764, 'eval_samples_per_second': 817.332, 'eval_steps_per_second': 5.533, 'epoch': 6.0}
{'eval_loss': 0.05173797905445099, 'eval_precision': 0.9363069187075147, 'eval_recall': 0.9496599924442766, 'eval_f1': 0.9429361841796784, 'eval_accuracy': 0.9862078487926115, 'eval_runtime': 3.98, 'eval_samples_per_second': 816.583, 'eval_steps_per_second': 5.528, 'epoch': 7.0}
{'eval_loss': 0.05042613670229912, 'eval_precision': 0.9377736376339078, 'eval_recall': 0.9507933509633547, 'eval_f1': 0.9442386155794213, 'eval_accuracy': 0.9864686569911939, 'eval_runtime': 4.1097, 'eval_samples_per_second': 790.819, 'eval_steps_per_second': 5.353, 'epoch': 8.0}
{'eval_loss': 0.05087592825293541, 'eval_precision':

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'eval_loss': 0.1332087516784668, 'eval_precision': 0.7909131803868646, 'eval_recall': 0.8302795617680393, 'eval_f1': 0.8101184168087361, 'eval_accuracy': 0.9617072197846032, 'eval_runtime': 4.1014, 'eval_samples_per_second': 792.405, 'eval_steps_per_second': 5.364, 'epoch': 1.0}
{'eval_loss': 0.07017938047647476, 'eval_precision': 0.9055045019957301, 'eval_recall': 0.9213260294673215, 'eval_f1': 0.9133467534291466, 'eval_accuracy': 0.9795495688993894, 'eval_runtime': 4.0259, 'eval_samples_per_second': 807.264, 'eval_steps_per_second': 5.465, 'epoch': 2.0}
{'eval_loss': 0.05652053654193878, 'eval_precision': 0.9211335881420714, 'eval_recall': 0.9332262939176427, 'eval_f1': 0.9271405113769646, 'eval_accuracy': 0.9828940505047405, 'eval_runtime': 4.0496, 'eval_samples_per_second': 802.556, 'eval_steps_per_second': 5.433, 'epoch': 3.0}
{'eval_loss': 0.052355531603097916, 'eval_precision': 0.9326932040645101, 'eval_recall': 0.9449376652814507, 'eval_f1': 0.9387755102040817, 'eval_accuracy'

Checkpoint destination directory test//adamw_cosine\checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'loss': 0.154, 'grad_norm': 0.9108741283416748, 'learning_rate': 8.928015042125523e-06, 'epoch': 5.32}
{'eval_loss': 0.05079275369644165, 'eval_precision': 0.9350637268583124, 'eval_recall': 0.9492822062712505, 'eval_f1': 0.942119323241318, 'eval_accuracy': 0.9859623822527692, 'eval_runtime': 4.0458, 'eval_samples_per_second': 803.293, 'eval_steps_per_second': 5.438, 'epoch': 6.0}
{'eval_loss': 0.0516202487051487, 'eval_precision': 0.9369235069412094, 'eval_recall': 0.9497544389875331, 'eval_f1': 0.9432953426199522, 'eval_accuracy': 0.9862538737688319, 'eval_runtime': 3.974, 'eval_samples_per_second': 817.823, 'eval_steps_per_second': 5.536, 'epoch': 7.0}
{'eval_loss': 0.050755225121974945, 'eval_precision': 0.938995995156934, 'eval_recall': 0.9522100491122025, 'eval_f1': 0.9455568581477141, 'eval_accuracy': 0.9866680985548157, 'eval_runtime': 4.0091, 'eval_samples_per_second': 810.658, 'eval_steps_per_second': 5.488, 'epoch': 8.0}
{'eval_loss': 0.051185496151447296, 'eval_precision':

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'eval_loss': 0.14225637912750244, 'eval_precision': 0.7803478572709341, 'eval_recall': 0.8220627125047223, 'eval_f1': 0.8006623125747402, 'eval_accuracy': 0.9595287042435028, 'eval_runtime': 4.053, 'eval_samples_per_second': 801.87, 'eval_steps_per_second': 5.428, 'epoch': 1.0}
{'eval_loss': 0.07827768474817276, 'eval_precision': 0.893463810930576, 'eval_recall': 0.9140536456365697, 'eval_f1': 0.903641456582633, 'eval_accuracy': 0.9774631033107299, 'eval_runtime': 4.0544, 'eval_samples_per_second': 801.603, 'eval_steps_per_second': 5.426, 'epoch': 2.0}
{'eval_loss': 0.06198497116565704, 'eval_precision': 0.9135401188707281, 'eval_recall': 0.9290706460143558, 'eval_f1': 0.9212399325716426, 'eval_accuracy': 0.9816667178055292, 'eval_runtime': 4.0421, 'eval_samples_per_second': 804.032, 'eval_steps_per_second': 5.443, 'epoch': 3.0}
{'eval_loss': 0.05753719434142113, 'eval_precision': 0.9223841797418996, 'eval_recall': 0.9383264072534945, 'eval_f1': 0.9302869984549837, 'eval_accuracy': 0.

Checkpoint destination directory test//adamw_polynomial\checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'loss': 0.1641, 'grad_norm': 1.2281861305236816, 'learning_rate': 4.4179176711228e-06, 'epoch': 5.32}
{'eval_loss': 0.05387108400464058, 'eval_precision': 0.9279203869047619, 'eval_recall': 0.9422931620702683, 'eval_f1': 0.9350515463917526, 'eval_accuracy': 0.9845816329661563, 'eval_runtime': 4.1712, 'eval_samples_per_second': 779.156, 'eval_steps_per_second': 5.274, 'epoch': 6.0}
{'eval_loss': 0.053280774503946304, 'eval_precision': 0.9294806280776735, 'eval_recall': 0.9448432187381942, 'eval_f1': 0.9370989649196759, 'eval_accuracy': 0.9851646159982818, 'eval_runtime': 4.0251, 'eval_samples_per_second': 807.435, 'eval_steps_per_second': 5.466, 'epoch': 7.0}
{'eval_loss': 0.053525324910879135, 'eval_precision': 0.9301914854062093, 'eval_recall': 0.9451265583679638, 'eval_f1': 0.9375995502670289, 'eval_accuracy': 0.985195299315762, 'eval_runtime': 4.1106, 'eval_samples_per_second': 790.644, 'eval_steps_per_second': 5.352, 'epoch': 8.0}
{'eval_loss': 0.053594380617141724, 'eval_precisio

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'eval_loss': 0.07745960354804993, 'eval_precision': 0.8957043879907621, 'eval_recall': 0.915753683415187, 'eval_f1': 0.9056180824732639, 'eval_accuracy': 0.9775398116044307, 'eval_runtime': 3.9849, 'eval_samples_per_second': 815.583, 'eval_steps_per_second': 5.521, 'epoch': 1.0}
{'eval_loss': 0.056669652462005615, 'eval_precision': 0.9287246944677675, 'eval_recall': 0.9402153381186249, 'eval_f1': 0.9344346928239545, 'eval_accuracy': 0.9838298916878893, 'eval_runtime': 4.6169, 'eval_samples_per_second': 703.941, 'eval_steps_per_second': 4.765, 'epoch': 2.0}
{'eval_loss': 0.052368924021720886, 'eval_precision': 0.9327488107452663, 'eval_recall': 0.9444654325651681, 'eval_f1': 0.9385705570416256, 'eval_accuracy': 0.9847964161885183, 'eval_runtime': 4.0259, 'eval_samples_per_second': 807.27, 'eval_steps_per_second': 5.465, 'epoch': 3.0}
{'eval_loss': 0.05050984025001526, 'eval_precision': 0.9361364907700913, 'eval_recall': 0.9483377408386853, 'eval_f1': 0.9421976165900348, 'eval_accuracy'

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'eval_loss': 0.07774508744478226, 'eval_precision': 0.8958660871173587, 'eval_recall': 0.9149036645258783, 'eval_f1': 0.9052847997757114, 'eval_accuracy': 0.9776625448743518, 'eval_runtime': 3.9675, 'eval_samples_per_second': 819.158, 'eval_steps_per_second': 5.545, 'epoch': 1.0}
{'eval_loss': 0.05681734159588814, 'eval_precision': 0.9273862788963461, 'eval_recall': 0.9396486588590858, 'eval_f1': 0.933477200225183, 'eval_accuracy': 0.9836764751004878, 'eval_runtime': 3.9253, 'eval_samples_per_second': 827.964, 'eval_steps_per_second': 5.605, 'epoch': 2.0}
{'eval_loss': 0.05257377773523331, 'eval_precision': 0.9325382035035408, 'eval_recall': 0.9452210049112203, 'eval_f1': 0.9388367729831144, 'eval_accuracy': 0.9850572243871007, 'eval_runtime': 4.0122, 'eval_samples_per_second': 810.02, 'eval_steps_per_second': 5.483, 'epoch': 3.0}
{'eval_loss': 0.050495512783527374, 'eval_precision': 0.9353396068200875, 'eval_recall': 0.9481488477521722, 'eval_f1': 0.9417006707002485, 'eval_accuracy':

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'eval_loss': 0.07879018783569336, 'eval_precision': 0.8937309574369864, 'eval_recall': 0.9142425387230827, 'eval_f1': 0.9038703954432981, 'eval_accuracy': 0.9771102451597067, 'eval_runtime': 4.0155, 'eval_samples_per_second': 809.358, 'eval_steps_per_second': 5.479, 'epoch': 1.0}
{'eval_loss': 0.058572858572006226, 'eval_precision': 0.9262126865671642, 'eval_recall': 0.9377597279939555, 'eval_f1': 0.9319504411488643, 'eval_accuracy': 0.9831702003620632, 'eval_runtime': 4.0321, 'eval_samples_per_second': 806.035, 'eval_steps_per_second': 5.456, 'epoch': 2.0}
{'eval_loss': 0.053742073476314545, 'eval_precision': 0.928604672810202, 'eval_recall': 0.9421987155270117, 'eval_f1': 0.9353523041582673, 'eval_accuracy': 0.9842747997913535, 'eval_runtime': 4.1321, 'eval_samples_per_second': 786.522, 'eval_steps_per_second': 5.324, 'epoch': 3.0}
{'eval_loss': 0.05085025727748871, 'eval_precision': 0.9336872496973083, 'eval_recall': 0.946826596146581, 'eval_f1': 0.9402110199296602, 'eval_accuracy'

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'eval_loss': 0.28966662287712097, 'eval_precision': 0.6083135600667897, 'eval_recall': 0.6537589724216094, 'eval_f1': 0.6302180543542587, 'eval_accuracy': 0.9223251817986561, 'eval_runtime': 3.9089, 'eval_samples_per_second': 831.428, 'eval_steps_per_second': 5.628, 'epoch': 1.0}
{'eval_loss': 0.2080087661743164, 'eval_precision': 0.6982238166068773, 'eval_recall': 0.7536834151870041, 'eval_f1': 0.7248943997819868, 'eval_accuracy': 0.9419164800098186, 'eval_runtime': 3.9328, 'eval_samples_per_second': 826.388, 'eval_steps_per_second': 5.594, 'epoch': 2.0}
{'eval_loss': 0.1772531121969223, 'eval_precision': 0.7330237448523613, 'eval_recall': 0.7901397808840196, 'eval_f1': 0.7605108858688242, 'eval_accuracy': 0.9500322174833543, 'eval_runtime': 3.9726, 'eval_samples_per_second': 818.104, 'eval_steps_per_second': 5.538, 'epoch': 3.0}
{'eval_loss': 0.1612963080406189, 'eval_precision': 0.7531321686959591, 'eval_recall': 0.8061956932376275, 'eval_f1': 0.7787610619469026, 'eval_accuracy': 0

Checkpoint destination directory test//adagrad_linear\checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'loss': 0.2928, 'grad_norm': 1.1826488971710205, 'learning_rate': 1.3974358974358975e-05, 'epoch': 5.32}
{'eval_loss': 0.1456688940525055, 'eval_precision': 0.777033663719975, 'eval_recall': 0.8218738194182093, 'eval_f1': 0.7988249873777941, 'eval_accuracy': 0.9589610628701175, 'eval_runtime': 4.008, 'eval_samples_per_second': 810.886, 'eval_steps_per_second': 5.489, 'epoch': 6.0}
{'eval_loss': 0.1416858285665512, 'eval_precision': 0.7797003210845522, 'eval_recall': 0.82565168114847, 'eval_f1': 0.8020183486238532, 'eval_accuracy': 0.9597895124420852, 'eval_runtime': 3.9955, 'eval_samples_per_second': 813.41, 'eval_steps_per_second': 5.506, 'epoch': 7.0}
{'eval_loss': 0.13919326663017273, 'eval_precision': 0.7832393231265109, 'eval_recall': 0.8262183604080091, 'eval_f1': 0.8041549846026567, 'eval_accuracy': 0.9602190788868092, 'eval_runtime': 3.9319, 'eval_samples_per_second': 826.564, 'eval_steps_per_second': 5.595, 'epoch': 8.0}
{'eval_loss': 0.13785520195960999, 'eval_precision': 0.

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'eval_loss': 0.28139516711235046, 'eval_precision': 0.618368962787015, 'eval_recall': 0.6638647525500566, 'eval_f1': 0.6403097244363471, 'eval_accuracy': 0.9244883556810163, 'eval_runtime': 4.062, 'eval_samples_per_second': 800.09, 'eval_steps_per_second': 5.416, 'epoch': 1.0}
{'eval_loss': 0.19794824719429016, 'eval_precision': 0.7112318205712284, 'eval_recall': 0.7667170381564035, 'eval_f1': 0.7379329151895283, 'eval_accuracy': 0.9448620784879261, 'eval_runtime': 3.9274, 'eval_samples_per_second': 827.524, 'eval_steps_per_second': 5.602, 'epoch': 2.0}
{'eval_loss': 0.16779176890850067, 'eval_precision': 0.7429803714461755, 'eval_recall': 0.7972232716282585, 'eval_f1': 0.769146658162103, 'eval_accuracy': 0.9521033414132736, 'eval_runtime': 4.1261, 'eval_samples_per_second': 787.67, 'eval_steps_per_second': 5.332, 'epoch': 3.0}
{'eval_loss': 0.15220597386360168, 'eval_precision': 0.7669213004085983, 'eval_recall': 0.8154514544767661, 'eval_f1': 0.7904421862125791, 'eval_accuracy': 0.9

Checkpoint destination directory test//adagrad_cosine\checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'loss': 0.2831, 'grad_norm': 1.14810049533844, 'learning_rate': 1.3392022563188283e-05, 'epoch': 5.32}
{'eval_loss': 0.13840194046497345, 'eval_precision': 0.7850366923214606, 'eval_recall': 0.8284850774461655, 'eval_f1': 0.8061759029500966, 'eval_accuracy': 0.9605872786965727, 'eval_runtime': 3.9551, 'eval_samples_per_second': 821.718, 'eval_steps_per_second': 5.562, 'epoch': 6.0}
{'eval_loss': 0.13562233746051788, 'eval_precision': 0.7876351290985437, 'eval_recall': 0.8326407253494522, 'eval_f1': 0.8095128781965933, 'eval_accuracy': 0.9614003866098002, 'eval_runtime': 4.1635, 'eval_samples_per_second': 780.601, 'eval_steps_per_second': 5.284, 'epoch': 7.0}
{'eval_loss': 0.13425825536251068, 'eval_precision': 0.7899919434249396, 'eval_recall': 0.8334907442387609, 'eval_f1': 0.8111586010386507, 'eval_accuracy': 0.961691878125863, 'eval_runtime': 4.0175, 'eval_samples_per_second': 808.965, 'eval_steps_per_second': 5.476, 'epoch': 8.0}
{'eval_loss': 0.13379499316215515, 'eval_precision'

Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at distilbert/distilroberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'eval_loss': 0.29934048652648926, 'eval_precision': 0.5962062000526916, 'eval_recall': 0.6411975821684927, 'eval_f1': 0.6178839590443685, 'eval_accuracy': 0.9192875333681078, 'eval_runtime': 3.9493, 'eval_samples_per_second': 822.921, 'eval_steps_per_second': 5.571, 'epoch': 1.0}
{'eval_loss': 0.22242829203605652, 'eval_precision': 0.6837509822753863, 'eval_recall': 0.7396108802417831, 'eval_f1': 0.7105848192005807, 'eval_accuracy': 0.9389401982142309, 'eval_runtime': 3.9487, 'eval_samples_per_second': 823.047, 'eval_steps_per_second': 5.571, 'epoch': 2.0}
{'eval_loss': 0.1935511827468872, 'eval_precision': 0.7123072903057225, 'eval_recall': 0.7723838307517945, 'eval_f1': 0.7411300919842312, 'eval_accuracy': 0.9456751864011537, 'eval_runtime': 3.9712, 'eval_samples_per_second': 818.384, 'eval_steps_per_second': 5.54, 'epoch': 3.0}
{'eval_loss': 0.18016143143177032, 'eval_precision': 0.7315641968821159, 'eval_recall': 0.7889119758216849, 'eval_f1': 0.7591565936562755, 'eval_accuracy': 

Checkpoint destination directory test//adagrad_polynomial\checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'loss': 0.3092, 'grad_norm': 1.234157919883728, 'learning_rate': 6.58772554605888e-06, 'epoch': 5.32}
{'eval_loss': 0.1688489019870758, 'eval_precision': 0.7424349049964813, 'eval_recall': 0.7971288250850019, 'eval_f1': 0.7688103479686645, 'eval_accuracy': 0.952041974778313, 'eval_runtime': 3.9689, 'eval_samples_per_second': 818.868, 'eval_steps_per_second': 5.543, 'epoch': 6.0}
{'eval_loss': 0.1669256091117859, 'eval_precision': 0.7442126573365021, 'eval_recall': 0.7985455232338496, 'eval_f1': 0.77042234270354, 'eval_accuracy': 0.9525329078579976, 'eval_runtime': 4.0167, 'eval_samples_per_second': 809.13, 'eval_steps_per_second': 5.477, 'epoch': 7.0}
{'eval_loss': 0.16586633026599884, 'eval_precision': 0.746079295154185, 'eval_recall': 0.7997733282961843, 'eval_f1': 0.7719938007110949, 'eval_accuracy': 0.9528243993740603, 'eval_runtime': 4.0116, 'eval_samples_per_second': 810.144, 'eval_steps_per_second': 5.484, 'epoch': 8.0}
{'eval_loss': 0.16548708081245422, 'eval_precision': 0.746

## Melanoma identification

In [2]:
# Checkpoint identifier for this section; it is later passed to
# skin_cancer.train as both `image_processor_path` and `model_path`.
skincancer_model_path = "apple/mobilevit-small"

# Create the skin-cancer experiment object, load the image processor for the
# checkpoint, then prepare the dataset.
skin_cancer = SkinCancer()
skin_cancer.load_image_processor(skincancer_model_path)
skin_cancer.prepare_dataset()

In [3]:
# Optimizer grid for the skin-cancer run: short name -> (optimizer class, constructor kwargs).
# NOTE(review): adagrad uses lr=3e-5 while the other two use 2e-4 — confirm
# the asymmetry is deliberate.
optimizers = dict(
    adamw=(torch.optim.AdamW, dict(lr=2e-4)),
    rmsprop=(torch.optim.RMSprop, dict(lr=2e-4, momentum=0)),
    adagrad=(torch.optim.Adagrad, dict(lr=3e-5)),
)

# Scheduler grid for the skin-cancer run: short name -> keyword arguments.
schedulers = dict(
    linear=dict(num_warmup_steps=0),
    cosine=dict(num_warmup_steps=0),
    polynomial=dict(num_warmup_steps=0, power=2),
)

In [4]:
# Train the skin-cancer model with the optimizer and scheduler grids defined
# above; the returned object is kept in `df`.
# NOTE(review): the recorded run of this cell fails with
#   TypeError: MobileViTForImageClassification.forward() got an unexpected
#   keyword argument 'age'
# so a dataset column such as 'age' appears to reach the model's forward().
# Presumably the extra columns must be dropped in the project's dataset
# preparation or collation — verify there.
df = skin_cancer.train(
    output_dir="test/",
    result_csv="test.csv",
    optimizers=optimizers,
    schedulers=schedulers,
    image_processor_path=skincancer_model_path,
    model_path=skincancer_model_path,
    epochs=1,
    batch_size=24,
    strategy="epoch",
    device="cuda",
)

----------------------------
Optimizer: adamw | Scheduler: linear


Some weights of MobileViTForImageClassification were not initialized from the model checkpoint at apple/mobilevit-small and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 640]) in the checkpoint and torch.Size([7, 640]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


TypeError: MobileViTForImageClassification.forward() got an unexpected keyword argument 'age'

In [41]:
# Scratch cell: load the "conll2003" dataset into `dataset`.
# The commented-out line below is an alternative source that is currently disabled.
#dataset = load_dataset(path = "ai4privacy/pii-masking-200k", data_files = "english_pii_43k.jsonl")
dataset = load_dataset(path = "conll2003")

In [5]:
# Load "marmal88/skin_cancer" for inspection; this rebinds `dataset`.
dataset = load_dataset("marmal88/skin_cancer")

In [6]:
# Display the loaded DatasetDict (splits, feature names and row counts).
dataset

DatasetDict({
    train: Dataset({
        features: ['image', 'image_id', 'lesion_id', 'dx', 'dx_type', 'age', 'sex', 'localization'],
        num_rows: 9577
    })
    validation: Dataset({
        features: ['image', 'image_id', 'lesion_id', 'dx', 'dx_type', 'age', 'sex', 'localization'],
        num_rows: 2492
    })
    test: Dataset({
        features: ['image', 'image_id', 'lesion_id', 'dx', 'dx_type', 'age', 'sex', 'localization'],
        num_rows: 1285
    })
})

In [14]:
# Show the "dx" field of the first training record (presumably the diagnosis label — confirm).
dataset["train"][0]["dx"]

'actinic_keratoses'

SGD Algorithms: 
- "adamw_torch"
- "adagrad"
- "rmsprop"


LR Schedulers:

- "cosine"
- "inverse_sqrt"

inverse_sqrt = lambda step: 1 / math.sqrt(max(step, 100))

scheduler = LambdaLR(optimizer, lr_lambda = inverse_sqrt)


- "reduce_lr_on_plateau"