In [1]:
import torch
import transformers

from datasets import load_dataset
import numpy as np

from project import CreditDefault, PII, SkinCancer, set_seed

  from .autonotebook import tqdm as notebook_tqdm


## Credit-card default

In [2]:
# Seed handed to the project's `set_seed` helper before any data is prepared
# or any model is trained.
SEED = 1234
set_seed(SEED)

In [3]:
# Device string handed to `prepare_dataset`.
# NOTE(review): "cuda" is hard-coded here and again in the training call, so
# this presumably requires a CUDA-capable GPU - confirm before running elsewhere.
CREDIT_DEVICE = "cuda"

# Create the credit-card default task object, then prepare its dataset.
credit = CreditDefault()
credit.prepare_dataset(CREDIT_DEVICE)

In [6]:
# Learning rate shared by every optimizer below, and the floor that the
# cosine-with-restarts schedule anneals down to.
BASE_LR = 0.01
MIN_LR = 1e-4

# name -> (optimizer class, constructor kwargs).
optimizers = {
    "adamw": (torch.optim.AdamW, {"lr": BASE_LR}),
    "rmsprop": (torch.optim.RMSprop, {"lr": BASE_LR}),
    "adagrad": (torch.optim.Adagrad, {"lr": BASE_LR}),
}

# name -> (scheduler class, constructor kwargs besides the optimizer).
schedulers = {
    # LinearLR multiplies the optimizer LR by a factor that moves from
    # start_factor to end_factor over total_iters scheduler steps, i.e.
    # BASE_LR * 0.01 down to BASE_LR * 0.0001 here.
    "linear": (
        torch.optim.lr_scheduler.LinearLR,
        {"start_factor": 0.01, "end_factor": 0.0001, "total_iters": 2},
    ),
    # eta_min must sit below the optimizer LR. It used to be 0.01, equal to
    # BASE_LR, which zeroes the (base_lr - eta_min) amplitude of the cosine
    # term and leaves the learning rate constant.
    "cos_warm": (
        torch.optim.lr_scheduler.CosineAnnealingWarmRestarts,
        {"T_0": 2, "eta_min": MIN_LR},
    ),
    # Library defaults. NOTE(review): ReduceLROnPlateau.step() expects the
    # monitored metric - confirm the training loop passes one.
    "plateau": (torch.optim.lr_scheduler.ReduceLROnPlateau, {}),
}

In [7]:
# Settings for the credit-default training run, gathered in one place so they
# are easy to tweak. The log header below (`|linear, adamw|`) suggests one run
# per scheduler/optimizer pair - confirm against `CreditDefault.train`.
train_config = {
    "result_csv": "test.csv",
    "optimizers": optimizers,
    "schedulers": schedulers,
    "epochs": 5,
    "batch_size": 1000,
    "device": "cuda",
    # The loss class itself is passed, not an instance.
    "loss_func": torch.nn.MSELoss,
}

df = credit.train(**train_config)

-------------------------------------
|linear, adamw|
Epoch 1
	batch 1 | loss: 0.4424096345901489 | accuracy: 0.537
	batch 2 | loss: 0.4221212863922119 | accuracy: 0.556
	batch 3 | loss: 0.41576284170150757 | accuracy: 0.563
	batch 4 | loss: 0.3999929428100586 | accuracy: 0.584
	batch 5 | loss: 0.3750000596046448 | accuracy: 0.605
	batch 6 | loss: 0.35946759581565857 | accuracy: 0.618
	batch 7 | loss: 0.3029974699020386 | accuracy: 0.673
	batch 8 | loss: 0.3039945960044861 | accuracy: 0.68
	batch 9 | loss: 0.24527829885482788 | accuracy: 0.725
	batch 10 | loss: 0.24620354175567627 | accuracy: 0.715
	batch 11 | loss: 0.2339823693037033 | accuracy: 0.742
	batch 12 | loss: 0.23700402677059174 | accuracy: 0.746
	batch 13 | loss: 0.22199484705924988 | accuracy: 0.767
	batch 14 | loss: 0.2247716784477234 | accuracy: 0.767
	batch 15 | loss: 0.23900000751018524 | accuracy: 0.755
	batch 16 | loss: 0.2389996200799942 | accuracy: 0.753
	batch 17 | loss: 0.2141554355621338 | accuracy: 0.78
	batch 

## PII

In [None]:
# Alternative source kept for reference (currently not loaded):
#   load_dataset(path="ai4privacy/pii-masking-200k", data_files="english_pii_43k.jsonl")
PII_DATASET_PATH = "conll2003"
dataset = load_dataset(path=PII_DATASET_PATH)

In [None]:
# NOTE(review): this rebinds `dataset`, replacing the conll2003 data loaded in
# the previous cell.
SKIN_CANCER_REPO = "marmal88/skin_cancer"
dataset = load_dataset(SKIN_CANCER_REPO)

Downloading readme: 100%|██████████| 3.24k/3.24k [00:00<?, ?B/s]
Downloading data: 100%|██████████| 521M/521M [01:21<00:00, 6.37MB/s] 
Downloading data: 100%|██████████| 525M/525M [01:24<00:00, 6.24MB/s] 
Downloading data: 100%|██████████| 527M/527M [01:24<00:00, 6.23MB/s] 
Downloading data: 100%|██████████| 528M/528M [01:19<00:00, 6.60MB/s] 
Downloading data: 100%|██████████| 548M/548M [01:26<00:00, 6.33MB/s] 
Downloading data: 100%|██████████| 341M/341M [00:48<00:00, 7.03MB/s] 
Downloading data: 100%|██████████| 348M/348M [00:48<00:00, 7.21MB/s] 
Downloading data: 100%|██████████| 355M/355M [00:54<00:00, 6.46MB/s] 
Generating train split: 100%|██████████| 9577/9577 [00:04<00:00, 2388.92 examples/s]
Generating validation split: 100%|██████████| 2492/2492 [00:01<00:00, 2204.03 examples/s]
Generating test split: 100%|██████████| 1285/1285 [00:00<00:00, 2136.98 examples/s]


In [None]:
# Show the first record of the training split to see which fields it has.
train_split = dataset["train"]
train_split[0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=600x450>,
 'image_id': 'ISIC_0024329',
 'lesion_id': 'HAM_0002954',
 'dx': 'actinic_keratoses',
 'dx_type': 'histo',
 'age': 75.0,
 'sex': 'female',
 'localization': 'lower extremity'}

SGD Algorithms: 
- "adamw_torch"
- "adagrad"
- "rmsprop"


LR Schedulers:

- "cosine"
- "inverse_sqrt"

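```python
# 1 / sqrt(max(step, 100)): constant for the first 100 steps, then decays.
# max() also avoids dividing by zero at step 0.
inverse_sqrt = lambda step: 1 / math.sqrt(max(step, 100))

scheduler = LambdaLR(optimizer, lr_lambda=inverse_sqrt)
```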


- "reduce_lr_on_plateau"