This notebook is using transfer learning from a ViT model from huggingface


In [1]:
import multiprocessing
import transformers
import torch
import wandb
import numpy as np
import pytorch_lightning as pl

from torchvision import transforms
from torchvision.datasets import Food101
from torchvision.datasets.utils import download_url
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

from datasets import load_metric, load_dataset
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer
from transformers import AutoFeatureExtractor
from transformers import DefaultDataCollator

In [3]:
metric = load_metric("accuracy")
food = load_dataset("food101", split="train[:5000]")

splits = food.train_test_split(test_size=0.15)
train = splits['train']
val = splits['test']

display(train.shape, val.shape)

Using custom data configuration default
Reusing dataset food101 (C:\Users\truon\.cache\huggingface\datasets\food101\default\0.0.0\7cebe41a80fb2da3f08fcbef769c8874073a86346f7fb96dc0847d4dfc318295)


(4250, 2)

(750, 2)

In [4]:
# Process image to tensor using ViT method (16x16 patches)
feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")
feature_extractor

ViTFeatureExtractor {
  "do_normalize": true,
  "do_resize": true,
  "feature_extractor_type": "ViTFeatureExtractor",
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "resample": 2,
  "size": 224
}

The `AutoFeatureExtractor.from_pretrained()` method helps us make sure we are 
- (1) resizing the inputs to the appropriate size 
- (2) using the appropriate image mean and standard deviation for the model architecture we are going to use

In [5]:
normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
_transforms = Compose(
    [
            RandomResizedCrop(feature_extractor.size)
            ,ToTensor()
            ,normalize
    ]
)

val_transforms = Compose(
        [
            RandomResizedCrop(feature_extractor.size),
            ToTensor(),
            normalize,
        ]
    )

def transforms(examples):
    examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["image"]]
    del examples["image"]
    return examples

# when the data is loaded, it will apply the transformation above
train = train.with_transform(transforms)
val = val.with_transform(transforms)

# example of how our data is organized, dictionary with 2 key-value pairs
train[0]


{'label': 6,
 'pixel_values': tensor([[[-0.1137, -0.1373, -0.1216,  ...,  0.3961,  0.3725,  0.0824],
          [-0.0902, -0.1216, -0.1529,  ...,  0.4039,  0.1843, -0.0980],
          [-0.0667, -0.1059, -0.1608,  ...,  0.4745,  0.2000, -0.0431],
          ...,
          [-0.9608, -0.9765, -0.9843,  ...,  0.3412,  0.4510,  0.3882],
          [-0.9529, -0.9686, -0.9765,  ...,  0.5216,  0.5843,  0.5059],
          [-0.9451, -0.9608, -0.9608,  ...,  0.6549,  0.6392,  0.5765]],
 
         [[ 0.1451,  0.1373,  0.1529,  ...,  0.6549,  0.6235,  0.3333],
          [ 0.1765,  0.1529,  0.1137,  ...,  0.6784,  0.4510,  0.1765],
          [ 0.2078,  0.1529,  0.0824,  ...,  0.7569,  0.4824,  0.2392],
          ...,
          [-0.9529, -0.9608, -0.9686,  ...,  0.5922,  0.7020,  0.6392],
          [-0.9451, -0.9608, -0.9608,  ...,  0.7569,  0.8196,  0.7412],
          [-0.9373, -0.9529, -0.9529,  ...,  0.8902,  0.8667,  0.8039]],
 
         [[ 0.3176,  0.3098,  0.3255,  ...,  0.7569,  0.7255,  0.4353],

In [6]:
# 3 RGBs, 224 x 224 pixels
train[0]['pixel_values'].shape

torch.Size([3, 224, 224])

In [7]:
# ~ creating a dataloader, creates batches 
# `pt` is for PyTorch Tensor
data_collator = DefaultDataCollator()
data_collator

DefaultDataCollator(return_tensors='pt')

In [8]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [9]:
labels = train.features["label"].names
label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = str(i)
    id2label[str(i)] = label

model = AutoModelForImageClassification.from_pretrained(
    pretrained_model_name_or_path="google/vit-base-patch16-224-in21k",
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)

Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=32,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=10,
    fp16=True,
    save_steps=100,
    eval_steps=100,
    logging_steps=10,
    learning_rate=2e-4,
    save_total_limit=2,
    remove_unused_columns=False,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="wandb",
    #push_to_hub=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train,
    eval_dataset=val,
    compute_metrics = compute_metrics,
    tokenizer=feature_extractor,
)


PyTorch: setting up devices
Using cuda_amp half precision backend


In [15]:
train_results = trainer.train()
trainer.save_model(output_dir = "./models/")
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

***** Running training *****
  Num examples = 4250
  Num Epochs = 10
  Instantaneous batch size per device = 32
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1330
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


  0%|          | 0/1330 [00:00<?, ?it/s]

{'loss': 0.3071, 'learning_rate': 0.00019849624060150375, 'epoch': 0.08}
{'loss': 0.2252, 'learning_rate': 0.00019699248120300754, 'epoch': 0.15}
{'loss': 0.1418, 'learning_rate': 0.00019548872180451127, 'epoch': 0.23}
{'loss': 0.2298, 'learning_rate': 0.00019398496240601503, 'epoch': 0.3}
{'loss': 0.1833, 'learning_rate': 0.0001924812030075188, 'epoch': 0.38}
{'loss': 0.1479, 'learning_rate': 0.00019097744360902256, 'epoch': 0.45}
{'loss': 0.1387, 'learning_rate': 0.00018947368421052632, 'epoch': 0.53}
{'loss': 0.1216, 'learning_rate': 0.00018796992481203009, 'epoch': 0.6}
{'loss': 0.1127, 'learning_rate': 0.00018646616541353382, 'epoch': 0.68}
{'loss': 0.1442, 'learning_rate': 0.0001849624060150376, 'epoch': 0.75}
{'loss': 0.2092, 'learning_rate': 0.00018345864661654135, 'epoch': 0.83}
{'loss': 0.0971, 'learning_rate': 0.0001819548872180451, 'epoch': 0.9}
{'loss': 0.2686, 'learning_rate': 0.00018045112781954887, 'epoch': 0.98}


***** Running Evaluation *****
  Num examples = 750
  Batch size = 8


  0%|          | 0/94 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-133
Configuration saved in ./results\checkpoint-133\config.json


{'eval_loss': 0.3834095001220703, 'eval_accuracy': 0.8933333333333333, 'eval_runtime': 7.0882, 'eval_samples_per_second': 105.809, 'eval_steps_per_second': 13.261, 'epoch': 1.0}


Model weights saved in ./results\checkpoint-133\pytorch_model.bin
Feature extractor saved in ./results\checkpoint-133\preprocessor_config.json


{'loss': 0.1715, 'learning_rate': 0.00017894736842105264, 'epoch': 1.05}
{'loss': 0.203, 'learning_rate': 0.0001774436090225564, 'epoch': 1.13}
{'loss': 0.2424, 'learning_rate': 0.00017593984962406016, 'epoch': 1.2}
{'loss': 0.2242, 'learning_rate': 0.0001744360902255639, 'epoch': 1.28}
{'loss': 0.2288, 'learning_rate': 0.0001729323308270677, 'epoch': 1.35}
{'loss': 0.2521, 'learning_rate': 0.00017142857142857143, 'epoch': 1.43}
{'loss': 0.1919, 'learning_rate': 0.0001699248120300752, 'epoch': 1.5}
{'loss': 0.2513, 'learning_rate': 0.00016842105263157895, 'epoch': 1.58}
{'loss': 0.3379, 'learning_rate': 0.00016691729323308271, 'epoch': 1.65}
{'loss': 0.2317, 'learning_rate': 0.00016541353383458648, 'epoch': 1.73}
{'loss': 0.1691, 'learning_rate': 0.00016390977443609024, 'epoch': 1.8}
{'loss': 0.3017, 'learning_rate': 0.00016240601503759398, 'epoch': 1.88}
{'loss': 0.217, 'learning_rate': 0.00016090225563909777, 'epoch': 1.95}


***** Running Evaluation *****
  Num examples = 750
  Batch size = 8


  0%|          | 0/94 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-266
Configuration saved in ./results\checkpoint-266\config.json


{'eval_loss': 0.33890262246131897, 'eval_accuracy': 0.912, 'eval_runtime': 7.1448, 'eval_samples_per_second': 104.972, 'eval_steps_per_second': 13.157, 'epoch': 2.0}


Model weights saved in ./results\checkpoint-266\pytorch_model.bin
Feature extractor saved in ./results\checkpoint-266\preprocessor_config.json


{'loss': 0.2092, 'learning_rate': 0.0001593984962406015, 'epoch': 2.03}
{'loss': 0.2391, 'learning_rate': 0.00015789473684210527, 'epoch': 2.11}
{'loss': 0.1928, 'learning_rate': 0.00015639097744360903, 'epoch': 2.18}
{'loss': 0.1216, 'learning_rate': 0.0001548872180451128, 'epoch': 2.26}
{'loss': 0.1626, 'learning_rate': 0.00015338345864661653, 'epoch': 2.33}
{'loss': 0.1679, 'learning_rate': 0.00015187969924812032, 'epoch': 2.41}
{'loss': 0.1718, 'learning_rate': 0.00015037593984962405, 'epoch': 2.48}
{'loss': 0.2066, 'learning_rate': 0.00014887218045112784, 'epoch': 2.56}
{'loss': 0.105, 'learning_rate': 0.00014736842105263158, 'epoch': 2.63}
{'loss': 0.1982, 'learning_rate': 0.00014586466165413534, 'epoch': 2.71}
{'loss': 0.275, 'learning_rate': 0.0001443609022556391, 'epoch': 2.78}
{'loss': 0.1738, 'learning_rate': 0.00014285714285714287, 'epoch': 2.86}
{'loss': 0.0918, 'learning_rate': 0.0001413533834586466, 'epoch': 2.93}


***** Running Evaluation *****
  Num examples = 750
  Batch size = 8


  0%|          | 0/94 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-399
Configuration saved in ./results\checkpoint-399\config.json


{'eval_loss': 0.33352261781692505, 'eval_accuracy': 0.8986666666666666, 'eval_runtime': 7.2128, 'eval_samples_per_second': 103.982, 'eval_steps_per_second': 13.032, 'epoch': 3.0}


Model weights saved in ./results\checkpoint-399\pytorch_model.bin
Feature extractor saved in ./results\checkpoint-399\preprocessor_config.json
Deleting older checkpoint [results\checkpoint-133] due to args.save_total_limit


{'loss': 0.1747, 'learning_rate': 0.0001398496240601504, 'epoch': 3.01}
{'loss': 0.2128, 'learning_rate': 0.00013834586466165413, 'epoch': 3.08}
{'loss': 0.1345, 'learning_rate': 0.0001368421052631579, 'epoch': 3.16}
{'loss': 0.1051, 'learning_rate': 0.00013533834586466166, 'epoch': 3.23}
{'loss': 0.2034, 'learning_rate': 0.00013383458646616542, 'epoch': 3.31}
{'loss': 0.2135, 'learning_rate': 0.00013233082706766918, 'epoch': 3.38}
{'loss': 0.1409, 'learning_rate': 0.00013082706766917294, 'epoch': 3.46}
{'loss': 0.1084, 'learning_rate': 0.00012932330827067668, 'epoch': 3.53}
{'loss': 0.1449, 'learning_rate': 0.00012781954887218047, 'epoch': 3.61}
{'loss': 0.1265, 'learning_rate': 0.0001263157894736842, 'epoch': 3.68}
{'loss': 0.1387, 'learning_rate': 0.00012481203007518797, 'epoch': 3.76}
{'loss': 0.1593, 'learning_rate': 0.00012330827067669173, 'epoch': 3.83}
{'loss': 0.2145, 'learning_rate': 0.0001218045112781955, 'epoch': 3.91}
{'loss': 0.2231, 'learning_rate': 0.0001203007518796992

***** Running Evaluation *****
  Num examples = 750
  Batch size = 8


  0%|          | 0/94 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-532
Configuration saved in ./results\checkpoint-532\config.json


{'eval_loss': 0.30968499183654785, 'eval_accuracy': 0.9066666666666666, 'eval_runtime': 7.0057, 'eval_samples_per_second': 107.055, 'eval_steps_per_second': 13.418, 'epoch': 4.0}


Model weights saved in ./results\checkpoint-532\pytorch_model.bin
Feature extractor saved in ./results\checkpoint-532\preprocessor_config.json
Deleting older checkpoint [results\checkpoint-399] due to args.save_total_limit


{'loss': 0.1108, 'learning_rate': 0.00011879699248120302, 'epoch': 4.06}
{'loss': 0.1266, 'learning_rate': 0.00011729323308270677, 'epoch': 4.14}
{'loss': 0.1448, 'learning_rate': 0.00011578947368421053, 'epoch': 4.21}
{'loss': 0.1426, 'learning_rate': 0.00011428571428571428, 'epoch': 4.29}
{'loss': 0.0821, 'learning_rate': 0.00011278195488721806, 'epoch': 4.36}
{'loss': 0.0769, 'learning_rate': 0.00011127819548872181, 'epoch': 4.44}
{'loss': 0.0775, 'learning_rate': 0.00010977443609022557, 'epoch': 4.51}
{'loss': 0.0913, 'learning_rate': 0.00010827067669172932, 'epoch': 4.59}
{'loss': 0.1402, 'learning_rate': 0.0001067669172932331, 'epoch': 4.66}
{'loss': 0.1133, 'learning_rate': 0.00010526315789473685, 'epoch': 4.74}
{'loss': 0.0647, 'learning_rate': 0.00010375939849624061, 'epoch': 4.81}
{'loss': 0.2037, 'learning_rate': 0.00010225563909774436, 'epoch': 4.89}
{'loss': 0.1217, 'learning_rate': 0.00010075187969924814, 'epoch': 4.96}


***** Running Evaluation *****
  Num examples = 750
  Batch size = 8


  0%|          | 0/94 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-665
Configuration saved in ./results\checkpoint-665\config.json


{'eval_loss': 0.3296864330768585, 'eval_accuracy': 0.9093333333333333, 'eval_runtime': 7.2518, 'eval_samples_per_second': 103.423, 'eval_steps_per_second': 12.962, 'epoch': 5.0}


Model weights saved in ./results\checkpoint-665\pytorch_model.bin
Feature extractor saved in ./results\checkpoint-665\preprocessor_config.json
Deleting older checkpoint [results\checkpoint-532] due to args.save_total_limit


{'loss': 0.0954, 'learning_rate': 9.924812030075187e-05, 'epoch': 5.04}
{'loss': 0.1289, 'learning_rate': 9.774436090225564e-05, 'epoch': 5.11}
{'loss': 0.1183, 'learning_rate': 9.62406015037594e-05, 'epoch': 5.19}
{'loss': 0.1103, 'learning_rate': 9.473684210526316e-05, 'epoch': 5.26}
{'loss': 0.0825, 'learning_rate': 9.323308270676691e-05, 'epoch': 5.34}
{'loss': 0.1384, 'learning_rate': 9.172932330827067e-05, 'epoch': 5.41}
{'loss': 0.0947, 'learning_rate': 9.022556390977444e-05, 'epoch': 5.49}
{'loss': 0.0749, 'learning_rate': 8.87218045112782e-05, 'epoch': 5.56}
{'loss': 0.1132, 'learning_rate': 8.721804511278195e-05, 'epoch': 5.64}
{'loss': 0.0967, 'learning_rate': 8.571428571428571e-05, 'epoch': 5.71}
{'loss': 0.1536, 'learning_rate': 8.421052631578948e-05, 'epoch': 5.79}
{'loss': 0.1011, 'learning_rate': 8.270676691729324e-05, 'epoch': 5.86}
{'loss': 0.0857, 'learning_rate': 8.120300751879699e-05, 'epoch': 5.94}


***** Running Evaluation *****
  Num examples = 750
  Batch size = 8


  0%|          | 0/94 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-798
Configuration saved in ./results\checkpoint-798\config.json


{'eval_loss': 0.2937006950378418, 'eval_accuracy': 0.9133333333333333, 'eval_runtime': 7.1688, 'eval_samples_per_second': 104.621, 'eval_steps_per_second': 13.112, 'epoch': 6.0}


Model weights saved in ./results\checkpoint-798\pytorch_model.bin
Feature extractor saved in ./results\checkpoint-798\preprocessor_config.json
Deleting older checkpoint [results\checkpoint-266] due to args.save_total_limit


{'loss': 0.0815, 'learning_rate': 7.969924812030075e-05, 'epoch': 6.02}
{'loss': 0.0497, 'learning_rate': 7.819548872180451e-05, 'epoch': 6.09}
{'loss': 0.0648, 'learning_rate': 7.669172932330826e-05, 'epoch': 6.17}
{'loss': 0.1153, 'learning_rate': 7.518796992481203e-05, 'epoch': 6.24}
{'loss': 0.1235, 'learning_rate': 7.368421052631579e-05, 'epoch': 6.32}
{'loss': 0.1117, 'learning_rate': 7.218045112781955e-05, 'epoch': 6.39}
{'loss': 0.0539, 'learning_rate': 7.06766917293233e-05, 'epoch': 6.47}
{'loss': 0.0819, 'learning_rate': 6.917293233082706e-05, 'epoch': 6.54}
{'loss': 0.1019, 'learning_rate': 6.766917293233083e-05, 'epoch': 6.62}
{'loss': 0.0415, 'learning_rate': 6.616541353383459e-05, 'epoch': 6.69}
{'loss': 0.0442, 'learning_rate': 6.466165413533834e-05, 'epoch': 6.77}
{'loss': 0.0314, 'learning_rate': 6.31578947368421e-05, 'epoch': 6.84}
{'loss': 0.043, 'learning_rate': 6.165413533834587e-05, 'epoch': 6.92}
{'loss': 0.0922, 'learning_rate': 6.015037593984962e-05, 'epoch': 6

***** Running Evaluation *****
  Num examples = 750
  Batch size = 8


  0%|          | 0/94 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-931
Configuration saved in ./results\checkpoint-931\config.json


{'eval_loss': 0.27657610177993774, 'eval_accuracy': 0.9253333333333333, 'eval_runtime': 7.1368, 'eval_samples_per_second': 105.09, 'eval_steps_per_second': 13.171, 'epoch': 7.0}


Model weights saved in ./results\checkpoint-931\pytorch_model.bin
Feature extractor saved in ./results\checkpoint-931\preprocessor_config.json
Deleting older checkpoint [results\checkpoint-665] due to args.save_total_limit


{'loss': 0.0447, 'learning_rate': 5.8646616541353386e-05, 'epoch': 7.07}
{'loss': 0.082, 'learning_rate': 5.714285714285714e-05, 'epoch': 7.14}
{'loss': 0.0638, 'learning_rate': 5.5639097744360905e-05, 'epoch': 7.22}
{'loss': 0.1124, 'learning_rate': 5.413533834586466e-05, 'epoch': 7.29}
{'loss': 0.0534, 'learning_rate': 5.2631578947368424e-05, 'epoch': 7.37}
{'loss': 0.0543, 'learning_rate': 5.112781954887218e-05, 'epoch': 7.44}
{'loss': 0.0779, 'learning_rate': 4.9624060150375936e-05, 'epoch': 7.52}
{'loss': 0.0578, 'learning_rate': 4.81203007518797e-05, 'epoch': 7.59}
{'loss': 0.0898, 'learning_rate': 4.6616541353383456e-05, 'epoch': 7.67}
{'loss': 0.1046, 'learning_rate': 4.511278195488722e-05, 'epoch': 7.74}
{'loss': 0.1118, 'learning_rate': 4.3609022556390975e-05, 'epoch': 7.82}
{'loss': 0.0285, 'learning_rate': 4.210526315789474e-05, 'epoch': 7.89}
{'loss': 0.1242, 'learning_rate': 4.0601503759398494e-05, 'epoch': 7.97}


***** Running Evaluation *****
  Num examples = 750
  Batch size = 8


  0%|          | 0/94 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-1064
Configuration saved in ./results\checkpoint-1064\config.json


{'eval_loss': 0.28264448046684265, 'eval_accuracy': 0.9306666666666666, 'eval_runtime': 7.2713, 'eval_samples_per_second': 103.146, 'eval_steps_per_second': 12.928, 'epoch': 8.0}


Model weights saved in ./results\checkpoint-1064\pytorch_model.bin
Feature extractor saved in ./results\checkpoint-1064\preprocessor_config.json
Deleting older checkpoint [results\checkpoint-798] due to args.save_total_limit


{'loss': 0.0379, 'learning_rate': 3.909774436090226e-05, 'epoch': 8.05}
{'loss': 0.1233, 'learning_rate': 3.759398496240601e-05, 'epoch': 8.12}
{'loss': 0.0744, 'learning_rate': 3.6090225563909776e-05, 'epoch': 8.2}
{'loss': 0.1187, 'learning_rate': 3.458646616541353e-05, 'epoch': 8.27}
{'loss': 0.0335, 'learning_rate': 3.3082706766917295e-05, 'epoch': 8.35}
{'loss': 0.0412, 'learning_rate': 3.157894736842105e-05, 'epoch': 8.42}
{'loss': 0.0892, 'learning_rate': 3.007518796992481e-05, 'epoch': 8.5}
{'loss': 0.0763, 'learning_rate': 2.857142857142857e-05, 'epoch': 8.57}
{'loss': 0.0858, 'learning_rate': 2.706766917293233e-05, 'epoch': 8.65}
{'loss': 0.0541, 'learning_rate': 2.556390977443609e-05, 'epoch': 8.72}
{'loss': 0.0788, 'learning_rate': 2.406015037593985e-05, 'epoch': 8.8}
{'loss': 0.0553, 'learning_rate': 2.255639097744361e-05, 'epoch': 8.87}
{'loss': 0.0931, 'learning_rate': 2.105263157894737e-05, 'epoch': 8.95}


***** Running Evaluation *****
  Num examples = 750
  Batch size = 8


  0%|          | 0/94 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-1197
Configuration saved in ./results\checkpoint-1197\config.json


{'eval_loss': 0.26141634583473206, 'eval_accuracy': 0.9306666666666666, 'eval_runtime': 7.0912, 'eval_samples_per_second': 105.764, 'eval_steps_per_second': 13.256, 'epoch': 9.0}


Model weights saved in ./results\checkpoint-1197\pytorch_model.bin
Feature extractor saved in ./results\checkpoint-1197\preprocessor_config.json
Deleting older checkpoint [results\checkpoint-931] due to args.save_total_limit


{'loss': 0.1062, 'learning_rate': 1.954887218045113e-05, 'epoch': 9.02}
{'loss': 0.05, 'learning_rate': 1.8045112781954888e-05, 'epoch': 9.1}
{'loss': 0.0583, 'learning_rate': 1.6541353383458648e-05, 'epoch': 9.17}
{'loss': 0.0755, 'learning_rate': 1.5037593984962406e-05, 'epoch': 9.25}
{'loss': 0.085, 'learning_rate': 1.3533834586466165e-05, 'epoch': 9.32}
{'loss': 0.0264, 'learning_rate': 1.2030075187969925e-05, 'epoch': 9.4}
{'loss': 0.0944, 'learning_rate': 1.0526315789473684e-05, 'epoch': 9.47}
{'loss': 0.092, 'learning_rate': 9.022556390977444e-06, 'epoch': 9.55}
{'loss': 0.0299, 'learning_rate': 7.518796992481203e-06, 'epoch': 9.62}
{'loss': 0.0693, 'learning_rate': 6.015037593984962e-06, 'epoch': 9.7}
{'loss': 0.0461, 'learning_rate': 4.511278195488722e-06, 'epoch': 9.77}
{'loss': 0.0241, 'learning_rate': 3.007518796992481e-06, 'epoch': 9.85}
{'loss': 0.017, 'learning_rate': 1.5037593984962406e-06, 'epoch': 9.92}


***** Running Evaluation *****
  Num examples = 750
  Batch size = 8


{'loss': 0.0728, 'learning_rate': 0.0, 'epoch': 10.0}


  0%|          | 0/94 [00:00<?, ?it/s]

Saving model checkpoint to ./results\checkpoint-1330
Configuration saved in ./results\checkpoint-1330\config.json


{'eval_loss': 0.2562807500362396, 'eval_accuracy': 0.9306666666666666, 'eval_runtime': 7.0987, 'eval_samples_per_second': 105.652, 'eval_steps_per_second': 13.242, 'epoch': 10.0}


Model weights saved in ./results\checkpoint-1330\pytorch_model.bin
Feature extractor saved in ./results\checkpoint-1330\preprocessor_config.json
Deleting older checkpoint [results\checkpoint-1197] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results\checkpoint-1064 (score: 0.9306666666666666).
Saving model checkpoint to ./models/
Configuration saved in ./models/config.json


{'train_runtime': 729.4853, 'train_samples_per_second': 58.26, 'train_steps_per_second': 1.823, 'train_loss': 0.12560736727445646, 'epoch': 10.0}


Model weights saved in ./models/pytorch_model.bin
Feature extractor saved in ./models/preprocessor_config.json


***** train metrics *****
  epoch                    =       10.0
  train_loss               =     0.1256
  train_runtime            = 0:12:09.48
  train_samples_per_second =      58.26
  train_steps_per_second   =      1.823
