In [1]:
# !python -m pip install torch datasets evaluate transformers

In [4]:
import datasets
import evaluate
import transformers

import numpy as np
import torch
import torchvision

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model_checkpoint = "google/vit-base-patch16-224-in21k" # pre-trained model from which to fine-tune
batch_size = 8 # batch size for training and evaluation

In [4]:
dataset = datasets.load_dataset("imagefolder", data_dir="D:/my_gestures", cache_dir="D:/.cache/")
dataset.cleanup_cache_files()
print(dataset)

Resolving data files: 100%|██████████| 6000/6000 [00:00<00:00, 16574.26it/s]
Resolving data files: 100%|██████████| 1500/1500 [00:00<00:00, 38468.09it/s]
Found cached dataset imagefolder (D:/.cache/imagefolder/default-3512ca76eea585d5/0.0.0/37fbb85cc714a338bea574ac6c7d0b5be5aff46c1862c1989b20e0771199e93f)
100%|██████████| 2/2 [00:00<00:00, 25.64it/s]

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 6000
    })
    validation: Dataset({
        features: ['image', 'label'],
        num_rows: 1500
    })
})





In [5]:
metric = evaluate.load("accuracy")

In [6]:
labels = dataset["train"].features["label"].names
label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = i
    id2label[i] = label

print(labels)

['00_fist', '01_one', '02_two', '03_three', '04_four', '05_five']


In [7]:
feature_extractor = transformers.AutoFeatureExtractor.from_pretrained(model_checkpoint)
print(feature_extractor)

ViTFeatureExtractor {
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_processor_type": "ViTFeatureExtractor",
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "resample": 2,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "height": 224,
    "width": 224
  }
}





In [8]:
import albumentations as A

size = 224

train_transforms = A.Compose([
    A.Resize(height=size, width=size),
    A.Normalize()
])

val_transforms = A.Compose([
    A.Resize(height=size, width=size),
    A.Normalize()
])

def preprocess_train(examples):
    examples['pixel_values'] = [
        train_transforms(image=np.array(image))["image"] for image in examples["image"]
    ]
    return examples

def preprocess_val(examples):
    examples['pixel_values'] = [
        val_transforms(image=np.array(image))["image"] for image in examples["image"]
    ]
    return examples

In [9]:
train_ds = dataset['train']
val_ds = dataset['validation']

train_ds.set_transform(preprocess_train)
val_ds.set_transform(preprocess_val)

In [10]:
model = transformers.AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    label2id=label2id,
    id2label=id2label,
    ignore_mismatched_sizes=True,
)

Some weights of the model checkpoint at google/vit-base-patch16-224-in21k were not used when initializing ViTForImageClassification: ['pooler.dense.weight', 'pooler.dense.bias']
- This IS expected if you are initializing ViTForImageClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ViTForImageClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
model_name = model_checkpoint.split("/")[-1]

args = transformers.TrainingArguments(
    f"{model_name}-finetuned-gestures",
    remove_unused_columns=False,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=1e-3,
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    push_to_hub=False,
)

In [12]:
def compute_metrics(eval_pred):
    """Computes accuracy on a batch of predictions"""
    predictions = np.argmax(eval_pred.predictions, axis=1)
    return metric.compute(predictions=predictions, references=eval_pred.label_ids)

In [13]:
def collate_fn(examples):
    images = []
    labels = []
    for example in examples:
        image = np.moveaxis(example["pixel_values"], source=2, destination=0)
        images.append(torch.from_numpy(image))
        labels.append(example["label"])
        
    pixel_values = torch.stack(images)
    labels = torch.tensor(labels)
    return {"pixel_values": pixel_values, "labels": labels}

In [14]:
trainer = transformers.Trainer(
    model,
    args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=feature_extractor,
    compute_metrics=compute_metrics,
    data_collator=collate_fn,
)

In [15]:
train_results = trainer.train()
# rest is optional but nice to have
trainer.save_model("D:/saved_models/vit-base-patch16-224-in21k-finetuned-gestures/")
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()


  1%|          | 10/935 [00:21<30:37,  1.99s/it] 

{'loss': 1.7832, 'learning_rate': 0.00010638297872340425, 'epoch': 0.05}


  2%|▏         | 20/935 [00:41<30:07,  1.97s/it]

{'loss': 1.5243, 'learning_rate': 0.0002127659574468085, 'epoch': 0.11}


  3%|▎         | 30/935 [01:01<29:55,  1.98s/it]

{'loss': 1.0065, 'learning_rate': 0.0003191489361702128, 'epoch': 0.16}


  4%|▍         | 40/935 [01:21<29:44,  1.99s/it]

{'loss': 0.6222, 'learning_rate': 0.000425531914893617, 'epoch': 0.21}


  5%|▌         | 50/935 [01:41<29:42,  2.01s/it]

{'loss': 0.4441, 'learning_rate': 0.0005319148936170213, 'epoch': 0.27}


  6%|▋         | 60/935 [02:02<30:33,  2.10s/it]

{'loss': 0.5891, 'learning_rate': 0.0006382978723404256, 'epoch': 0.32}


  7%|▋         | 70/935 [02:24<31:46,  2.20s/it]

{'loss': 0.5559, 'learning_rate': 0.0007446808510638298, 'epoch': 0.37}


  9%|▊         | 80/935 [02:47<32:07,  2.25s/it]

{'loss': 0.5916, 'learning_rate': 0.000851063829787234, 'epoch': 0.43}


 10%|▉         | 90/935 [03:10<32:14,  2.29s/it]

{'loss': 0.6992, 'learning_rate': 0.0009574468085106384, 'epoch': 0.48}


 11%|█         | 100/935 [03:32<32:12,  2.31s/it]

{'loss': 0.7668, 'learning_rate': 0.0009928656361474434, 'epoch': 0.53}


 12%|█▏        | 110/935 [03:55<30:58,  2.25s/it]

{'loss': 0.5344, 'learning_rate': 0.000980975029726516, 'epoch': 0.59}


 13%|█▎        | 120/935 [04:19<31:23,  2.31s/it]

{'loss': 0.7503, 'learning_rate': 0.0009690844233055887, 'epoch': 0.64}


 14%|█▍        | 130/935 [04:41<30:31,  2.28s/it]

{'loss': 1.2413, 'learning_rate': 0.0009571938168846611, 'epoch': 0.69}


 15%|█▍        | 140/935 [05:05<31:24,  2.37s/it]

{'loss': 0.7317, 'learning_rate': 0.0009453032104637337, 'epoch': 0.75}


 16%|█▌        | 150/935 [05:28<30:21,  2.32s/it]

{'loss': 0.8095, 'learning_rate': 0.0009334126040428062, 'epoch': 0.8}


 17%|█▋        | 160/935 [05:51<30:15,  2.34s/it]

{'loss': 0.6983, 'learning_rate': 0.0009215219976218787, 'epoch': 0.85}


 18%|█▊        | 170/935 [06:14<28:50,  2.26s/it]

{'loss': 0.7349, 'learning_rate': 0.0009096313912009512, 'epoch': 0.91}


 19%|█▉        | 180/935 [06:37<28:42,  2.28s/it]

{'loss': 0.626, 'learning_rate': 0.0008977407847800238, 'epoch': 0.96}


                                                 
 20%|██        | 187/935 [07:36<29:14,  2.35s/it]

{'eval_loss': 0.8405848145484924, 'eval_accuracy': 0.694, 'eval_runtime': 41.6042, 'eval_samples_per_second': 36.054, 'eval_steps_per_second': 4.519, 'epoch': 1.0}


 20%|██        | 190/935 [07:44<1:47:40,  8.67s/it]

{'loss': 0.6107, 'learning_rate': 0.0008858501783590964, 'epoch': 1.01}


 21%|██▏       | 200/935 [08:07<31:04,  2.54s/it]  

{'loss': 0.6092, 'learning_rate': 0.0008739595719381689, 'epoch': 1.07}


 22%|██▏       | 210/935 [08:31<28:51,  2.39s/it]

{'loss': 0.5601, 'learning_rate': 0.0008620689655172414, 'epoch': 1.12}


 24%|██▎       | 220/935 [08:53<26:52,  2.26s/it]

{'loss': 0.6521, 'learning_rate': 0.0008501783590963139, 'epoch': 1.17}


 25%|██▍       | 230/935 [09:15<26:13,  2.23s/it]

{'loss': 0.533, 'learning_rate': 0.0008382877526753865, 'epoch': 1.23}


 26%|██▌       | 240/935 [09:38<26:32,  2.29s/it]

{'loss': 0.4795, 'learning_rate': 0.0008263971462544589, 'epoch': 1.28}


 27%|██▋       | 250/935 [10:00<25:10,  2.21s/it]

{'loss': 0.5134, 'learning_rate': 0.0008145065398335315, 'epoch': 1.33}


 28%|██▊       | 260/935 [10:23<25:41,  2.28s/it]

{'loss': 0.4432, 'learning_rate': 0.0008026159334126041, 'epoch': 1.39}


 29%|██▉       | 270/935 [10:45<24:49,  2.24s/it]

{'loss': 0.5262, 'learning_rate': 0.0007907253269916765, 'epoch': 1.44}


 30%|██▉       | 280/935 [11:08<24:23,  2.23s/it]

{'loss': 0.4187, 'learning_rate': 0.0007788347205707491, 'epoch': 1.49}


 31%|███       | 290/935 [11:30<24:00,  2.23s/it]

{'loss': 0.6073, 'learning_rate': 0.0007669441141498216, 'epoch': 1.55}


 32%|███▏      | 300/935 [11:52<23:40,  2.24s/it]

{'loss': 0.5185, 'learning_rate': 0.0007550535077288942, 'epoch': 1.6}


 33%|███▎      | 310/935 [12:15<23:17,  2.24s/it]

{'loss': 0.3054, 'learning_rate': 0.0007431629013079667, 'epoch': 1.65}


 34%|███▍      | 320/935 [12:37<22:53,  2.23s/it]

{'loss': 0.5441, 'learning_rate': 0.0007312722948870393, 'epoch': 1.71}


 35%|███▌      | 330/935 [13:00<22:40,  2.25s/it]

{'loss': 0.3695, 'learning_rate': 0.0007193816884661118, 'epoch': 1.76}


 36%|███▋      | 340/935 [13:23<22:02,  2.22s/it]

{'loss': 0.4576, 'learning_rate': 0.0007074910820451843, 'epoch': 1.81}


 37%|███▋      | 350/935 [13:45<22:38,  2.32s/it]

{'loss': 0.439, 'learning_rate': 0.0006956004756242569, 'epoch': 1.87}


 39%|███▊      | 360/935 [14:08<21:00,  2.19s/it]

{'loss': 0.4171, 'learning_rate': 0.0006837098692033293, 'epoch': 1.92}


 40%|███▉      | 370/935 [14:31<21:36,  2.29s/it]

{'loss': 0.307, 'learning_rate': 0.0006718192627824019, 'epoch': 1.97}


                                                 
 40%|████      | 375/935 [15:22<20:44,  2.22s/it]

{'eval_loss': 0.35975465178489685, 'eval_accuracy': 0.8646666666666667, 'eval_runtime': 40.2536, 'eval_samples_per_second': 37.264, 'eval_steps_per_second': 4.67, 'epoch': 2.0}


 41%|████      | 380/935 [15:36<49:50,  5.39s/it]  

{'loss': 0.3135, 'learning_rate': 0.0006599286563614744, 'epoch': 2.03}


 42%|████▏     | 390/935 [16:00<22:22,  2.46s/it]

{'loss': 0.317, 'learning_rate': 0.0006480380499405469, 'epoch': 2.08}


 43%|████▎     | 400/935 [16:23<20:30,  2.30s/it]

{'loss': 0.2934, 'learning_rate': 0.0006361474435196195, 'epoch': 2.13}


 44%|████▍     | 410/935 [16:47<20:27,  2.34s/it]

{'loss': 0.2879, 'learning_rate': 0.0006242568370986921, 'epoch': 2.19}


 45%|████▍     | 420/935 [17:10<19:48,  2.31s/it]

{'loss': 0.2477, 'learning_rate': 0.0006123662306777646, 'epoch': 2.24}


 46%|████▌     | 430/935 [17:33<19:37,  2.33s/it]

{'loss': 0.2701, 'learning_rate': 0.0006004756242568371, 'epoch': 2.29}


 47%|████▋     | 440/935 [17:57<19:04,  2.31s/it]

{'loss': 0.1951, 'learning_rate': 0.0005885850178359097, 'epoch': 2.35}


 48%|████▊     | 450/935 [18:20<19:19,  2.39s/it]

{'loss': 0.2373, 'learning_rate': 0.0005766944114149822, 'epoch': 2.4}


 49%|████▉     | 460/935 [18:44<18:32,  2.34s/it]

{'loss': 0.1492, 'learning_rate': 0.0005648038049940547, 'epoch': 2.45}


 50%|█████     | 470/935 [19:07<17:55,  2.31s/it]

{'loss': 0.3099, 'learning_rate': 0.0005529131985731273, 'epoch': 2.51}


 51%|█████▏    | 480/935 [19:30<17:30,  2.31s/it]

{'loss': 0.3934, 'learning_rate': 0.0005410225921521997, 'epoch': 2.56}


 52%|█████▏    | 490/935 [19:53<17:09,  2.31s/it]

{'loss': 0.2284, 'learning_rate': 0.0005291319857312723, 'epoch': 2.61}


 53%|█████▎    | 500/935 [20:17<16:46,  2.31s/it]

{'loss': 0.1719, 'learning_rate': 0.0005172413793103448, 'epoch': 2.67}


 55%|█████▍    | 510/935 [20:40<16:27,  2.32s/it]

{'loss': 0.1523, 'learning_rate': 0.0005053507728894173, 'epoch': 2.72}


 56%|█████▌    | 520/935 [21:03<15:59,  2.31s/it]

{'loss': 0.2422, 'learning_rate': 0.0004934601664684899, 'epoch': 2.77}


 57%|█████▋    | 530/935 [21:26<15:38,  2.32s/it]

{'loss': 0.2831, 'learning_rate': 0.0004815695600475624, 'epoch': 2.83}


 58%|█████▊    | 540/935 [21:49<15:14,  2.31s/it]

{'loss': 0.1551, 'learning_rate': 0.00046967895362663497, 'epoch': 2.88}


 59%|█████▉    | 550/935 [22:13<15:47,  2.46s/it]

{'loss': 0.1808, 'learning_rate': 0.0004577883472057075, 'epoch': 2.93}


 60%|█████▉    | 560/935 [22:37<14:50,  2.38s/it]

{'loss': 0.0879, 'learning_rate': 0.00044589774078478004, 'epoch': 2.99}


                                                 
 60%|██████    | 562/935 [23:22<14:44,  2.37s/it]

{'eval_loss': 0.1418614387512207, 'eval_accuracy': 0.954, 'eval_runtime': 39.7462, 'eval_samples_per_second': 37.739, 'eval_steps_per_second': 4.73, 'epoch': 3.0}


 61%|██████    | 570/935 [23:42<20:17,  3.34s/it]  

{'loss': 0.1211, 'learning_rate': 0.0004340071343638526, 'epoch': 3.04}


 62%|██████▏   | 580/935 [24:05<14:28,  2.45s/it]

{'loss': 0.1741, 'learning_rate': 0.0004221165279429251, 'epoch': 3.09}


 63%|██████▎   | 590/935 [24:28<12:55,  2.25s/it]

{'loss': 0.1082, 'learning_rate': 0.0004102259215219976, 'epoch': 3.15}


 64%|██████▍   | 600/935 [24:50<12:32,  2.25s/it]

{'loss': 0.1765, 'learning_rate': 0.00039833531510107017, 'epoch': 3.2}


 65%|██████▌   | 610/935 [25:13<12:10,  2.25s/it]

{'loss': 0.1456, 'learning_rate': 0.0003864447086801427, 'epoch': 3.25}


 66%|██████▋   | 620/935 [25:35<11:45,  2.24s/it]

{'loss': 0.1133, 'learning_rate': 0.00037455410225921523, 'epoch': 3.31}


 67%|██████▋   | 630/935 [25:58<11:23,  2.24s/it]

{'loss': 0.1055, 'learning_rate': 0.0003626634958382878, 'epoch': 3.36}


 68%|██████▊   | 640/935 [26:21<11:17,  2.30s/it]

{'loss': 0.1074, 'learning_rate': 0.0003507728894173603, 'epoch': 3.41}


 70%|██████▉   | 650/935 [26:44<10:46,  2.27s/it]

{'loss': 0.1398, 'learning_rate': 0.0003388822829964328, 'epoch': 3.47}


 71%|███████   | 660/935 [27:07<10:29,  2.29s/it]

{'loss': 0.0786, 'learning_rate': 0.00032699167657550536, 'epoch': 3.52}


 72%|███████▏  | 670/935 [27:29<09:51,  2.23s/it]

{'loss': 0.0442, 'learning_rate': 0.00031510107015457787, 'epoch': 3.57}


 73%|███████▎  | 680/935 [27:52<09:42,  2.29s/it]

{'loss': 0.0837, 'learning_rate': 0.00030321046373365043, 'epoch': 3.63}


 74%|███████▍  | 690/935 [28:14<08:57,  2.19s/it]

{'loss': 0.1078, 'learning_rate': 0.000291319857312723, 'epoch': 3.68}


 75%|███████▍  | 700/935 [28:37<08:59,  2.29s/it]

{'loss': 0.1526, 'learning_rate': 0.0002794292508917955, 'epoch': 3.73}


 76%|███████▌  | 710/935 [29:00<08:28,  2.26s/it]

{'loss': 0.067, 'learning_rate': 0.000267538644470868, 'epoch': 3.79}


 77%|███████▋  | 720/935 [29:23<08:05,  2.26s/it]

{'loss': 0.0643, 'learning_rate': 0.00025564803804994056, 'epoch': 3.84}


 78%|███████▊  | 730/935 [29:45<07:44,  2.26s/it]

{'loss': 0.0761, 'learning_rate': 0.0002437574316290131, 'epoch': 3.89}


 79%|███████▉  | 740/935 [30:08<07:26,  2.29s/it]

{'loss': 0.0851, 'learning_rate': 0.0002318668252080856, 'epoch': 3.95}


 80%|████████  | 750/935 [30:31<07:08,  2.31s/it]

{'loss': 0.0642, 'learning_rate': 0.00021997621878715816, 'epoch': 4.0}


                                                 
 80%|████████  | 750/935 [31:13<07:08,  2.31s/it]

{'eval_loss': 0.08091837912797928, 'eval_accuracy': 0.972, 'eval_runtime': 42.1235, 'eval_samples_per_second': 35.61, 'eval_steps_per_second': 4.463, 'epoch': 4.0}


 81%|████████▏ | 760/935 [31:39<08:25,  2.89s/it]

{'loss': 0.0196, 'learning_rate': 0.0002080856123662307, 'epoch': 4.05}


 82%|████████▏ | 770/935 [32:03<06:32,  2.38s/it]

{'loss': 0.0291, 'learning_rate': 0.0001961950059453032, 'epoch': 4.11}


 83%|████████▎ | 780/935 [32:27<06:14,  2.42s/it]

{'loss': 0.0417, 'learning_rate': 0.00018430439952437576, 'epoch': 4.16}


 84%|████████▍ | 790/935 [32:50<05:51,  2.43s/it]

{'loss': 0.041, 'learning_rate': 0.0001724137931034483, 'epoch': 4.21}


 86%|████████▌ | 800/935 [33:15<05:27,  2.43s/it]

{'loss': 0.0191, 'learning_rate': 0.0001605231866825208, 'epoch': 4.27}


 87%|████████▋ | 810/935 [33:39<05:06,  2.45s/it]

{'loss': 0.0488, 'learning_rate': 0.00014863258026159336, 'epoch': 4.32}


 88%|████████▊ | 820/935 [34:03<04:37,  2.41s/it]

{'loss': 0.0165, 'learning_rate': 0.0001367419738406659, 'epoch': 4.37}


 89%|████████▉ | 830/935 [34:30<04:34,  2.61s/it]

{'loss': 0.0124, 'learning_rate': 0.0001248513674197384, 'epoch': 4.43}


 90%|████████▉ | 840/935 [34:56<04:07,  2.61s/it]

{'loss': 0.0183, 'learning_rate': 0.00011296076099881095, 'epoch': 4.48}


 91%|█████████ | 850/935 [35:22<03:40,  2.59s/it]

{'loss': 0.0456, 'learning_rate': 0.00010107015457788347, 'epoch': 4.53}


 92%|█████████▏| 860/935 [35:49<03:13,  2.58s/it]

{'loss': 0.0147, 'learning_rate': 8.9179548156956e-05, 'epoch': 4.59}


 93%|█████████▎| 870/935 [36:15<02:52,  2.66s/it]

{'loss': 0.0299, 'learning_rate': 7.728894173602855e-05, 'epoch': 4.64}


 94%|█████████▍| 880/935 [36:41<02:22,  2.59s/it]

{'loss': 0.0144, 'learning_rate': 6.539833531510107e-05, 'epoch': 4.69}


 95%|█████████▌| 890/935 [37:08<02:01,  2.69s/it]

{'loss': 0.0197, 'learning_rate': 5.35077288941736e-05, 'epoch': 4.75}


 96%|█████████▋| 900/935 [37:34<01:29,  2.56s/it]

{'loss': 0.0265, 'learning_rate': 4.1617122473246136e-05, 'epoch': 4.8}


 97%|█████████▋| 910/935 [38:00<01:04,  2.59s/it]

{'loss': 0.0103, 'learning_rate': 2.972651605231867e-05, 'epoch': 4.85}


 98%|█████████▊| 920/935 [38:29<00:41,  2.80s/it]

{'loss': 0.0207, 'learning_rate': 1.78359096313912e-05, 'epoch': 4.91}


 99%|█████████▉| 930/935 [38:57<00:14,  2.82s/it]

{'loss': 0.0162, 'learning_rate': 5.9453032104637345e-06, 'epoch': 4.96}


                                                 
100%|██████████| 935/935 [39:56<00:00,  2.84s/it]

{'eval_loss': 0.048720479011535645, 'eval_accuracy': 0.9846666666666667, 'eval_runtime': 44.7587, 'eval_samples_per_second': 33.513, 'eval_steps_per_second': 4.2, 'epoch': 4.99}


100%|██████████| 935/935 [40:00<00:00,  2.57s/it]


{'train_runtime': 2400.2691, 'train_samples_per_second': 12.499, 'train_steps_per_second': 0.39, 'train_loss': 0.33161284952240194, 'epoch': 4.99}
***** train metrics *****
  epoch                    =       4.99
  train_loss               =     0.3316
  train_runtime            = 0:40:00.26
  train_samples_per_second =     12.499
  train_steps_per_second   =       0.39


In [16]:
metrics = trainer.evaluate()
# some nice to haves:
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

100%|██████████| 188/188 [00:45<00:00,  4.14it/s]

***** eval metrics *****
  epoch                   =       4.99
  eval_accuracy           =     0.9847
  eval_loss               =     0.0487
  eval_runtime            = 0:00:45.67
  eval_samples_per_second =     32.843
  eval_steps_per_second   =      4.116



