
# Example of the `aitlas` toolbox in the context of benchmarking a dataset

This notebook shows a sample implementation of multi-class image classification with the `aitlas` toolbox on the EuroSAT dataset, trained and evaluated over a range of train/test split ratios.

In [4]:
import os
from itertools import compress

import matplotlib.pyplot as plt
import numpy as npi  # NOTE(review): alias is probably meant to be `np`; unused in the visible cells
import pandas as pd

from aitlas.datasets import EurosatDataset
from aitlas.models import ResNet50
from aitlas.models import ShallowCNNNet  # model actually trained in the benchmark loop
from aitlas.tasks import StratifiedSplitTask
from aitlas.utils import image_loader
from aitlas.visualizations import display_image_labels

## Define the splits and dataset paths

In [31]:
# Train/test ratio pairs to benchmark, in steps of 10: (10, 90), (20, 80), ..., (90, 10).
splits = [(train_ratio, 100 - train_ratio) for train_ratio in range(10, 100, 10)]  # one tuple is (train, test)

# Locations on disk: the image folder, and the two CSV files that receive the split IDs.
dataset_path = "/media/hdd/EuroSAT/2750/"  # where is the dataset on disk
trainset_path = "/media/hdd/EuroSAT/train.csv"  # where to store the train IDs
testset_path = "/media/hdd/EuroSAT/test.csv"  # where to store the test IDs

## Loop through the splits, train and evaluate

In [35]:
# Benchmark loop: for every (train, test) ratio pair in `splits`, regenerate the
# stratified split files, build the train/test datasets from them, train a fresh
# ShallowCNNNet for `epochs` epochs and record the accuracy metrics.
# `results` ends up with one entry per split, in the same order as `splits`.
results = []  # results accumulator (reset here so re-running the cell starts clean)

# These settings do not depend on the split, so they are defined once up front.
epochs = 1
model_directory = "./experiments/eurosat/"

# iterate through the splits
for train, test in splits:
    # configure split task: the same two CSV paths are passed on every
    # iteration, so the datasets below always read the current split.
    split_config = {
        "split": {
            "train": {"ratio": train, "file": trainset_path},
            "test": {"ratio": test, "file": testset_path},
        },
        "path": dataset_path,
    }
    split_task = StratifiedSplitTask(None, split_config)
    split_task.run()

    # setup train set (shuffled)
    train_dataset_config = {
        "batch_size": 4,
        "shuffle": True,
        "num_workers": 4,
        "csv_file_path": trainset_path,
        "transforms": ["aitlas.transforms.ResizeCenterCropFlipHVToTensor"],
    }
    train_dataset = EurosatDataset(train_dataset_config)

    # setup test set (not shuffled, different transform than the train set)
    test_dataset_config = {
        "batch_size": 4,
        "shuffle": False,
        "num_workers": 4,
        "csv_file_path": testset_path,
        "transforms": ["aitlas.transforms.ResizeCenterCropToTensor"],
    }
    test_dataset = EurosatDataset(test_dataset_config)
    print(f"Train size: {len(train_dataset)}, Test size: {len(test_dataset)}")

    # setup model: a new instance is created for every split, so each run
    # starts from its own freshly constructed model.
    # ShallowCNNNet comes from aitlas.models (see the imports cell).
    # NOTE(review): whether ShallowCNNNet honours "pretrained" is not visible here - confirm.
    model_config = {"num_classes": 10, "learning_rate": 0.001, "pretrained": True}
    model = ShallowCNNNet(model_config)
    model.prepare()

    # training and evaluation; the test split is passed as the validation set
    model.train_and_evaluate_model(
        train_dataset=train_dataset,
        epochs=epochs,
        model_directory=model_directory,
        val_dataset=test_dataset,
        run_id='1',
    )

    # collect results: the returned record is later indexed with "Accuracy"
    results.append(model.running_metrics.accuracy())

2021-09-06 16:27:30,749 INFO Loading data...
2021-09-06 16:27:30,803 INFO Making splits...
2021-09-06 16:27:30,849 INFO And that's it!
2021-09-06 16:27:30,912 INFO Starting training.


Train size: 2700, Test size: 24300


training:  15%|██████████▎                                                            | 98/675 [00:00<00:02, 211.06it/s]2021-09-06 16:27:31,485 INFO [1, 100], loss:  9.16797
training:  26%|██████████████████▏                                                   | 175/675 [00:00<00:02, 238.82it/s]2021-09-06 16:27:31,883 INFO [1, 200], loss:  8.96101
training:  41%|████████████████████████████▋                                         | 277/675 [00:01<00:01, 248.05it/s]2021-09-06 16:27:32,282 INFO [1, 300], loss:  8.32217
training:  56%|███████████████████████████████████████▌                              | 381/675 [00:01<00:01, 250.49it/s]2021-09-06 16:27:32,680 INFO [1, 400], loss:  8.12845
training:  72%|██████████████████████████████████████████████████▎                   | 485/675 [00:02<00:00, 250.11it/s]2021-09-06 16:27:33,083 INFO [1, 500], loss:  7.74597
training:  87%|█████████████████████████████████████████████████████████████         | 589/675 [00:02<00:00, 249.80it/s]2021-09-06

Train size: 5400, Test size: 21600


training:   6%|████                                                                  | 78/1350 [00:00<00:06, 203.77it/s]2021-09-06 16:27:53,568 INFO [1, 100], loss:  9.16436
training:  13%|█████████▎                                                           | 181/1350 [00:00<00:04, 243.10it/s]2021-09-06 16:27:53,963 INFO [1, 200], loss:  8.92271
training:  21%|██████████████▌                                                      | 285/1350 [00:01<00:04, 250.64it/s]2021-09-06 16:27:54,358 INFO [1, 300], loss:  8.44738
training:  29%|███████████████████▉                                                 | 389/1350 [00:01<00:03, 252.28it/s]2021-09-06 16:27:54,754 INFO [1, 400], loss:  8.09138
training:  37%|█████████████████████████▏                                           | 493/1350 [00:02<00:03, 251.64it/s]2021-09-06 16:27:55,152 INFO [1, 500], loss:  7.73496
training:  44%|██████████████████████████████▌                                      | 597/1350 [00:02<00:03, 250.11it/s]2021-09-06

Train size: 8100, Test size: 18900


training:   4%|██▋                                                                   | 77/2025 [00:00<00:09, 201.50it/s]2021-09-06 16:28:18,566 INFO [1, 100], loss:  9.18163
training:   9%|██████                                                               | 179/2025 [00:00<00:07, 240.92it/s]2021-09-06 16:28:18,964 INFO [1, 200], loss:  9.02138
training:  14%|█████████▋                                                           | 283/2025 [00:01<00:06, 249.06it/s]2021-09-06 16:28:19,362 INFO [1, 300], loss:  8.85185
training:  19%|█████████████▏                                                       | 386/2025 [00:01<00:06, 250.34it/s]2021-09-06 16:28:19,761 INFO [1, 400], loss:  8.39584
training:  24%|████████████████▋                                                    | 490/2025 [00:02<00:06, 251.05it/s]2021-09-06 16:28:20,160 INFO [1, 500], loss:  8.49799
training:  29%|████████████████████▏                                                | 593/2025 [00:02<00:05, 247.54it/s]2021-09-06

Train size: 10800, Test size: 16200


training:   3%|█▉                                                                    | 77/2700 [00:00<00:12, 202.47it/s]2021-09-06 16:28:45,954 INFO [1, 100], loss:  9.20105
training:   7%|████▋                                                                | 181/2700 [00:00<00:10, 241.40it/s]2021-09-06 16:28:46,351 INFO [1, 200], loss:  9.14994
training:  11%|███████▎                                                             | 285/2700 [00:01<00:09, 248.78it/s]2021-09-06 16:28:46,750 INFO [1, 300], loss:  8.92913
training:  14%|█████████▉                                                           | 388/2700 [00:01<00:09, 249.86it/s]2021-09-06 16:28:47,150 INFO [1, 400], loss:  8.53221
training:  18%|████████████▌                                                        | 492/2700 [00:02<00:08, 250.01it/s]2021-09-06 16:28:47,550 INFO [1, 500], loss:  8.08270
training:  22%|███████████████▏                                                     | 596/2700 [00:02<00:08, 249.78it/s]2021-09-06

Train size: 13500, Test size: 13500


training:   2%|█▌                                                                    | 78/3375 [00:00<00:16, 205.66it/s]2021-09-06 16:29:16,173 INFO [1, 100], loss:  9.21767
training:   5%|███▋                                                                 | 182/3375 [00:00<00:13, 241.24it/s]2021-09-06 16:29:16,573 INFO [1, 200], loss:  9.07196
training:   8%|█████▊                                                               | 285/3375 [00:01<00:12, 248.46it/s]2021-09-06 16:29:16,972 INFO [1, 300], loss:  8.67863
training:  11%|███████▉                                                             | 386/3375 [00:01<00:11, 249.69it/s]2021-09-06 16:29:17,372 INFO [1, 400], loss:  8.39738
training:  14%|█████████▉                                                           | 489/3375 [00:02<00:11, 250.21it/s]2021-09-06 16:29:17,772 INFO [1, 500], loss:  8.11457
training:  18%|████████████                                                         | 592/3375 [00:02<00:11, 249.87it/s]2021-09-06

Train size: 16200, Test size: 10800


training:   2%|█▎                                                                    | 77/4050 [00:00<00:19, 203.35it/s]2021-09-06 16:29:49,161 INFO [1, 100], loss:  9.19727
training:   4%|███                                                                  | 178/4050 [00:00<00:16, 239.58it/s]2021-09-06 16:29:49,563 INFO [1, 200], loss:  8.95197
training:   7%|████▊                                                                | 279/4050 [00:01<00:15, 247.51it/s]2021-09-06 16:29:49,963 INFO [1, 300], loss:  8.49448
training:   9%|██████▍                                                              | 379/4050 [00:01<00:14, 248.98it/s]2021-09-06 16:29:50,364 INFO [1, 400], loss:  7.85051
training:  12%|████████▏                                                            | 481/4050 [00:02<00:14, 249.82it/s]2021-09-06 16:29:50,765 INFO [1, 500], loss:  7.85146
training:  14%|█████████▉                                                           | 583/4050 [00:02<00:13, 249.17it/s]2021-09-06

2021-09-06 16:30:24,357 INFO Starting training.


Train size: 18900, Test size: 8100


training:   2%|█▏                                                                    | 78/4725 [00:00<00:22, 204.24it/s]2021-09-06 16:30:24,914 INFO [1, 100], loss:  9.22711
training:   4%|██▋                                                                  | 182/4725 [00:00<00:18, 242.85it/s]2021-09-06 16:30:25,310 INFO [1, 200], loss:  9.19584
training:   6%|████▏                                                                | 285/4725 [00:01<00:17, 249.98it/s]2021-09-06 16:30:25,707 INFO [1, 300], loss:  9.14257
training:   8%|█████▋                                                               | 389/4725 [00:01<00:17, 249.24it/s]2021-09-06 16:30:26,108 INFO [1, 400], loss:  9.08346
training:  10%|███████▏                                                             | 491/4725 [00:02<00:16, 249.59it/s]2021-09-06 16:30:26,509 INFO [1, 500], loss:  8.71699
training:  13%|████████▋                                                            | 595/4725 [00:02<00:16, 251.81it/s]2021-09-06

training: 100%|████████████████████████████████████████████████████████████████████| 4725/4725 [00:19<00:00, 247.62it/s]
2021-09-06 16:30:43,446 INFO epoch: 1, time: 19, loss:  1.85760
testing on train set: 100%|████████████████████████████████████████████████████████| 4725/4725 [00:13<00:00, 362.02it/s]
2021-09-06 16:30:56,572 INFO F1_score Micro:0.3355026455026455, F1_score Macro:nan, F1_score Weighted:nan, F1_score per Class:0.41725719049604, 0.6664309704790524, 0.1458904109589041, 0.16301489921121823, 0.06925498426023086, 0.16199376947040497, 0.31894522053052893, 0.3549209586945436, 0.3912054586808188, nan
testing on validation set: 100%|███████████████████████████████████████████████████| 2025/2025 [00:05<00:00, 350.06it/s]
2021-09-06 16:31:02,361 INFO F1_score Micro:0.3397530864197531, F1_score Macro:nan, F1_score Weighted:nan, F1_score per Class:0.4246369554331497, 0.6724137931034482, 0.13957176843774782, 0.199796126401631, 0.08048780487804877, 0.15782983970406905, 0.31687546057

Train size: 21600, Test size: 5400


training:   1%|▉                                                                     | 76/5400 [00:00<00:26, 201.35it/s]2021-09-06 16:31:03,332 INFO [1, 100], loss:  9.19204
training:   3%|██▎                                                                  | 178/5400 [00:00<00:21, 240.18it/s]2021-09-06 16:31:03,732 INFO [1, 200], loss:  9.19181
training:   5%|███▌                                                                 | 281/5400 [00:01<00:20, 247.79it/s]2021-09-06 16:31:04,132 INFO [1, 300], loss:  8.92322
training:   7%|████▊                                                                | 381/5400 [00:01<00:20, 247.97it/s]2021-09-06 16:31:04,536 INFO [1, 400], loss:  8.53523
training:   9%|██████▏                                                              | 482/5400 [00:02<00:19, 249.49it/s]2021-09-06 16:31:04,936 INFO [1, 500], loss:  8.49719
training:  11%|███████▍                                                             | 586/5400 [00:02<00:19, 250.06it/s]2021-09-06

training:  89%|████████████████████████████████████████████████████████████▏       | 4783/5400 [00:19<00:02, 250.41it/s]2021-09-06 16:31:22,192 INFO [1, 4800], loss:  5.91122
training:  90%|█████████████████████████████████████████████████████████████▌      | 4887/5400 [00:19<00:02, 250.20it/s]2021-09-06 16:31:22,592 INFO [1, 4900], loss:  6.03328
training:  92%|██████████████████████████████████████████████████████████████▊     | 4990/5400 [00:20<00:01, 249.83it/s]2021-09-06 16:31:22,992 INFO [1, 5000], loss:  6.06468
training:  94%|████████████████████████████████████████████████████████████████▏   | 5093/5400 [00:20<00:01, 249.67it/s]2021-09-06 16:31:23,394 INFO [1, 5100], loss:  6.29633
training:  96%|█████████████████████████████████████████████████████████████████▍  | 5193/5400 [00:21<00:00, 242.01it/s]2021-09-06 16:31:23,811 INFO [1, 5200], loss:  6.07327
training:  98%|██████████████████████████████████████████████████████████████████▋ | 5293/5400 [00:21<00:00, 247.11it/s]2021-

Train size: 24300, Test size: 2700


training:   1%|▉                                                                     | 77/6075 [00:00<00:29, 201.47it/s]2021-09-06 16:31:44,253 INFO [1, 100], loss:  9.20918
training:   3%|██                                                                   | 179/6075 [00:00<00:24, 240.02it/s]2021-09-06 16:31:44,654 INFO [1, 200], loss:  8.87725
training:   5%|███▏                                                                 | 281/6075 [00:01<00:23, 247.68it/s]2021-09-06 16:31:45,054 INFO [1, 300], loss:  8.39671
training:   6%|████▎                                                                | 384/6075 [00:01<00:22, 249.45it/s]2021-09-06 16:31:45,454 INFO [1, 400], loss:  8.01363
training:   8%|█████▍                                                               | 484/6075 [00:02<00:22, 248.61it/s]2021-09-06 16:31:45,857 INFO [1, 500], loss:  7.91925
training:  10%|██████▋                                                              | 584/6075 [00:02<00:22, 248.20it/s]2021-09-06

training:  79%|█████████████████████████████████████████████████████▋              | 4791/6075 [00:19<00:05, 250.07it/s]2021-09-06 16:32:03,047 INFO [1, 4800], loss:  6.36488
training:  81%|██████████████████████████████████████████████████████▊             | 4894/6075 [00:19<00:04, 250.03it/s]2021-09-06 16:32:03,447 INFO [1, 4900], loss:  6.38513
training:  82%|███████████████████████████████████████████████████████▉            | 4996/6075 [00:20<00:04, 248.16it/s]2021-09-06 16:32:03,853 INFO [1, 5000], loss:  6.45843
training:  84%|█████████████████████████████████████████████████████████           | 5098/6075 [00:20<00:03, 249.55it/s]2021-09-06 16:32:04,253 INFO [1, 5100], loss:  6.53353
training:  86%|██████████████████████████████████████████████████████████▏         | 5198/6075 [00:20<00:03, 248.92it/s]2021-09-06 16:32:04,656 INFO [1, 5200], loss:  6.10989
training:  87%|███████████████████████████████████████████████████████████▎        | 5299/6075 [00:21<00:03, 249.35it/s]2021-

## See the results

In [36]:
# Build the summary table: one row per split, pairing the (train, test) tuple
# with its accuracy scaled by 100 and rounded to two decimals.
accuracy_scaled = [round(float(metrics["Accuracy"]) * 100, 2) for metrics in results]
summary_rows = list(zip(splits, accuracy_scaled))
df = pd.DataFrame(summary_rows, columns=["Train/Test", "Accuracy"])

In [37]:
# Display the per-split accuracy table built in the previous cell.
df

Unnamed: 0,Train/Test,Accuracy
0,"(10, 90)",24.81
1,"(20, 80)",25.72
2,"(30, 70)",26.7
3,"(40, 60)",29.46
4,"(50, 50)",28.5
5,"(60, 40)",35.0
6,"(70, 30)",33.98
7,"(80, 20)",42.81
8,"(90, 10)",43.59
