In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.insert(0, "../")


from autogluon.vision import ImagePredictor, ImageDataset
import numpy as np
import pandas as pd
import pickle
import datetime
from pathlib import Path
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

# Lift pandas' display limits (None = no limit) for rows, columns and
# per-cell text width, so frames shown in this notebook are not truncated.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None

## Read data

In [2]:
# Root folder of each image dataset used in this notebook.
# Data is kept on an M.2 SSD for fast reads while training models.
# NOTE(review): these are absolute, machine-specific paths — adjust them when
# running on another machine.
CIFAR_10_DATA_PATH = "/Data/cifar10_png/"
CIFAR_100_DATA_PATH = "/Data/cifar100_png/"
DCAI_DATA_PATH = "/Data/andrew-ng-dcai-comp-2021-data-deduped/andrew-ng-dcai-comp-2021-data/"
MNIST_DATA_PATH = "/Data/mnist_png/mnist_png/"
FASHION_MNIST_DATA_PATH = "/Data/fashion_mnist_png/"

# Load every dataset from its root folder. Each `from_folders` call is unpacked
# into three values (presumably the train / val / test splits, as the DCAI
# variable names suggest — confirm against the AutoGluon docs). Only the DCAI
# dataset keeps the middle value; the other datasets discard it into `_`.
# NOTE(review): binding `_` at the top level of a notebook may interfere with
# IPython's own `_` (last output) variable — confirm this is acceptable.
cifar_10_train_dataset, _, cifar_10_test_dataset = ImageDataset.from_folders(root=CIFAR_10_DATA_PATH)
cifar_100_train_dataset, _, cifar_100_test_dataset = ImageDataset.from_folders(root=CIFAR_100_DATA_PATH)
dcai_train_dataset, dcai_val_dataset, dcai_test_dataset = ImageDataset.from_folders(root=DCAI_DATA_PATH)
mnist_train_dataset, _, mnist_test_dataset = ImageDataset.from_folders(root=MNIST_DATA_PATH)
fashion_mnist_train_dataset, _, fashion_mnist_test_dataset = ImageDataset.from_folders(root=FASHION_MNIST_DATA_PATH)

In [3]:
# Registry keyed by dataset name. Each entry is its own dict holding that
# dataset's training split ("train_data") and test split ("test_data").
# Entries are inserted in the order of the rows below.
data_model_dict = {
    name: {"train_data": train_split, "test_data": test_split}
    for name, train_split, test_split in (
        ("cifar-10", cifar_10_train_dataset, cifar_10_test_dataset),
        ("cifar-100", cifar_100_train_dataset, cifar_100_test_dataset),
        ("roman-numeral", dcai_train_dataset, dcai_test_dataset),
        ("mnist", mnist_train_dataset, mnist_test_dataset),
        ("fashion-mnist", fashion_mnist_train_dataset, fashion_mnist_test_dataset),
    )
}

In [7]:
# Sanity check: preview the first rows of the CIFAR-100 training set.
# The rendered output shows one row per image with `image` (file path) and
# `label` columns.
cifar_100_train_dataset.head()

Unnamed: 0,image,label
0,/Data/cifar100_png/train/apple/0001.png,0
1,/Data/cifar100_png/train/apple/0002.png,0
2,/Data/cifar100_png/train/apple/0003.png,0
3,/Data/cifar100_png/train/apple/0004.png,0
4,/Data/cifar100_png/train/apple/0005.png,0


## Train model

In [None]:
%%time

def train_ag_model(
    train_data,
    dataset_name,
    model_folder="./",
    epochs=100,
    model="swin_base_patch4_window7_224",
    time_limit=10*3600,
    random_state=123,
):
    """Train an AutoGluon ``ImagePredictor`` on one dataset and save it to disk.

    The fitted predictor is written to
    ``<model_folder>/<model>_<dataset_name>.ag``.

    Parameters
    ----------
    train_data
        Training data passed straight through to ``ImagePredictor.fit``.
    dataset_name : str
        Short dataset identifier; only used to build the output file name.
    model_folder : str or path-like, default "./"
        Directory the predictor is saved into. A trailing separator is not
        required, and the directory is created if it does not exist.
    epochs : int, default 100
        Value of the ``"epochs"`` hyperparameter.
    model : str, default "swin_base_patch4_window7_224"
        Value of the ``"model"`` hyperparameter (backbone name); also used as
        the prefix of the output file name.
    time_limit : int, default 10 * 3600
        Time limit forwarded to ``ImagePredictor.fit`` (10 hours by default,
        assuming the unit is seconds — confirm against the AutoGluon docs).
    random_state : int, default 123
        Seed forwarded to ``ImagePredictor.fit``.

    Returns
    -------
    ImagePredictor
        The fitted predictor (the same object that was saved).

    Notes
    -----
    No ``tuning_data`` is passed to ``fit``, so validation handling is left to
    AutoGluon's defaults.
    """
    # Resolve and create the output location *before* training: a bad or
    # missing folder should fail immediately, not after hours of fitting.
    # Joining via Path also means `model_folder` works with or without a
    # trailing separator (plain string concatenation would glue the folder
    # name onto the file name).
    output_dir = Path(model_folder)
    output_dir.mkdir(parents=True, exist_ok=True)
    filename = str(output_dir / f"{model}_{dataset_name}.ag")

    # init model
    predictor = ImagePredictor(verbosity=0)

    MODEL_PARAMS = {
        "model": model,
        "epochs": epochs,
    }

    # run training
    predictor.fit(
        train_data=train_data,
        ngpus_per_trial=1,
        hyperparameters=MODEL_PARAMS,
        time_limit=time_limit,
        random_state=random_state,
    )

    # save model
    predictor.save(filename)

    return predictor

## Train model for all datasets

In [None]:
# Backbone used for every dataset in this run; passed explicitly to
# train_ag_model instead of relying on that function's default model.
model = "resnet18"

# Train one model per entry of data_model_dict, sequentially, in dictionary order.
for key, data in data_model_dict.items():

    dataset = key
    train_dataset = data["train_data"]
    
    # Print a short summary of the training set before the training call.
    print(f"Dataset: {dataset}")
    print(f"  Records: {train_dataset.shape}")
    print(f"  Classes: {train_dataset.label.nunique()}")    
    
    # The returned predictor is discarded here; train_ag_model saves it to disk itself.
    _ = train_ag_model(train_dataset, dataset_name=dataset, model=model, epochs=100)