In [None]:
import os
import sys

import optuna
import torch
import pandas as pd

sys.path.insert(0, os.path.join(os.getcwd(), ".."))

from config import MODEL_PATH, STORAGE, DATA_DIR, BREED
from src.dataset import Dogs, fetch_dataset
from src.model import EfficientNet
from src.train import run_training, evaluate


In [None]:
# Number of trials handed to `study.optimize(n_trials=...)` in the search cell below.
N_TRIALS = 50


In [None]:
def objective(trial):
    """Optuna objective: sample one hyperparameter set, train, return the score.

    Args:
        trial: The Optuna trial used to sample hyperparameters. Only its
            ``suggest_int`` and ``suggest_float`` methods are used here.

    Returns:
        Whatever ``run_training`` returns for the sampled configuration; this
        notebook treats it as the validation accuracy to optimise.
    """
    # Integer-valued hyperparameters (bounds are inclusive).
    batch_size = trial.suggest_int("batch_size", low=32, high=48)
    epochs = trial.suggest_int("epochs", low=10, high=40)
    random_seed = trial.suggest_int("random_seed", low=0, high=1000_000)
    lr_step = trial.suggest_int("lr_step", low=5, high=8)

    # Log-scaled floats. `suggest_float(..., log=True)` is the supported
    # replacement for the deprecated `suggest_loguniform`.
    lr = trial.suggest_float("lr", low=1e-6, high=1e-4, log=True)
    lr_gamma = trial.suggest_float("lr_gamma", low=1e-3, high=1e-2, log=True)

    # The sampled seed is used both for building the loaders and for training.
    train_dataloader, valid_dataloader = fetch_dataset(
        random_seed=random_seed, batch_size=batch_size
    )

    print(
        f"Epoch: {epochs}, Batch_size: {batch_size}, Learning_rate: {lr}, Random_Seed: {random_seed}"
    )

    # NOTE(review): `trial` is not passed on to `run_training` and no
    # intermediate value is reported from here, so a pruner attached to the
    # study has nothing to act on - confirm whether pruning is intended.
    validation_accuracy = run_training(
        train_dataloader=train_dataloader,
        valid_dataloader=valid_dataloader,
        epochs=epochs,
        lr=lr,
        lr_step=lr_step,
        lr_gamma=lr_gamma,
        random_seed=random_seed,
    )

    return validation_accuracy


In [None]:
# Create the persistent "dog-breed" study in STORAGE, or reopen it if a study
# with that name already exists there (load_if_exists=True), so repeated runs
# of this cell keep adding trials to the same study.
study = optuna.create_study(
    storage=STORAGE,
    study_name="dog-breed",
    load_if_exists=True,
    direction="maximize",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(multivariate=True),
)

# Run N_TRIALS trials of `objective`, garbage-collecting after each one.
study.optimize(objective, n_trials=N_TRIALS, gc_after_trial=True)


In [None]:
# Reopen the stored study so this cell works without re-running the search.
study = optuna.load_study(
    study_name="dog-breed",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(multivariate=True),
    storage=STORAGE,
)

# Snapshot the best hyperparameters once. `study.best_params` is looked up in
# the storage on each access, so reading it repeatedly could mix values from
# two different trials if another worker finishes a better trial in between.
best_params = study.best_params

train_dataloader, valid_dataloader = fetch_dataset(
    random_seed=best_params["random_seed"],
    batch_size=best_params["batch_size"],
)

# Retrain with the best configuration.
# NOTE(review): presumably this writes the trained weights to MODEL_PATH,
# which is loaded right below - confirm against src.train.
run_training(
    train_dataloader=train_dataloader,
    valid_dataloader=valid_dataloader,
    epochs=best_params["epochs"],
    lr=best_params["lr"],
    lr_step=best_params["lr_step"],
    lr_gamma=best_params["lr_gamma"],
    random_seed=best_params["random_seed"],
)

model = EfficientNet().model
# map_location="cpu" lets a checkpoint saved from a GPU be deserialised on a
# machine without one; load_state_dict then copies the values into the
# model's own parameters.
model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"))

# Evaluate on the test split in a fixed (unshuffled) order so the returned
# probabilities can be matched to rows by position afterwards.
test_dataset = Dogs(split="test")
accuracy, probabilities = evaluate(
    model=model,
    dataloader=torch.utils.data.DataLoader(
        test_dataset, batch_size=best_params["batch_size"], shuffle=False
    ),
)

print(f"Test Accuracy: {accuracy:.2f}")


In [None]:
# Build the "id" column of the submission from the test image file names.
# os.path.splitext strips only the final extension, so a name that itself
# contains dots is not truncated at the first one.
# NOTE(review): these rows are later joined to the predictions by position,
# so this listing order must match the order in which Dogs(split="test")
# yields images - confirm against src.dataset.
ids = [
    os.path.splitext(file_name)[0]
    for file_name in os.listdir(os.path.join(DATA_DIR, "test"))
]
# Constructing from a dict also works for an empty directory, where
# pd.DataFrame([]) followed by assigning one column name would raise.
submission = pd.DataFrame({"id": ids})


In [None]:
# Stack the tensors returned by `evaluate` row-wise into a single 2-D array and
# label its columns with BREED (presumably one column per breed - confirm).
result = pd.DataFrame(torch.vstack(probabilities).numpy(), columns=BREED)


In [None]:
# Ids and predictions are paired purely by row position, and the merge is an
# inner join, so a length mismatch would silently drop rows - fail loudly.
assert len(submission) == len(result), (
    f"{len(submission)} test ids but {len(result)} prediction rows"
)

# Merge from the "id" column only: `submission` is rebound to the merged frame,
# so without this a second run of the cell would merge the breed columns again
# and produce suffixed duplicates.
submission = submission[["id"]].merge(result, left_index=True, right_index=True)
submission


In [None]:
# Write the submission table to DATA_DIR as CSV, leaving out the row index.
submission_path = os.path.join(DATA_DIR, "submission.csv")
submission.to_csv(submission_path, index=False)
