In [1]:
import os
import sys

import optuna
import torch
import pandas as pd

sys.path.insert(0, os.path.join(os.getcwd(), ".."))

from config import MODEL_PATH, STORAGE, DATA_DIR, BREED
from src.dataset import fetch_dataset
from src.model import EfficientNet
from src.train import run_training, evaluate

In [2]:
# Number of trials requested from the Optuna study in this notebook.
N_TRIALS = 50

# Blank out both spellings of the HTTP(S) proxy variables for this process.
# NOTE(review): presumably so outgoing connections bypass a configured
# proxy -- confirm the reason and record it here.
os.environ.update(
    dict.fromkeys(
        ("http_proxy", "https_proxy", "HTTP_PROXY", "HTTPS_PROXY"),
        "",
    )
)


In [None]:
def objective(trial):
    """Optuna objective: run one training with hyperparameters drawn from ``trial``.

    Samples the batch size, number of epochs, random seed and learning rate,
    builds the train/validation dataloaders, trains with ``freeze_layers=True``
    and returns the validation loss so the study can minimise it.

    Args:
        trial: The Optuna trial used to sample the hyperparameters.

    Returns:
        The second element of the pair returned by ``run_training``, which
        this notebook treats as the validation loss.
    """
    batch_size = trial.suggest_int("batch_size", low=32, high=48)
    epochs = trial.suggest_int("epochs", low=8, high=12)
    random_seed = trial.suggest_int("random_seed", low=0, high=1000_000)
    # `suggest_loguniform` is deprecated since Optuna 3.0; `suggest_float`
    # with `log=True` samples from the same log-uniform range.
    lr = trial.suggest_float("lr", 0.001, 0.01, log=True)

    # The third dataloader returned by `fetch_dataset` is not needed while
    # searching, so it is discarded.
    train_dataloader, valid_dataloader, _ = fetch_dataset(
        random_seed=random_seed, batch_size=batch_size
    )

    print(
        f"Epoch: {epochs}, Batch: {batch_size}, LR: {lr}, Seed: {random_seed}"
    )

    # Forward the sampled seed to training as well: the final retraining cell
    # passes `random_seed` to `run_training`, so each trial has to be scored
    # under the same seeding for its loss to describe that configuration.
    _, validation_loss = run_training(
        train_dataloader=train_dataloader,
        valid_dataloader=valid_dataloader,
        epochs=epochs,
        lr=lr,
        random_seed=random_seed,
        freeze_layers=True,
    )

    return validation_loss


In [None]:
# Build the pruner and sampler first so the study definition reads as a
# plain list of settings.
search_pruner = optuna.pruners.HyperbandPruner()
search_sampler = optuna.samplers.TPESampler(multivariate=True)

# Create the "dog-breed" study in STORAGE, or attach to it when it already
# exists (`load_if_exists=True`); the objective value is minimised.
study = optuna.create_study(
    storage=STORAGE,
    study_name="dog-breed",
    load_if_exists=True,
    direction="minimize",
    sampler=search_sampler,
    pruner=search_pruner,
)

# Run N_TRIALS trials of `objective`, garbage-collecting after each one.
study.optimize(objective, n_trials=N_TRIALS, gc_after_trial=True)


In [None]:
# Re-open the persisted "dog-breed" study to read the best hyperparameters.
study = optuna.load_study(
    study_name="dog-breed",
    pruner=optuna.pruners.HyperbandPruner(),
    sampler=optuna.samplers.TPESampler(multivariate=True),
    storage=STORAGE,
)

# Read the best parameters exactly once. `study.best_params` looks up the
# best trial on every access, so reading it repeatedly is wasteful and, if
# another worker is still adding trials to the shared storage, could mix
# values coming from two different trials.
best_params = study.best_params

train_dataloader, valid_dataloader, test_dataloader = fetch_dataset(
    random_seed=best_params["random_seed"],
    batch_size=best_params["batch_size"],
)

# Retrain with the best configuration. `freeze_layers=True` mirrors what
# every trial in `objective` used, so the tuned `lr`/`epochs` are applied
# under the same training regime they were selected for.
run_training(
    train_dataloader=train_dataloader,
    valid_dataloader=valid_dataloader,
    epochs=best_params["epochs"],
    lr=best_params["lr"],
    random_seed=best_params["random_seed"],
    freeze_layers=True,
    verbose=True,
)

# NOTE(review): this assumes `run_training` wrote its weights to MODEL_PATH --
# confirm against src.train.
model = EfficientNet().model
# `map_location="cpu"` lets a checkpoint saved from a GPU run be loaded on a
# CPU-only machine; `load_state_dict` copies the values into the model's
# existing parameters, so the model's own device is unaffected.
model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu"))

# Only the second value returned by `evaluate` (the probabilities) is used
# when building the submission.
_, probabilities, _ = evaluate(model=model, dataloader=test_dataloader)


In [6]:
# Image ids are the test file names without their extension. `splitext`
# removes only the final extension, so a name that itself contains a dot is
# not truncated at the first one.
ids = [
    os.path.splitext(file_name)[0]
    for file_name in os.listdir(os.path.join(DATA_DIR, "test"))
]

# Stack the per-batch probability tensors into one array with one column per
# breed. `.detach().cpu()` makes the conversion work for tensors that live on
# a GPU or still track gradients, where a bare `.numpy()` raises.
result = pd.DataFrame(
    torch.vstack(probabilities).detach().cpu().numpy(),
    columns=BREED,
)

# The rows are paired purely by position, and an inner merge would silently
# drop unmatched rows, so fail loudly when the counts disagree.
# NOTE(review): this also assumes `test_dataloader` yields images in the same
# order as `os.listdir` returns them -- confirm against src.dataset.
if len(ids) != len(result):
    raise ValueError(
        f"Found {len(ids)} test files but {len(result)} prediction rows; "
        "cannot align ids with probabilities."
    )

submission = pd.DataFrame({"id": ids}).merge(
    result, left_index=True, right_index=True
)

# Write the submission next to the data, without the index column.
submission.to_csv(
    os.path.join(DATA_DIR, "submission.csv"),
    index=False,
)
