In [None]:
# Reload imported project modules automatically before each cell is executed,
# so edits to the scripts/ and config/ packages are picked up without a restart.
%reload_ext autoreload
%autoreload 2
from IPython.core.interactiveshell import InteractiveShell

# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import pandas as pd
from config import config_imp as conf
from scripts.dataset import PlanetDataSet
from scripts.transforms import rescale

In [None]:
# Read the label table and cast the `id` column to strings in one step.
data_df = pd.read_csv(conf.data_file).astype({"id": str})

In [None]:
# Inspect which columns the loaded table provides.
data_df.columns

In [None]:
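# The cells below fix the train and test datasets. The commented-out code shows how
# every row gets assigned to a "group" ("train_df" / "test_df"); later cells filter
# data_df on that column.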

# train_df, test_df = train_test_split(data_df, test_size=0.1)
# to get reproducible and comparable results, let's fix the training and validation datasets
# data_df["group"] = None
# data_df.loc[data_df.id.isin(train_df.id), "group"] = "train_df"
# data_df.loc[data_df.id.isin(test_df.id), "group"] = "test_df"
# data_df.to_csv(conf.data_file.with_name("img_lbls_splitted.csv"))

In [None]:
# Repeat the autoreload setup and render matplotlib figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Name of the experiment configuration; it is passed to get_settings() and train() below.
SETTING_NAME = "imp_test_5"

In [None]:
from scripts.model import get_settings

# Resolve everything registered under SETTING_NAME and bind the nine returned
# values to notebook-level names (an unexpected number of values raises ValueError).
_settings = tuple(get_settings(SETTING_NAME))
model, model_name, optimizer, loss_fn, scheduler = _settings[:5]
variable, batch_size, rescale_factor, metadata = _settings[5:]
metadata

In [None]:
import pandas as pd
from config import config_imp as conf
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from torchvision import transforms as T
from scripts.transforms import normalize, rescale
from scripts.dataset import PlanetDataSet

### Create dataset

In [None]:
# The only active preprocessing step: rescale, called with `rescale_factor`
# for both entries of its size tuple.
rescale_step = rescale((rescale_factor, rescale_factor))

# The normalisation step below is kept for reference but is currently disabled:
# normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],),
transforms = T.Compose([rescale_step])

In [None]:
# Dataset over the complete table. Its `class_to_idx` mapping is kept in
# `all_tags_dict` and reused as `fixed_tags` for the train/validation datasets.
full_dataset_kwargs = {
    "root": conf.train_imgs_path,
    "data_df": data_df,
    "label_col": variable,
    "transforms": transforms,
}
planet_dataset = PlanetDataSet(**full_dataset_kwargs)
all_tags_dict = planet_dataset.class_to_idx
all_tags_dict

In [None]:
# Select the rows of each split using the fixed assignment stored in the `group` column.
is_train_row = data_df.group == "train_df"
is_test_row = data_df.group == "test_df"
train_df = data_df.loc[is_train_row]
test_df = data_df.loc[is_test_row]
"train_df:", len(train_df), "test_df:", len(test_df)

In [None]:
def _build_split_dataset(split_df):
    """Build a PlanetDataSet over ``split_df`` that reuses the full dataset's tag mapping.

    All arguments except ``data_df`` are the same for both splits: image root,
    label column, transforms and ``fixed_tags=all_tags_dict``.
    """
    return PlanetDataSet(
        root=conf.train_imgs_path,
        data_df=split_df,
        label_col=variable,
        transforms=transforms,
        fixed_tags=all_tags_dict,
    )


pl_train = _build_split_dataset(train_df)
pl_val = _build_split_dataset(test_df)

In [None]:
# Training batches are drawn in a new random order every epoch.
dl_train = DataLoader(
    pl_train,
    batch_size=batch_size,
    shuffle=True,
)
# Validation only measures the model, so its order does not need to be random:
# a fixed order keeps evaluation runs comparable and avoids consuming random
# numbers that would otherwise influence the training shuffle.
dl_val = DataLoader(
    pl_val,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Fetch ONE batch and inspect both tensors of it. Every `next(iter(dl_train))`
# creates a fresh iterator and loads another (shuffled) batch, so calling it
# twice would report shapes that belong to two different batches.
first_batch = next(iter(dl_train))
(first_batch["image"].shape, first_batch["label"].shape)

In [None]:
# Look at the labels of one training batch with singleton dimensions removed.
label_batch = next(iter(dl_train))["label"]
label_batch.squeeze()

In [None]:
# Check that each dataloader yields the same amount of labels per sample
# (only meaningful when using multilabel targets).
train_batch = next(iter(dl_train))
val_batch = next(iter(dl_val))
len(train_batch["label"][0]), len(val_batch["label"][0])

# print(len(pl_train.class_to_idx), len(pl_val.class_to_idx))

### Train the model

In [None]:
# Repeat the autoreload setup so edits to the project modules are picked up before training.
%reload_ext autoreload
%autoreload 2

In [None]:
from scripts.writer import Writer
from scripts.train import train
from torch.autograd import Variable
import torch

In [None]:
# Create the writer that is handed to train() to save training info.
writer = Writer()
# NOTE(review): presumably sets up the plot that is updated during training — confirm in scripts.writer.
writer.plot()

In [None]:
# Value passed to train() as `num_epochs`.
NUM_EPOCHS = 200

# Run training for the configuration named by SETTING_NAME, using the
# loaders and writer created above.
train(
    SETTING_NAME,
    num_epochs=NUM_EPOCHS,
    train_loader=dl_train,
    val_loader=dl_val,
    writer=writer,
)

### Load and test a model

In [None]:
# Repeat the autoreload setup and render matplotlib figures inline for the evaluation section.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from config import config_imp as conf
import torch
from datetime import datetime
from pathlib import Path
from scripts.writer import Writer

In [None]:
# Pick the most recently written file in the model output directory.
# Modification time is used instead of access time: atime changes whenever a
# file is merely read (and may not be updated at all on `noatime` mounts), so
# it does not identify the newest saved model.
saved_model_files = [
    path for path in conf.out_model_path.glob("*") if path.is_file()
]
if not saved_model_files:
    # Fail with a clear message instead of an IndexError on an empty folder.
    raise FileNotFoundError(f"No saved model found in {conf.out_model_path}")
# The path itself is the tie-breaker, as in a sort over (time, path) tuples.
model_path = max(saved_model_files, key=lambda path: (path.stat().st_mtime, path))

In [None]:
# The training history is a CSV in conf.out_history with the same base name as the model file.
history_name = Path(model_path.name).with_suffix(".csv")
history_file = conf.out_history / history_name

In [None]:
# Start from a fresh writer, name it after the history file (without extension)
# and load the stored training history into it.
writer = Writer()
writer.model_name = history_file.stem  # `.stem` is already a str
writer.load_data(history_file)

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# `map_location` remaps the stored tensors onto `device`; without it, weights
# saved from a GPU cannot be loaded on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
# Switch to evaluation mode before testing the model.
model.eval()
model.to(device);

In [None]:
# Show the `class_to_idx` mapping taken from the dataset built over the full table.
all_tags_dict

In [None]:
# NOTE(review): assumes the key order of all_tags_dict matches the integer label
# values (classes[label] is used as the class name) — confirm against PlanetDataSet.
classes = list(all_tags_dict.keys())

# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# Feed the inputs on whichever device the model currently lives on; a model on
# the GPU cannot consume CPU tensors.
first_param = next(model.parameters(), None)
eval_device = first_param.device if first_param is not None else torch.device("cpu")

# again no gradients needed
with torch.no_grad():
    for sample in dl_val:
        # Access by key instead of relying on the order of the batch dict.
        images = sample["image"].to(eval_device)
        labels = sample["label"].squeeze()
        if labels.dim() == 0:
            # A final batch holding a single sample is squeezed down to a
            # 0-d tensor, which cannot be iterated; restore the batch axis.
            labels = labels.unsqueeze(0)
        outputs = model(images)
        predictions = torch.max(outputs, 1)[1]
        # collect the correct predictions for each class
        # (.tolist() yields plain Python ints regardless of the tensors' device)
        for label, prediction in zip(labels.tolist(), predictions.tolist()):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    sample_count = total_pred[classname]
    if sample_count == 0:
        # A class without validation samples has no defined accuracy.
        print(f"Accuracy for class: {classname:5s} is n/a (no validation samples)")
        continue
    accuracy = 100 * float(correct_count) / sample_count
    print(f"Accuracy for class: {classname:5s} is {accuracy:.1f} %")

In [None]:
# Shape of the last image batch processed by the evaluation loop above.
images.shape