In [None]:
# Reload edited project modules automatically before each cell executes.
%reload_ext autoreload
%autoreload 2
from IPython.core.interactiveshell import InteractiveShell

# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import pandas as pd
from config import conf
from scripts.dataset import PlanetDataSet
from scripts.transforms import rescale

In [None]:
# Read the table at `conf.data_file` and cast its `id` column to strings.
data_df = pd.read_csv(conf.data_file).astype({"id": str})
data_df.columns

In [None]:
# Preview the first two rows of the table.
data_df.head(2)

## Exploring data

In [None]:
# Number of rows in the table.
len(data_df)

In [None]:
# Draw 10 random rows (unseeded, so the draw differs between runs).
x = data_df.sample(10)
# The trailing `;` suppresses the cell output.
x;

In [None]:
# Dataset used for exploration: labels come from the `pl_strata` column,
# samples go through `rescale((32, 32))`, and multilabel mode is off.
planet_dataset = PlanetDataSet(
    root=conf.train_imgs_path,
    data_df=data_df,
    label_col="pl_strata",
    transforms=rescale((32, 32)),
    multilabel=False,
)

## Transformations

For optimal performance, ResNet-18 needs input dimensions that are multiples of 32. In our case the inputs have size 31, so the closest suitable size is 32.

In [None]:
# Re-apply autoreload (already enabled in the first cell) and render matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from torchgeo.transforms import AugmentationSequential, indices
from torch.utils.data import DataLoader

In [None]:
# Wrap the exploration dataset in a shuffled, single-process loader and keep
# only its iterator, so batches can be pulled one at a time with `next(...)`.
dataloader = iter(
    DataLoader(planet_dataset, batch_size=4, shuffle=True, num_workers=0)
)
print(f"Number of images in dataset: {len(planet_dataset)}")
print(f"Dataset Classes: {planet_dataset.classes}")

In [None]:
# Pull one fixed sample (index 807) and split it into its image and label.
sample = planet_dataset[807]
x = sample["image"]
y = sample["label"]

In [None]:
# Summarise the image (shape, dtype, value range), then show its label.
image_summary = (x.shape, x.dtype, x.min(), x.max())
print(*image_summary)
print(y)

In [None]:
# Fetch the next batch from the loader iterator.
batch = next(dataloader)

## Training

In [None]:
# Re-apply autoreload (already enabled in the first cell) and render matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Name of the settings preset; it is passed to `get_settings` and to `train`.
SETTING_NAME = "test_1"

In [None]:
from scripts.model import get_settings

# Resolve the preset named by SETTING_NAME and unpack the nine objects it yields.
_settings = get_settings(SETTING_NAME)
(model, model_name, optimizer, loss_fn, scheduler,
 variable, batch_size, rescale_factor, metadata) = _settings
metadata

In [None]:
import pandas as pd
from config import config_imp as conf
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from torchvision import transforms as T
from scripts.transforms import normalize, rescale
from scripts.dataset import PlanetDataSet

#### Subset the dataset

In [None]:
# Keep rows that have a `degraded_forest` value and whose `lc_sub_tags` is not
# "Forest Plantation", then store `degraded_forest` as strings.
# The filter and the cast are chained into one expression so the cast produces
# a new frame instead of assigning a column into a filtered slice, which is
# what triggers pandas' SettingWithCopyWarning.
data_df = data_df.loc[
    data_df["degraded_forest"].notna()
    & (data_df["lc_sub_tags"] != "Forest Plantation")
].assign(degraded_forest=lambda frame: frame["degraded_forest"].astype(str))
len(data_df)

#### Subset with downsampling

In [None]:
# Cap the listed land-cover classes at MAX_PER_TAG rows each and keep every row
# of the remaining classes.
DOWNSAMPLED_TAGS = ["Grassland", "Forest", "Otherland"]
MAX_PER_TAG = 60
SAMPLE_SEED = 42  # fixed so the subset is identical on every run


def _cap_rows(frame, limit=MAX_PER_TAG, seed=SAMPLE_SEED):
    """Return at most `limit` randomly chosen rows of `frame`.

    A plain `frame.sample(limit)` raises ValueError when `frame` has fewer than
    `limit` rows; capping the request at `len(frame)` keeps small classes whole.
    """
    return frame.sample(n=min(limit, len(frame)), random_state=seed)


grassland, forest, other_land = (
    _cap_rows(data_df[data_df.lc_tags == tag]) for tag in DOWNSAMPLED_TAGS
)
rest = data_df[~data_df.lc_tags.isin(DOWNSAMPLED_TAGS)]

data_df = pd.concat([grassland, forest, other_land, rest])
data_df;

### Create dataset

In [None]:
# Per-sample preprocessing pipeline: only the `rescale` step (to
# rescale_factor x rescale_factor) is active; `normalize` is left commented out.
rescale_step = rescale((rescale_factor, rescale_factor))
transforms = T.Compose(
    [
        rescale_step,
        # normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],),
    ]
)

In [None]:
# Build a dataset over the whole (filtered) table and keep its `class_to_idx`
# mapping; that mapping is later passed as `fixed_tags` to the train and
# validation datasets.
planet_dataset = PlanetDataSet(
    root=conf.train_imgs_path,
    data_df=data_df,
    label_col=variable,
    transforms=transforms,
)
all_tags_dict = planet_dataset.class_to_idx
all_tags_dict

In [None]:
# Hold out 10% of the rows for validation. `random_state` pins the shuffle so
# the split, and every metric computed on it, is the same on each run.
train_df, test_df = train_test_split(data_df, test_size=0.1, random_state=42)

In [None]:
# Arguments shared by both splits; `fixed_tags` hands each dataset the label
# mapping taken from the dataset built over the whole table.
split_kwargs = dict(
    root=conf.train_imgs_path,
    label_col=variable,
    transforms=transforms,
    # multilabel=True,
    fixed_tags=all_tags_dict,
)

pl_train = PlanetDataSet(data_df=train_df, **split_kwargs)
pl_val = PlanetDataSet(data_df=test_df, **split_kwargs)

In [None]:
# Batch both splits with the preset's batch size; both loaders shuffle.
# NOTE(review): shuffling the validation loader is not needed for evaluation —
# confirm it is intentional.
loader_kwargs = dict(batch_size=batch_size, shuffle=True)
dl_train = DataLoader(pl_train, **loader_kwargs)
dl_val = DataLoader(pl_val, **loader_kwargs)

In [None]:
# Inspect the shapes of one training batch. Every `next(iter(dl_train))` builds
# a fresh iterator and loads another shuffled batch, so fetch once and reuse it:
# both shapes then describe the same batch and only one batch is loaded.
first_batch = next(iter(dl_train))
(first_batch["image"].shape, first_batch["label"].shape)

In [None]:
# Labels of one training batch with size-1 dimensions removed.
label_batch = next(iter(dl_train))["label"]
label_batch.squeeze()

In [None]:
# Check each dataloader has the same amount of labels (only meaningful when using multilabel)
first_train_label = next(iter(dl_train))["label"][0]
first_val_label = next(iter(dl_val))["label"][0]
len(first_train_label), len(first_val_label)

# print(len(pl_train.class_to_idx), len(pl_val.class_to_idx))

### Train the model

In [None]:
# Re-apply autoreload (already enabled in the first cell) and render matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline
# The "show every expression" display setting is left disabled here:
# from IPython.core.interactiveshell import InteractiveShell

# InteractiveShell.ast_node_interactivity = "all"

In [None]:
from scripts.writer import Writer
from scripts.train import train
from torch.autograd import Variable
import torch

In [None]:
# Create a writer to save training info accordingly
writer = Writer()
writer.plot()

In [None]:
# Train with the selected preset, feeding the train/validation loaders and
# reporting through `writer`.
NUM_EPOCHS = 200
train(
    SETTING_NAME,
    writer=writer,
    train_loader=dl_train,
    val_loader=dl_val,
    num_epochs=NUM_EPOCHS,
)

### Load and test a model

In [None]:
# Re-apply autoreload (already enabled in the first cell) and render matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from config import config_imp as conf
import torch
from datetime import datetime

In [None]:
# Pick the most recently written model file. Modification time (st_mtime) is
# used rather than access time (st_atime): atime changes when a file is merely
# read, and may not be updated at all on noatime/relatime mounts, so it does
# not identify the newest checkpoint.
model_files = [path for path in conf.out_model_path.glob("*") if path.is_file()]
if not model_files:
    # Fail with a clear message instead of an IndexError on an empty folder.
    raise FileNotFoundError(f"No model files found in {conf.out_model_path}")
model_path = max(model_files, key=lambda path: path.stat().st_mtime)
model_path

In [None]:
# Load the saved weights into `model` and switch it to inference mode.
# `map_location` lets a checkpoint saved on a GPU be loaded on a CPU-only
# machine, and the device falls back to CPU when CUDA is unavailable instead of
# failing on a hard-coded "cuda".
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()
model.to(device);

In [None]:
# Show the label mapping taken from the dataset's `class_to_idx`.
all_tags_dict

In [None]:
# Overall accuracy on the validation loader. `correct` and `total` are computed
# here so the cell works on a fresh kernel; nothing else in the notebook defines them.
device = next(model.parameters()).device  # run inference where the model lives
correct, total = 0, 0
with torch.no_grad():  # inference only, no gradients needed
    for sample in dl_val:
        images = sample["image"].to(device)
        # reshape(-1) keeps a 1-D batch axis even for a batch of one sample.
        labels = sample["label"].reshape(-1).to(device)
        _, predictions = torch.max(model(images), 1)
        total += labels.numel()
        correct += (predictions == labels).sum().item()

if total == 0:
    print("Validation loader yielded no samples; accuracy is undefined.")
else:
    print(f"Accuracy of the network on the {total} test images: {100 * correct // total} %")

In [None]:
# Per-class accuracy on the validation loader.
# NOTE(review): indexing `classes` by label assumes the key order of
# `all_tags_dict` matches the label indices — confirm against PlanetDataSet.
classes = list(all_tags_dict.keys())

# prepare to count predictions for each class
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# Run inference on whatever device the model lives on.
device = next(model.parameters()).device

# again no gradients needed
with torch.no_grad():
    for sample in dl_val:
        # Read by key (as the other cells do) instead of relying on the order
        # of the dict's values.
        images = sample["image"].to(device)
        # reshape(-1) keeps a 1-D batch axis even when the batch holds a single
        # sample; squeeze() would collapse it to a 0-d tensor that cannot be
        # iterated by zip().
        labels = sample["label"].reshape(-1).tolist()
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions.tolist()):
            classname = classes[int(label)]
            if label == prediction:
                correct_pred[classname] += 1
            total_pred[classname] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    seen = total_pred[classname]
    if seen == 0:
        # A class can be missing from the validation split; avoid dividing by zero.
        print(f"Accuracy for class: {classname:5s} is n/a (no validation samples)")
        continue
    accuracy = 100 * float(correct_count) / seen
    print(f"Accuracy for class: {classname:5s} is {accuracy:.1f} %")

In [None]:
from config import conf
from scripts.writer import Writer

In [None]:
# Select the history file whose name contains "1722". Matches are sorted so the
# choice is deterministic if several files match, and a missing file raises a
# descriptive error instead of a bare StopIteration.
history_matches = sorted(conf.out_history.glob("*1722*"))
if not history_matches:
    raise FileNotFoundError(f'No history file matching "*1722*" in {conf.out_history}')
file_path = history_matches[0]

In [None]:
# Rebind `writer` to a new Writer constructed with the "statistics" argument.
writer = Writer("statistics")

In [None]:
# Plot the metrics stored in the selected history file and keep the returned value in `df`.
df = writer.plot_metrics(file_path=file_path, title="Model 1722")

### MNIST training

In [None]:
import torch
from torchvision.transforms import Resize
from torchvision import datasets, transforms
import numpy as np

# Fall back to CPU when no GPU is present instead of failing on a hard-coded "cuda".
mnist_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# MNIST preprocessing: resize to 32x32, convert to a tensor, normalise with the
# given mean/std, replicate the single channel to three channels, and move the
# sample to the selected device.
transform = transforms.Compose(
    [
        Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
        # Tensor.repeat stays inside torch. np.repeat on a tensor only works
        # through NumPy's fallback that round-trips via an ndarray.
        lambda x: x.repeat(3, 1, 1),
        lambda x: x.to(mnist_device),
    ]
)

dataset1 = datasets.MNIST("../mnist", train=True, download=True, transform=transform)
dataset2 = datasets.MNIST("../mnist", train=False, transform=transform)
# NOTE: this rebinds dl_train / dl_val, replacing the Planet loaders defined earlier.
dl_train = torch.utils.data.DataLoader(dataset1, batch_size=128)
dl_val = torch.utils.data.DataLoader(dataset2, batch_size=128)

A loss function computes a value that estimates how far away the output is from the target. The main objective is to reduce the loss function's value by changing the weight vector values through backpropagation in neural networks.

