In [1]:
import os
import torch
import pandas as pd
from skimage.io import imread
import numpy as np
import matplotlib.pyplot as plt

# import torchvision.transforms.functional
# import torchvision.transforms.functional as F

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn

from torchinfo import summary
from tqdm import tqdm

# Planet : Amazon space -- Full model running for multilabel classification

!! This is intended to become the main Jupyter notebook for running the multilabel classification and testing !!

Useful links:
<https://learnopencv.com/multi-label-image-classification-with-pytorch-image-tagging/>

## 0) Initialization
### 0.1) Getting Module and Functions

In [2]:
from Multilabel_Amazon_Engine import checking_folder, train, show_4_image_in_batch, batch_prediction
from Multilabel_Amazon_Module import AmazonSpaces, MultiLayerCNN, AdjustSaturation

In [3]:
# Locate the dataset folder: try the default relative path first and fall back
# to asking the user when that path does not exist.
data_folder = '../IPEO_Planet_project'
if not os.path.exists(data_folder):
    data_folder = input("Enter the data folder path: ")
    # Explicit raise instead of `assert`: assertions are stripped under
    # `python -O`, which would let a wrong path slip through unchecked.
    if not os.path.exists(data_folder):
        raise FileNotFoundError("I did not find the folder at, "+str(data_folder))

## 1) Putting the model on the GPU

In [4]:
# Prefer the GPU whenever CUDA is usable; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using {} device".format(device))

Using cuda device


In [5]:
# Build a fresh network, then move it to the selected device.
model = MultiLayerCNN()
model = model.to(device)

### + Load previously trained model

In [6]:
# Restore the weights saved by a previous training run.
# map_location remaps the stored tensors onto the device selected earlier, so a
# checkpoint written during a GPU session still loads on a CPU-only machine.
model.load_state_dict(
    torch.load('model_multilabel_classification.pth', map_location=device)
)

<All keys matched successfully>

In [7]:
# Switch the model to evaluation mode. Being the last expression of the cell,
# the returned module is also displayed, which prints the layer summary.
model.eval()

MultiLayerCNN(
  (conv1): Conv2d(3, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool_max): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool_avg): AvgPool2d(kernel_size=4, stride=4, padding=0)
  (fc): Linear(in_features=14580, out_features=17, bias=True)
  (batchNorm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (loss): BCELoss()
  (sig): Sigmoid()
)

## 2) Getting the different datasets

In [8]:
# CSV files describing the three data splits (train / validation / test).
train_csv, validation_csv, test_csv = (
    'training.csv',
    'validation.csv',
    'test.csv',
)

In [9]:
# Preprocessing pipeline applied to every image, in this order.
Transform_choice = transforms.Compose(
    [
        transforms.ToTensor(),               # image -> tensor
        transforms.CenterCrop(256),          # keep the central 256-pixel crop
        transforms.RandomAutocontrast(p=1),  # p=1: autocontrast is always applied
        AdjustSaturation(1.2),               # project transform; presumably a saturation factor - TODO confirm
    ]
)

In [10]:
# All three splits read their images from the same folder and share the same
# transform; only the CSV file passed to each dataset differs.
images_dir = f'{data_folder}/train-jpg'

train_dataset = AmazonSpaces(
    csv_file=train_csv, root_dir=images_dir, transform=Transform_choice
)
validation_dataset = AmazonSpaces(
    csv_file=validation_csv, root_dir=images_dir, transform=Transform_choice
)
test_dataset = AmazonSpaces(
    csv_file=test_csv, root_dir=images_dir, transform=Transform_choice
)

## 3) Wrapping into the different dataloaders

In [11]:
batch_size = 128

# Options shared by every split: same batch size, and the trailing incomplete
# batch is dropped so each batch holds exactly `batch_size` samples.
loader_options = {"batch_size": batch_size, "drop_last": True}

# NOTE(review): the training loader does not shuffle - confirm this is intended.
train_dataloader = DataLoader(train_dataset, **loader_options)
validation_dataloader = DataLoader(validation_dataset, **loader_options)
test_dataloader = DataLoader(test_dataset, **loader_options)

## 4) Choice of Criterion and Optimizer

In [12]:
# Plain SGD over all model parameters with a learning rate of 0.01.
optim = torch.optim.SGD(params=model.parameters(), lr=0.01)

# Binary cross-entropy computed on raw logits (the sigmoid is applied inside the loss).
# NOTE(review): the model summary lists its own Sigmoid/BCELoss layers - confirm
# that its forward pass returns raw logits rather than probabilities.
criterion = nn.BCEWithLogitsLoss()

## 5) TRAINING

In [19]:
# Train for 2 epochs; the validation loader is handed to `train` for validation.
The_results = train(
    model,
    train_dataloader,
    validation_dataloader,
    device=device,
    optimizer=optim,
    lr=0.01,
    epochs=2,
    loss_fn=criterion,
)

Training


1it [00:10, 10.30s/it]

torch.Size([128, 3, 256, 256])
Predicted : [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]] and calculated accuracy: 0.8823529411764706
loss: 0.6818896659520653 and acc: 0.8823529411764706


101it [06:03,  2.96s/it]

loss: 0.14949337321978962 and acc: 0.9699739735002912


187it [10:01,  3.21s/it]

Unexpected exception formatting exception. Falling back to standard exception



Traceback (most recent call last):
  File "C:\Users\gezas\.conda\envs\IPEO_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3433, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\gezas\AppData\Local\Temp\ipykernel_13944\3461717481.py", line 1, in <module>
    The_results = train(model, train_dataloader, validation_dataloader, device=device, optimizer= optim, lr = 0.01, epochs=2, loss_fn=criterion)
  File "D:\gezas\Documents\EPFL-MA\IPEO-projet\Multilabel_Amazon_Engine.py", line 121, in train
    vl, va = validate(model, validation_dataloader, loss_fn=loss_fn, device=device)
  File "D:\gezas\Documents\EPFL-MA\IPEO-projet\Multilabel_Amazon_Engine.py", line 28, in validate
    net.eval()
NameError: name 'net' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\gezas\.conda\envs\IPEO_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 2052, in showt

## 6) TESTING

In [15]:
# Evaluate the model on the test split, one batch at a time.

# Training may have left the model in train mode; the network contains a
# BatchNorm layer, so make sure evaluation uses its running statistics.
model.eval()

# Per-batch statistics collected over the whole test loader.
losses, accuracies = [], []
all_pred = []

# No gradients are needed for evaluation.
with torch.no_grad():
    for batch in tqdm(test_dataloader):
        # Use the device selected earlier instead of a hard-coded 'cuda', so the
        # cell also runs on a CPU-only machine.
        loss, accuracy, predictions = batch_prediction(batch, model, device=device)

        # append to stats
        losses.append(loss)
        accuracies.append(accuracy)
        all_pred.append(predictions)

# Average the test losses and accuracies over batches.
losses, accuracies = np.stack(losses).mean(), np.stack(accuracies).mean()
print(f"test loss {losses:.2f}, test accuracy {accuracies*100:.2f}")

100%|██████████| 65/65 [01:03<00:00,  1.02it/s]

valloss 0.69, val accuracy 82.84





In [25]:
# Stack the per-batch `predictions` arrays along a new leading axis and average
# them element-wise across batches.
stacked_pred = np.stack(all_pred)
mean_pred = np.mean(stacked_pred, axis=0)
mean_pred

array([0.30012019, 0.07512019, 0.81850962, 0.94975962, 0.81165865,
       0.69002404, 0.79471154, 0.93173077, 0.88257212, 0.90216346,
       0.99026442, 0.97319712, 0.9890625 , 0.98942308, 0.99639423,
       0.99254808, 0.99507212])

# Saving Models

In [26]:
# Persist only the learned weights (the state dict), not the whole module object.
trained_weights = model.state_dict()
torch.save(trained_weights, "model_multilabel_classification.pth")
print("Saved PyTorch Model State to model_multilabel_classification.pth")

Saved PyTorch Model State to model_multilabel_classification.pth
