In [1]:
import os
import torch
import pandas as pd
from skimage.io import imread
import numpy as np
import matplotlib.pyplot as plt

# import torchvision.transforms.functional
# import torchvision.transforms.functional as F

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn

from torchinfo import summary
from tqdm import tqdm

# Planet : Amazon space -- Full model running for multilabel classification

!! This is supposed to become the main jupyternotebook to run the multilabel classification and testing !!

Usefull links:
<https://learnopencv.com/multi-label-image-classification-with-pytorch-image-tagging/>

## 0) Initialization
### 0.1) Getting Module and Functions

In [2]:
from Multilabel_Amazon_Engine import checking_folder, train, show_4_image_in_batch, batch_prediction
from Multilabel_Amazon_Module import AmazonSpaces, MultiLayerCNN, AdjustSaturation

In [3]:
data_folder = '../IPEO_Planet_project'
if not os.path.exists(data_folder):
    data_folder = input("Enter the data folder path: ")
    assert os.path.exists(data_folder), "I did not find the folder at, "+str(data_folder)

## 1) Putting the model on the gpu

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

Using cpu device


In [5]:
model = MultiLayerCNN().to(device)

### + Load previously trained model

In [6]:
if device=="cpu":
    model.load_state_dict(torch.load('model_multilabel_classification.pth', map_location=torch.device('cpu')))
else:
    model.load_state_dict(torch.load('model_multilabel_classification.pth'))

In [7]:
model.eval()

MultiLayerCNN(
  (conv1): Conv2d(3, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool_max): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool_avg): AvgPool2d(kernel_size=4, stride=4, padding=0)
  (fc): Linear(in_features=14580, out_features=17, bias=True)
  (batchNorm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (loss): BCELoss()
  (sig): Sigmoid()
)

## 2) Getting the different datasets

In [8]:
train_csv = 'training.csv'
validation_csv = 'validation.csv'
test_csv = 'test.csv'

In [9]:
Transform_choice = transforms.Compose([transforms.ToTensor(), transforms.CenterCrop(256),transforms.RandomAutocontrast(p=1), AdjustSaturation(1.2)])

In [10]:
train_dataset = AmazonSpaces(csv_file=train_csv,
                                    root_dir=f'{data_folder}/train-jpg', transform=Transform_choice)
validation_dataset = AmazonSpaces(csv_file=validation_csv,
                                    root_dir=f'{data_folder}/train-jpg', transform=Transform_choice)
test_dataset = AmazonSpaces(csv_file=test_csv,
                                    root_dir=f'{data_folder}/train-jpg', transform=Transform_choice)

## 3) Wrapping into the different dataloaders

In [11]:
batch_size = 64
train_dataloader = DataLoader(train_dataset, batch_size=batch_size,drop_last = True)
validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size,drop_last = True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, drop_last=True)

## 4) Choice of Criterion and Opitimizer

In [12]:
criterion = nn.BCEWithLogitsLoss()
optim = torch.optim.SGD(model.parameters(), lr=0.01)

## 5) TRAINING

In [13]:
The_results = train(model, train_dataloader, validation_dataloader, device=device, optimizer= optim, lr = 0.01, epochs=1, loss_fn=criterion)

Training


  _warn_prf(average, modifier, msg_start, len(result))
1it [00:02,  2.13s/it]

torch.Size([64, 3, 256, 256])
(64, 17) (64, 17)
Predicted : [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]], calculated accuracy score: 0.8823529411764706, prediction score : 0.0, recall score: 0.0
Loss : 0.6821922768509426, calculated accuracy score: 0.8823529411764706, prediction score : 0.0, recall score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
21it [00:48,  2.27s/it]

Loss : 0.3403016141122093, calculated accuracy score: 0.963235294117647, prediction score : 0.9047619047619048, recall score: 0.6875


41it [01:36,  2.29s/it]

Loss : 0.18709528775072215, calculated accuracy score: 0.9811692969870877, prediction score : 0.9512195121951219, recall score: 0.8399390243902439


61it [02:22,  2.26s/it]

Loss : 0.12878648125081993, calculated accuracy score: 0.9873432979749278, prediction score : 0.9672131147540983, recall score: 0.8924180327868853


81it [03:17,  3.45s/it]

Loss : 0.09817952418933926, calculated accuracy score: 0.9904684095860565, prediction score : 0.9753086419753086, recall score: 0.9189814814814815


101it [04:04,  2.23s/it]

Loss : 0.07938281896504885, calculated accuracy score: 0.992355853232382, prediction score : 0.9801980198019802, recall score: 0.9350247524752475


121it [04:52,  2.31s/it]

Loss : 0.06666431397998877, calculated accuracy score: 0.9936193485658725, prediction score : 0.9834710743801653, recall score: 0.9457644628099173


141it [05:37,  2.16s/it]

Loss : 0.07016021638035225, calculated accuracy score: 0.9910956403838131, prediction score : 0.9731493794326241, recall score: 0.9364472517730497


161it [06:22,  2.14s/it]

Loss : 0.06336046336086727, calculated accuracy score: 0.992201772013153, prediction score : 0.9764848602484473, recall score: 0.94434200310559


  _warn_prf(average, modifier, msg_start, len(result))
181it [07:06,  2.10s/it]

Loss : 0.07004929588303449, calculated accuracy score: 0.9899760318492038, prediction score : 0.9561205110497238, recall score: 0.9277517499297687


201it [07:52,  2.21s/it]

Loss : 0.08432088011853288, calculated accuracy score: 0.9847591088674275, prediction score : 0.9439287935323384, recall score: 0.9075004034362605


221it [08:37,  2.16s/it]

Loss : 0.08362749424485469, calculated accuracy score: 0.9837428466861858, prediction score : 0.9462388291855203, recall score: 0.9084300897768703


241it [09:24,  2.36s/it]

Loss : 0.08641881937903967, calculated accuracy score: 0.9825405784720527, prediction score : 0.9424171704589257, recall score: 0.9058424857461672


261it [10:10,  2.39s/it]

Loss : 0.09688383688777569, calculated accuracy score: 0.9771417906242956, prediction score : 0.9399688183970613, recall score: 0.8906201694890712


281it [11:01,  2.20s/it]

Loss : 0.10680522124770558, calculated accuracy score: 0.9722171341846346, prediction score : 0.9305625502454018, recall score: 0.8779969118084952


301it [11:53,  2.94s/it]

Loss : 0.11596659156763779, calculated accuracy score: 0.9656323285128005, prediction score : 0.923618482523295, recall score: 0.8598188354022839


321it [12:52,  3.15s/it]

Loss : 0.1295800072416978, calculated accuracy score: 0.9573369067253068, prediction score : 0.9136126978192897, recall score: 0.8377684772101841


341it [13:50,  2.82s/it]

Loss : 0.143776915293081, calculated accuracy score: 0.9494808737277902, prediction score : 0.9053887743761334, recall score: 0.8127353185927209


361it [14:49,  2.80s/it]

Loss : 0.15724939516497835, calculated accuracy score: 0.9420446675900277, prediction score : 0.8993668240359258, recall score: 0.7856652689778004


375it [15:30,  2.48s/it]


Validating


NameError: name 'torch' is not defined

In [15]:
import json

# create json object from dictionary
js = json.dumps(The_results)

# open file for writing, "w"
f = open("training_results.json","a")

# write json object to file
f.write(js)

# close file
f.close()

NameError: name 'The_results' is not defined

## 6) TESTING

In [15]:
# store stats
losses, accuracies = [], []
all_pred = []
for batch in tqdm(test_dataloader):
    # TODO run prediction_step
    loss, accuracy, predictions = batch_prediction(batch, model, device = 'cuda')

    # append to stats
    losses.append(loss)
    accuracies.append(accuracy)
    all_pred.append(predictions)
# average val losses and accuracies over batches
losses, accuracies = np.stack(losses).mean(), np.stack(accuracies).mean()
print(f"valloss {losses:.2f}, val accuracy {accuracies*100:.2f}")

100%|██████████| 65/65 [01:03<00:00,  1.02it/s]

valloss 0.69, val accuracy 82.84





In [25]:
mean_pred = np.stack(all_pred, axis = 0).mean(axis=0)
mean_pred

array([0.30012019, 0.07512019, 0.81850962, 0.94975962, 0.81165865,
       0.69002404, 0.79471154, 0.93173077, 0.88257212, 0.90216346,
       0.99026442, 0.97319712, 0.9890625 , 0.98942308, 0.99639423,
       0.99254808, 0.99507212])

# Saving Models

In [14]:
torch.save(model.state_dict(), "model_multilabel_classification.pth")
print("Saved PyTorch Model State to model_multilabel_classification.pth")

Saved PyTorch Model State to model_multilabel_classification.pth
