# Classification de vidéos de veaux (vue de face) avec un CNN VGG19 + LSTM

## Import des dépendances

In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from torch.utils.data import Dataset
from transformers import Trainer, TrainingArguments
import torch
import torch.nn as nn
import torchvision.models.segmentation as segmentation
from torchvision.transforms import v2
from torchvision.io import read_image
import numpy as np
import cv2
import os
import sys
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, recall_score, f1_score, classification_report
from tqdm import tqdm
import random
from PIL import Image as PilImage
from omnixai.data.image import Image
from omnixai.explainers.vision import LimeImage, VisionExplainer
from omnixai.preprocessing.image import Resize
from efficientnet_pytorch import EfficientNet
from torchsampler import ImbalancedDatasetSampler
from IPython.display import clear_output
import timm
import torchvision.models as models
import torch.nn.functional as F

# Local dependencies: the project root is put first on sys.path so that the
# `helpers` package imported below is resolved from this workspace.
project_dir = '/data/konrad/workspace'
sys.path.insert(0, project_dir)

# These imports must stay after the sys.path.insert above.
from helpers.helpers import get_indices, load_face_data, load_local_model
from helpers.datasets import CalfCenterFaceDataset
from helpers.interp import GradCam, generate_cam, display_predicted_cam
from helpers.trainers import train_model, validate_model, plot_metrics

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

2024-08-27 16:37:00.573954: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-27 16:37:00.584840: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-27 16:37:00.588159: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-27 16:37:00.597361: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Classes et utilitaires

In [2]:
# Custom Video Dataset Class
class CalfVideoDataset(Dataset):
    """Video-level dataset returning a stack of frames sampled from one video.

    Each item corresponds to one row of ``video_df``. The frames of that video
    are looked up in ``images_df`` through the shared ``"video"`` column,
    ``max_frames`` of them are sampled, and they are stacked in their original
    row order.

    Args:
        video_df: One row per video; must provide ``"video"`` and ``"target"``.
        images_df: One row per frame; must provide ``"video"`` and ``"path"``,
            plus ``"x_min"``, ``"y_min"``, ``"x_max"``, ``"y_max"`` when
            ``use_face`` is true. The frame passed in is not modified.
        use_face: If true, each frame is cropped to its bounding box.
        max_frames: Number of frames sampled per video.
        bbox_size: Stored on the instance; not used by this class.
        transform: Optional callable applied to each frame tensor.
    """

    def __init__(self, video_df, images_df, use_face=False, max_frames=5, bbox_size = 800, transform = None):
        self.video_df = video_df.reset_index(drop=True)
        # Work on a private copy: adding the helper "id" column must not
        # mutate the DataFrame owned by the caller (it may be shared between
        # several datasets).
        self.images_df = images_df.copy()
        self.images_df["id"] = self.images_df.index
        self.max_frames = max_frames
        self.bbox_size = bbox_size
        self.transform = transform
        self.use_face = use_face

        self.labels = self.video_df["target"]

        # Per-sample weight = inverse frequency of the sample's class.
        label_to_count = self.labels.value_counts()
        self.weights = (1.0 / self.labels.map(label_to_count)).to_list()

    def __len__(self):
        """Return the number of videos."""
        return len(self.video_df)

    def get_labels(self):
        """Return the per-video targets as a pandas Series."""
        return self.labels

    def get_class_weights(self):
        """Return inverse class frequencies as a float32 tensor.

        Entries are ordered by ascending class id. ``value_counts()`` alone
        orders by frequency, which would not line up with the class indices.
        Classes absent from ``video_df`` have no entry.
        """
        inverse_freq = 1.0 / self.labels.value_counts().sort_index()
        return torch.tensor(inverse_freq.to_numpy(), dtype=torch.float32)

    def _load_frame(self, row, to_tensor):
        """Load one frame (optionally cropped to its bbox) as a float tensor."""
        # The context manager closes the file handle once the pixels are read.
        with PilImage.open(row["path"]) as raw:
            # Force three channels so later per-channel transforms do not
            # break on grayscale or RGBA files.
            picture = raw.convert("RGB")
        if self.use_face:
            picture = picture.crop((row['x_min'], row['y_min'], row['x_max'], row['y_max']))

        frame = to_tensor(picture)
        # Apply transformations if provided
        if self.transform:
            frame = self.transform(frame)
        return frame

    def __getitem__(self, idx):
        """Return ``{"image", "label", "weight"}`` for the video at ``idx``.

        ``"image"`` is the frames stacked along a new leading dimension,
        ``"label"`` is a float32 scalar tensor, and ``"weight"`` is the
        inverse-frequency weight of the sample. Both tensors are moved to the
        module-level ``device``.

        Raises:
            ValueError: If no frame is listed for the video.
        """
        row_video = self.video_df.loc[idx]
        label = torch.tensor(row_video["target"], dtype=torch.float32)

        images = self.images_df[self.images_df["video"] == row_video["video"]]
        if images.empty:
            raise ValueError(f"No frames found for video {row_video['video']!r}")
        # Sample with replacement only when the video is shorter than
        # max_frames, so every item still yields exactly max_frames frames.
        images = images.sample(
            self.max_frames, replace=len(images) < self.max_frames
        ).sort_values(by="id")

        # Built once per item instead of twice per frame.
        to_tensor = v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)])
        frames = [self._load_frame(row, to_tensor) for _, row in images.iterrows()]
        video_tensor = torch.stack(frames, dim=0)

        return {"image": video_tensor.to(device), "label": label.to(device), "weight": self.weights[idx]}

In [3]:
class Classifier(nn.Module):
    """Frozen VGG19 feature extractor followed by an LSTM and a two-layer head.

    Every frame of a clip goes through the convolutional part of VGG19, the
    per-frame feature vectors are fed to an LSTM, and the LSTM output at the
    last time step is classified by ``fc1 -> ReLU -> dropout -> fc2``.

    Args:
        num_classes: Size of the output layer.
        name: Identifier stored on the model (used by callers for file names).
        from_pretrained: Load ImageNet weights into VGG19 when true.
        method: Stored on the instance; not used by this class.
        dr_rate: Dropout probability applied between ``fc1`` and ``fc2``.
        rnn_hidden_size: Hidden size of the LSTM.
        rnn_num_layers: Number of stacked LSTM layers.
        fc_size: Width of the hidden fully connected layer.
        timestep: Kept for backward compatibility and stored on the instance;
            the network accepts any number of frames per clip.
    """

    def __init__(self,
             num_classes, name,
             from_pretrained=True,
             method='sqeeze',
             dr_rate= 0.2,
             rnn_hidden_size = 30,
             rnn_num_layers = 2,
             fc_size = 256,
             timestep = 4
        ):
        super().__init__()
        self.method = method
        self.name = name
        self.timestep = timestep

        # `pretrained=` is deprecated in torchvision; IMAGENET1K_V1 is the
        # weight set that `pretrained=True` used to select.
        weights = models.VGG19_Weights.IMAGENET1K_V1 if from_pretrained else None
        baseModel = models.vgg19(weights=weights).features
        # The backbone is used as a fixed feature extractor.
        for param in baseModel.parameters():
            param.requires_grad = False

        self.baseModel = baseModel
        # Pool to 7x7 so the flattened size stays 512*7*7 whatever the input
        # resolution (no-op for 224x224 inputs; the layer has no parameters).
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.dropout = nn.Dropout(dr_rate)
        self.rnn = nn.LSTM(512 * 7 * 7, rnn_hidden_size, rnn_num_layers, batch_first=True)
        self.fc1 = nn.Linear(rnn_hidden_size, fc_size)
        # fc2 consumes the fc1 activations, so its input width is fc_size.
        self.fc2 = nn.Linear(fc_size, num_classes)

    def forward(self, x):
        """Classify a batch of clips.

        Args:
            x: Tensor unpacked as ``(batch, time_steps, C, H, W)``.

        Returns:
            Logits of shape ``(batch, num_classes)``.
        """
        batch_size, time_steps, C, H, W = x.size()
        # Fold time into the batch dimension so the CNN sees plain images.
        x = x.reshape(batch_size * time_steps, C, H, W)
        x = self.avgpool(self.baseModel(x))
        # Back to (batch, time_steps, features) for the LSTM.
        x = x.reshape(batch_size, time_steps, -1)
        x, _ = self.rnn(x)
        # Only the output of the last time step is classified.
        x = F.relu(self.fc1(x[:, -1, :]))
        x = self.dropout(x)
        return self.fc2(x)

## Entraînement du modèle

### Dataset setup

In [4]:
# ---------------------------------------------------------------------------
# Dataset setup: load the frame/video metadata, encode the labels and build
# the datasets and data loaders used for training and evaluation.
# ---------------------------------------------------------------------------
ROOT_DIR = "/data/konrad/workspace"
IMAGE_SIZE = 224
train_on_face = False
image_per_video = 4

dataset_type = "subsplt"
image_type = "splt"
video_type = "subsplt"

CSV_DIR = ROOT_DIR + '/csv_files/mixed_10s_b0s_y7_1'


def _read_metadata(split_type, split, kind):
    """Read ``<split_type>_<split>_<kind>_extracted_metadata.csv`` from CSV_DIR."""
    return pd.read_csv(f'{CSV_DIR}/{split_type}_{split}_{kind}_extracted_metadata.csv', index_col=False)


# NOTE(review): the "valid_*" frames are read from the "*_test_*" files and the
# "test_*" frames from the "*_val_*" files. Further down, test_dataset pairs
# test_videos_df with training_images_df while valid_dataset pairs
# valid_videos_df with test_images_df. Kept exactly as-is; confirm this
# crossover is intentional.
train_images_df = _read_metadata(image_type, "train", "image")
valid_images_df = _read_metadata(image_type, "test", "image")
test_images_df = _read_metadata(image_type, "val", "image")
training_images_df = pd.concat([train_images_df, valid_images_df], ignore_index=True)

train_videos_df = _read_metadata(video_type, "train", "video")
valid_videos_df = _read_metadata(video_type, "test", "video")
test_videos_df = _read_metadata(video_type, "val", "video")

# label_col = "bilabel"

label_col = "label"
EXCLUDED_LABEL = "Diarrhé, Pneumonie"


def _drop_excluded(df):
    """Return a copy of ``df`` without the rows labelled EXCLUDED_LABEL.

    The explicit copy makes the result an independent frame, so adding the
    'target' column afterwards is not a chained assignment on a slice.
    """
    return df[df[label_col] != EXCLUDED_LABEL].copy()


train_videos_df = _drop_excluded(train_videos_df)
valid_videos_df = _drop_excluded(valid_videos_df)
test_videos_df = _drop_excluded(test_videos_df)

# Class ids follow the order of first appearance in the training videos.
labels = train_videos_df[label_col].unique()
label2id = {l: i for i, l in enumerate(labels)}
id2label = dict(enumerate(labels))

num_labels = len(labels)
# num_labels = 1

# Element-wise lookup; an unknown label still raises KeyError.
for _videos_df in (train_videos_df, valid_videos_df, test_videos_df):
    _videos_df['target'] = _videos_df[label_col].map(lambda label: label2id[label])


# Disabled training-time augmentation, kept for reference:
# train_transform = v2.Compose([
#     v2.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
#     v2.RandomHorizontalFlip(p=0.5),
#     v2.AutoAugment(v2.AutoAugmentPolicy.IMAGENET),
#     v2.ToDtype(torch.float32, scale=True),
#     v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
# ])
test_transform = v2.Compose([
    v2.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# All three datasets share the same options; only the frames differ.
_dataset_options = dict(use_face=train_on_face, max_frames=image_per_video, bbox_size=IMAGE_SIZE, transform=test_transform)
train_dataset = CalfVideoDataset(train_videos_df, training_images_df, **_dataset_options)
test_dataset = CalfVideoDataset(test_videos_df, training_images_df, **_dataset_options)
valid_dataset = CalfVideoDataset(valid_videos_df, test_images_df, **_dataset_options)

# Data loaders
batch_size = 12
# The training loader draws its samples through ImbalancedDatasetSampler.
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=ImbalancedDatasetSampler(train_dataset))
valid_loader = DataLoader(valid_dataset, batch_size=batch_size * 2, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size * 2, shuffle=False)

### Entraînement et validation

In [5]:
# Build the CNN+LSTM classifier, train it with early stopping, then plot the
# metrics returned by the training helper.
model_name = f"lstm_cnn_{dataset_type}_{label_col}_{batch_size}"
model = Classifier(
    num_classes=num_labels,
    name=model_name,
    from_pretrained=True,
    method='sqeeze',
    dr_rate=0.3,
    rnn_hidden_size=40,
    rnn_num_layers=1,
    fc_size=160,
    # Passed explicitly so the model does not silently rely on its default
    # matching the number of frames sampled per video.
    timestep=image_per_video,
)
model.to(device)

# Loss and optimizer
lr = 5e-3
optimizer = optim.Adam(model.parameters(), lr=lr)
# Divide the learning rate by 10 after 2 epochs without improvement of the
# monitored quantity (mode='min').
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, verbose=True)

epochs = 50
patience = 5

# Averaging mode forwarded to the training/validation helpers.
average = "binary" if num_labels == 1 else "weighted"

output_dir = ROOT_DIR + "/training_log"
# NOTE: test_loader is the loader handed to train_model alongside train_loader;
# valid_loader is only used in the evaluation cell.
metrics_scores = train_model(model, labels, train_loader, test_loader, scheduler, patience, optimizer, output_dir, num_epochs = epochs, average=average)
clear_output()
plot_metrics(metrics_scores)



Training on 372 and validating on 94 datas


  0%|                                                                                            | 0/31 [00:03<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 588.00 MiB. GPU 

In [None]:
# Reload the best checkpoint written during training and evaluate it on
# valid_loader.
best_weights_path = f'{output_dir}/models/best_{model.name}_model.pth'
# map_location lets a checkpoint saved from a GPU run load on the current
# device; weights_only=True restricts unpickling to tensors/state-dict data.
model.load_state_dict(torch.load(best_weights_path, map_location=device, weights_only=True))
validate_model(model, valid_loader, labels, average=average)

## Interpretation with Lime

In [None]:
# model = Classifier(num_classes=len(labels), name=model_name, from_pretrained=False)  # Adjust num_classes according to your dataset
# model.load_state_dict(torch.load(f'{output_dir}/models/best_{model.name}_model.pth'))
# model = model.to(device)

In [None]:
# test_df = valid_loader.dataset
# max_item = 20
# test_images = [Resize((256, 256)).transform(Image(test_df[p]["image"].cpu().numpy(), channel_last = False)).to_numpy() for p in range(max_item)]
# img = Image( data=np.concatenate(test_images), batched=True)

# # The preprocessing function
# transform = v2.Compose([
#     v2.Resize(256),
#     v2.CenterCrop(224),
#     v2.ToImage(),
#     v2.ToDtype(torch.float32, scale=True),
#     v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])
# preprocess = lambda ims: torch.stack([transform(im.to_pil()) for im in ims]).to(device)
# postprocess = lambda logits: torch.nn.functional.softmax(logits, dim=1)

# # target_head = model.conv_head
# # target_layer = model.conv_head
# explainer = VisionExplainer(
#     explainers=[
#         # "gradcam",
#         "lime", 
#         # "ig",
#         # "ce",
#         # "scorecam",
#         # "smoothgrad", 
#         # "guidedbp", 
#         # "layercam"
#     ],
#     mode="classification",
#     model=model,
#     preprocess=preprocess,
#     postprocess=postprocess,
#     params={
#         # "gradcam": {"target_layer": target_head},
#         # "ce": {"binary_search_steps": 2, "num_iterations": 100},
#         # "scorecam": {"target_layer": target_head},
#         # "layercam": {"target_layer": target_layer},
#     }
# )

# # Generate explanations
# local_explanations = explainer.explain(img)
# clear_output()

# for row_id in range(max_item):
#     row = test_df[row_id]
#     print(f"Interpretation pour image {row_id}: \n")
#     plt.imshow(row["image"].cpu().permute(1, 2, 0))
#     plt.show()
#     print(f"De classe: {id2label[row['label'].cpu().item()]}: \n")
#     for name, explanations in local_explanations.items():
#         print(f"{name}:")
#         explanations.ipython_plot(row_id, class_names=labels)

# print("-" * 40)