# Inference on batch
Helper code to test a model on a batch of images. We keep track of the prediction/inference results in a CSV file (Metrics part); we can then choose to display examples of images that were mistaken for another class. This way we can try to see patterns in why some images are incorrectly classified.

## Load model

In [7]:
import os
import time
from datetime import datetime
import pandas as pd
import torch
import torch.nn as nn
import torchvision.models as Model


# Number of images per batch handed to get_dataloader further down.
BATCH_SIZE = 256
# Key into NETS selecting the architecture to evaluate.
MODEL_NAME = 'EffB4'


# Supported architectures: input size used for the dataset and the
# torchvision constructor for the network.
NETS = {
    'EffB3': {'input_size': 300, 'model': Model.efficientnet_b3},
    'EffB4': {'input_size': 380, 'model': Model.efficientnet_b4},
    'EffB5': {'input_size': 456, 'model': Model.efficientnet_b5},
    'EffB6': {'input_size': 528, 'model': Model.efficientnet_b6},
    'EffB7': {'input_size': 600, 'model': Model.efficientnet_b7},
    }
# Everything in this notebook runs on the CPU.
device = torch.device('cpu')

DATA = 'data/train' # change according to the images to test

# Constructor and input size of the selected architecture.
MODEL_TORCH = NETS[MODEL_NAME]['model']
INPUT_SIZE = NETS[MODEL_NAME]['input_size']

In [8]:
def build_model(model: nn.Module, num_classes: int = 10) -> nn.Module:
    """Freeze a network and give it a fresh linear classification head.

    Args:
        model: network exposing ``classifier[1]`` with an ``in_features``
            attribute (this is how the models built from ``MODEL_TORCH``
            are used in this notebook).
        num_classes: number of outputs of the new head. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        The same model, modified in place and moved to the global ``device``.
    """
    # Freeze every existing parameter.
    for param in model.parameters():
        param.requires_grad = False
    # Parameters of newly constructed modules have requires_grad=True by default
    num_ftrs = model.classifier[1].in_features
    # TODO: try adding batch normalization and dropout to the head
    model.classifier[1] = nn.Linear(num_ftrs, num_classes)
    return model.to(device)

def load_model_inference(state_dict_path: str) -> nn.Module:
    """Build the network given by ``MODEL_TORCH`` and load trained weights.

    The file may contain either a bare ``state_dict`` or a checkpoint
    dictionary that stores it under the ``'model_state_dict'`` key; both
    layouts are accepted so the code no longer has to be edited per file.

    Args:
        state_dict_path: path of the ``.pth`` file to load.

    Returns:
        The model on ``device``, switched to evaluation mode.
    """
    model = build_model(MODEL_TORCH())
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(state_dict_path, map_location=device)
    if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
        state_dict = checkpoint['model_state_dict']
    else:
        state_dict = checkpoint
    # Initialize model with the trained weights
    model.load_state_dict(state_dict)
    model.to(device)
    # set the model to inference mode
    model.eval()
    return model

In [4]:
# Build the selected network, load the trained checkpoint and keep it in eval mode.
model = load_model_inference('models/EffB4_2022-03-02_08/EffB4_2022-03-02_08.pth')

In [None]:
import random  # needed by random.randint below; not imported by the cells above

# Augmentation pipeline used to preview what augmented images look like.
# NOTE: random.randint is evaluated once, when this cell runs, so every image
# gets the same rescale size until the cell is executed again.
# NOTE(review): transforms, ConvertRgb and Rescale are imported in the
# "Visualisation part" cell, which has to be run before this one.
loader = transforms.Compose([
    ConvertRgb(),
    Rescale(random.randint(int(INPUT_SIZE*1.2), int(INPUT_SIZE*1.5))),
    transforms.RandomCrop(INPUT_SIZE, pad_if_needed=True),
    transforms.RandomRotation(degrees=(-5, 5)),
    transforms.RandomPerspective(distortion_scale=0.2),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2, hue=0),
])

def test_image(path):
    """Display the image at ``path`` after it went through ``loader``.

    This is a visual check only: no model is involved and nothing is
    returned.

    Args:
        path: path of the image file to open.
    """
    im = Image.open(path)
    im = loader(im)
    display(im)

## Metrics part

In [9]:
from seaborn import heatmap
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from prepare_data import load_dataset, get_classes, get_dataloader

# Build the dataset from the DATA folder, its class names and the batch loader.
# NOTE(review): mode="train" is used even though this is inference - confirm
# it does not enable training-time augmentation in prepare_data.
dataset = load_dataset(DATA, input_size=INPUT_SIZE, mode="train")
classes = get_classes(dataset)
dataloader = get_dataloader(dataset, batch_size=BATCH_SIZE)

In [10]:
def write_metrics_model(model: nn.Module, output_folder: str) -> None:
    """Evaluate ``model`` on the global ``dataloader`` and write the reports.

    Three files are written to ``output_folder``. They share a prefix built
    from ``DATA`` and the current date/time at minute resolution:

    * ``<prefix>-confusion-matrix.csv``: confusion matrix normalised over the
      true labels, indexed by class name
    * ``<prefix>-details.txt``: accuracy, macro precision and macro recall
    * ``<prefix>-probas.csv``: for each image its file name, label, predicted
      class and the softmax probability of every class

    Args:
        model: torch model to evaluate.
        output_folder: directory receiving the reports (created if missing).

    Raises:
        ValueError: if ``dataloader`` yields no batch.
    """
    start = time.time()
    os.makedirs(output_folder, exist_ok=True)

    prefix = '-'.join(DATA.split('/')) + '-' + datetime.now().isoformat("_", "minutes")

    # Per-batch results are collected in lists and concatenated once at the
    # end, instead of growing a tensor with torch.cat at every iteration.
    pred_batches, label_batches, proba_batches = [], [], []
    n_batches = len(dataloader)

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloader):
            if i % 100 == 0:
                print(f'Batch {i}/{n_batches}, time : {round(time.time()-start, 2)} secs')
            outputs = model(inputs.to(device))
            _, preds = torch.max(outputs, 1)

            pred_batches.append(preds.view(-1).cpu())
            label_batches.append(labels.view(-1).cpu())
            proba_batches.append(torch.nn.functional.softmax(outputs, dim=1).cpu())

    if not pred_batches:
        raise ValueError('dataloader yielded no batches, nothing to evaluate')

    # Confusion matrix
    y_test = torch.cat(label_batches).numpy()
    y_pred = torch.cat(pred_batches).numpy()
    class_names = list(classes)
    cm = confusion_matrix(y_test, y_pred, normalize='true')
    df_cm = pd.DataFrame(cm, index=class_names, columns=class_names)
    df_cm.to_csv(f'{output_folder}/{prefix}-confusion-matrix.csv') # visualize with sn heatmap

    # Other scores
    acc = accuracy_score(y_test, y_pred, normalize=True)
    prec = precision_score(y_test, y_pred, average='macro', zero_division=0)
    rec = recall_score(y_test, y_pred, average='macro', zero_division=0)
    with open(f'{output_folder}/{prefix}-details.txt', 'w') as outfile:
        outfile.write(f'Accuracy = {round(acc, 3)}\n')
        outfile.write(f'Precision = {round(prec, 3)}\n')
        outfile.write(f'Recall = {round(rec, 3)}\n')

    # Details of predictions probabilities
    probas = torch.cat(proba_batches).numpy().transpose() # each line is the probas for this class
    # NOTE(review): file names and labels are taken from dataset.imgs in
    # order, which assumes the dataloader does not shuffle - confirm in
    # prepare_data.get_dataloader.
    all_lines = {'filename': [x[0] for x in dataset.imgs],
                'label': [classes[x[1]] for x in dataset.imgs],
                'max_pred': [classes[x] for x in y_pred]}
    for class_idx, class_name in enumerate(classes):
        all_lines[class_name] = probas[class_idx]
    df_prob = pd.DataFrame(all_lines)
    df_prob.to_csv(f'{output_folder}/{prefix}-probas.csv', index=False)
    print('FINISHED !!')

In [None]:
# Run the evaluation; the reports are written to the folder given here.
# NOTE(review): the folder name mentions B5 while the checkpoint loaded above
# is an EffB4 one - confirm the intended output folder.
write_metrics_model(model, 'B5_2022-02-09_13-on-dataset')

## Visualisation part

In [5]:
import numpy as np
from PIL import Image
from torchvision import transforms
from prepare_data import ConvertRgb, Rescale, RandomPad

# Preprocessing applied to a single image before it is fed to the model.
# The target size follows INPUT_SIZE so it stays in sync with the selected
# architecture instead of being fixed to 456 (the EffB5 size).
# The Normalize constants are the mean/std commonly used with torchvision's
# ImageNet-pretrained models.
loader = transforms.Compose([
    ConvertRgb(),
    Rescale(INPUT_SIZE),
    RandomPad(INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

def test_image(model, path):
    """Classify a single image file and display it in the notebook.

    Args:
        model: network to run; it is expected to already be in eval mode.
        path: path of the image file.

    Returns:
        A tuple ``(res, timing)``. ``res`` is a list of
        ``(class name, probability in percent rounded to 2 decimals)`` pairs
        sorted by decreasing probability; ``timing`` is a string giving the
        elapsed time in seconds.
    """
    t = time.time()
    im = Image.open(path)
    image = loader(im).float()
    # Add the batch dimension expected by the model.
    image = image.unsqueeze(0).to(device)
    # Inference only: no need to record the autograd graph.
    with torch.no_grad():
        output = model(image)
    # .cpu() keeps the numpy conversion valid when `device` is not the CPU.
    probs = nn.functional.softmax(output, dim=1).cpu().numpy()[0]
    res = [(name, round(probs[i]*100, 2)) for i, name in enumerate(classes)]
    res.sort(key=lambda x: x[1], reverse=True)
    display(im.resize((300, int(300*im.size[1]/im.size[0])))) # display image in notebook
    return res, f'Time : {round(time.time()-t, 3)} secs'


def show_confusion_matrix(matrix_path: str):
    """Draw the confusion matrix stored in a CSV file as an annotated heatmap.

    Args:
        matrix_path: CSV file whose first column holds the row labels.
    """
    matrix = pd.read_csv(matrix_path, index_col=0)
    # Open a 10x8 figure; heatmap draws on the current axes.
    plt.subplots(figsize=(10, 8))
    heatmap(matrix, annot=True)


def show_images_of_label_predicted_as(in_df: pd.DataFrame, true_label: str, pred_label: str, limit=20):
    """Show images labelled ``true_label`` that were predicted as ``pred_label``.

    Images are sorted by decreasing probability of ``pred_label`` and each
    one is titled with that probability rounded to 3 decimals.

    Args:
        in_df: table with ``filename``, ``label`` and ``max_pred`` columns
            plus one probability column per class.
        true_label: value to match in the ``label`` column.
        pred_label: value to match in the ``max_pred`` column; also the
            probability column used for sorting and titles.
        limit: maximum number of images to draw.
    """
    df = in_df[(in_df.label == true_label) & (in_df.max_pred == pred_label)]
    df = df.sort_values(by=pred_label, ascending=False)
    df = df.reset_index()
    print(len(df), 'images found')

    columns = 1
    # plt.subplot requires an integer number of rows; true division gave a float.
    rows = len(df) // columns + 1
    plt.figure(figsize=(80, 600))
    for index, row in df.head(limit).iterrows():
        # Read the pixels inside the context manager so the file gets closed.
        with Image.open(row['filename']) as im:
            pixels = np.asarray(im)
        plt.subplot(rows, columns, index + 1).set_title(round(row[pred_label], 3))
        plt.axis('off')
        plt.imshow(pixels)

In [None]:
# Classify one sample image and display it with its sorted class probabilities.
test_image(model, 'test-images/pistolet_1.jpg')

In [None]:
# Load a per-image probability report and show the 'autre_pistolet' images
# that were predicted as 'pistolet_semi_auto_moderne'.
# NOTE(review): this file name has no '-' between the data path and the date,
# unlike the prefix write_metrics_model builds now - presumably written by an
# earlier version; adjust the path to an existing report.
df = pd.read_csv('B5_2022-02-07-on-dataset/data-val2022-02-14_10:48-probas.csv')
show_images_of_label_predicted_as(df, 'autre_pistolet', 'pistolet_semi_auto_moderne')

# Single folder dataset

## Metrics part

In [86]:
DATA = 'data/val/epaule_a_mecanisme_ancien' # change according to the images to test

In [None]:
# Small batches for the single-folder run.
BATCH_SIZE = 2

# Input size associated with each EfficientNet variant.
EFFICIENTNETS = {
    'B0': 224, 'B1': 240,
    'B2': 288, 'B3': 300,
    'B4': 380, 'B5': 456,
    'B6': 528, 'B7': 600
    }

# NOTE(review): MODEL_TORCH is set by hand and does not follow MODEL_NAME;
# keep the two in sync when switching architecture.
MODEL_NAME = 'B5'
MODEL_TORCH = Model.efficientnet_b5
INPUT_SIZE = EFFICIENTNETS[MODEL_NAME]


# Class names are listed by hand here; their order must match the output
# indices of the trained model (presumably the order used at training time -
# confirm).
classes = ['autre_epaule', 'autre_pistolet', 'epaule_a_levier_sous_garde',
        'epaule_a_percussion_silex', 'epaule_a_pompe', 'epaule_a_un_coup', 'epaule_a_verrou',
        'pistolet_a_percussion_silex', 'pistolet_semi_auto_moderne', 'revolver']
# mode="single": the loader below yields inputs only, without labels.
dataset = load_dataset(DATA, input_size=INPUT_SIZE, mode="single")
dataloader = get_dataloader(dataset, batch_size=BATCH_SIZE)

In [None]:
def write_metrics_model(model: nn.Module, output_folder: str) -> None:
    """Run ``model`` on the unlabelled global ``dataloader`` and dump predictions.

    No metric is computed because the batches carry no labels. A single file
    ``<prefix>-probas.csv`` is written to ``output_folder``, where the prefix
    is built from ``DATA`` and the current date/time at minute resolution.
    Each row holds the image entry from ``dataset.imgs``, the predicted class
    and the softmax probability of every class.

    Args:
        model: torch model to evaluate.
        output_folder: directory receiving the report (created if missing).

    Raises:
        ValueError: if ``dataloader`` yields no batch.
    """
    start = time.time()
    os.makedirs(output_folder, exist_ok=True)

    prefix = '-'.join(DATA.split('/')) + '-' + datetime.now().isoformat("_", "minutes")

    # Per-batch results are collected in lists and concatenated once at the
    # end, instead of growing a tensor with torch.cat at every iteration.
    pred_batches, proba_batches = [], []
    n_batches = len(dataloader)

    with torch.no_grad():
        for i, inputs in enumerate(dataloader):
            if i % 100 == 0:
                print(f'Batch {i}/{n_batches}, time : {round(time.time()-start, 2)} secs')
            outputs = model(inputs.to(device))
            _, preds = torch.max(outputs, 1)

            pred_batches.append(preds.view(-1).cpu())
            proba_batches.append(torch.nn.functional.softmax(outputs, dim=1).cpu())

    if not pred_batches:
        raise ValueError('dataloader yielded no batches, nothing to evaluate')

    # Details of predictions probabilities
    y_pred = torch.cat(pred_batches).numpy()
    probas = torch.cat(proba_batches).numpy().transpose() # each line is the probas for this class
    # NOTE(review): entries of dataset.imgs are used in order, which assumes
    # the dataloader does not shuffle - confirm in prepare_data.get_dataloader.
    all_lines = {'filename': list(dataset.imgs),
                'max_pred': [classes[x] for x in y_pred]}
    for class_idx, class_name in enumerate(classes):
        all_lines[class_name] = probas[class_idx]
    df_prob = pd.DataFrame(all_lines)
    df_prob.to_csv(f'{output_folder}/{prefix}-probas.csv', index=False)
    print('FINISHED !!')

In [None]:
def build_model(model: nn.Module, num_classes: "int | None" = None) -> nn.Module:
    """Freeze a network and give it a fresh linear classification head.

    Args:
        model: network exposing ``classifier[1]`` with an ``in_features``
            attribute (this is how the models built from ``MODEL_TORCH``
            are used in this notebook).
        num_classes: number of outputs of the new head. When omitted, the
            length of the global ``classes`` list is used, as before.

    Returns:
        The same model, modified in place and moved to the global ``device``.
    """
    if num_classes is None:
        num_classes = len(classes)
    # Freeze every existing parameter.
    for param in model.parameters():
        param.requires_grad = False
    # Parameters of newly constructed modules have requires_grad=True by default
    num_ftrs = model.classifier[1].in_features
    # TODO: try adding batch normalization and dropout to the head
    model.classifier[1] = nn.Linear(num_ftrs, num_classes)
    return model.to(device)

def load_model_inference(state_dict_path: str) -> nn.Module:
    """Build the network given by ``MODEL_TORCH`` and load trained weights.

    The file may contain either a bare ``state_dict`` or a checkpoint
    dictionary that stores it under the ``'model_state_dict'`` key; both
    layouts are accepted so the code no longer has to be edited per file.

    Args:
        state_dict_path: path of the ``.pth`` file to load.

    Returns:
        The model on ``device``, switched to evaluation mode.
    """
    model = build_model(MODEL_TORCH())
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(state_dict_path, map_location=device)
    if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
        state_dict = checkpoint['model_state_dict']
    else:
        state_dict = checkpoint
    # Initialize model with the trained weights
    model.load_state_dict(state_dict)
    model.to(device)
    # set the model to inference mode
    model.eval()
    return model

In [None]:
# Build the selected network, load the trained checkpoint and keep it in eval mode.
model = load_model_inference('models/B5_2022-02-07/B5_2022-02-07.pth')

In [None]:
# Run inference on the DATA folder; the probability report goes to this folder.
write_metrics_model(model, 'B5_2022-02-07-on-dataset')

## Visualisation part

In [None]:
from prepare_data_tests import ConvertRgb, Rescale, RandomPad

# Preprocessing applied to a single image before it is fed to the model.
# The target size follows INPUT_SIZE (456 for the B5 configuration of this
# section) so it stays in sync with the selected architecture.
# The Normalize constants are the mean/std commonly used with torchvision's
# ImageNet-pretrained models.
loader = transforms.Compose([
    ConvertRgb(),
    Rescale(INPUT_SIZE),
    RandomPad(INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

def test_image(model, path):
    """Classify a single image file and display it in the notebook.

    Args:
        model: network to run; it is expected to already be in eval mode.
        path: path of the image file.

    Returns:
        A tuple ``(res, timing)``. ``res`` is a list of
        ``(class name, probability in percent rounded to 2 decimals)`` pairs
        sorted by decreasing probability; ``timing`` is a string giving the
        elapsed time in seconds.
    """
    t = time.time()
    im = Image.open(path)
    image = loader(im).float()
    # Add the batch dimension expected by the model.
    image = image.unsqueeze(0).to(device)
    # Inference only: no need to record the autograd graph.
    with torch.no_grad():
        output = model(image)
    # .cpu() keeps the numpy conversion valid when `device` is not the CPU.
    probs = nn.functional.softmax(output, dim=1).cpu().numpy()[0]
    res = [(name, round(probs[i]*100, 2)) for i, name in enumerate(classes)]
    res.sort(key=lambda x: x[1], reverse=True)
    display(im.resize((300, int(300*im.size[1]/im.size[0])))) # display image in notebook
    return res, f'Time : {round(time.time()-t, 3)} secs'


def show_confusion_matrix(matrix_path: str):
    """Draw the confusion matrix stored in a CSV file as an annotated heatmap.

    Args:
        matrix_path: CSV file whose first column holds the row labels.
    """
    matrix = pd.read_csv(matrix_path, index_col=0)
    # Open a 10x8 figure; heatmap draws on the current axes.
    plt.subplots(figsize=(10, 8))
    heatmap(matrix, annot=True)


def show_images_predicted_as(in_df: pd.DataFrame, pred_label: str, ascending=False, limit=20):
    """Show images whose predicted class is ``pred_label``.

    Images are sorted by the probability of ``pred_label`` and each one is
    titled with that probability rounded to 3 decimals.

    Args:
        in_df: table with ``filename`` and ``max_pred`` columns plus one
            probability column per class.
        pred_label: value to match in the ``max_pred`` column; also the
            probability column used for sorting and titles.
        ascending: sort by increasing probability when True (least confident
            predictions first); decreasing by default.
        limit: maximum number of images to draw.
    """
    df = in_df[in_df.max_pred == pred_label]
    df = df.sort_values(by=pred_label, ascending=ascending)
    df = df.reset_index()
    print(len(df), 'images found')

    columns = 3
    # plt.subplot requires an integer number of rows; true division gave a float.
    rows = len(df) // columns + 1
    plt.figure(figsize=(20, 100))
    for index, row in df.head(limit).iterrows():
        # Read the pixels inside the context manager so the file gets closed.
        with Image.open(row['filename']) as im:
            pixels = np.asarray(im)
        plt.subplot(rows, columns, index + 1).set_title(round(row[pred_label], 3))
        plt.axis('off')
        plt.imshow(pixels)

In [None]:
# Load a probability report of a single-folder run and show the images that
# were predicted as 'epaule_a_un_coup'.
df = pd.read_csv('B5_2022-02-07-on-dataset/data-val-epaule_a_percussion_silex-2022-02-14_15:35-probas.csv')
show_images_predicted_as(df, 'epaule_a_un_coup')