Imports

In [1]:
import matplotlib.pyplot as plt
import torch
import os
import pandas as pd
import math

from tqdm.notebook import tqdm as tqdm
from tqdm.auto import trange

import warnings
# Silences every warning for the rest of the session. NOTE(review): this also
# hides pandas deprecation and chained-assignment warnings raised further down.
warnings.filterwarnings("ignore")
import sys
# Make the sibling ../train and ../misc folders importable; the paths are
# relative, so they resolve against the notebook's working directory.
sys.path.insert(1, '../train')
sys.path.insert(1, '../misc')

import config_plots, TrialStatistics
import CNN, dataLoader
from configParser import ConfigParser, getModelName
from flashtorch_modefied import CNN_wrapper, Backprop, SaliencyMap

# Presumably applies the project's shared plotting options -- see config_plots.
config_plots.global_settings()

# Name of the CSV (inside experimentsPath) that is filtered by trialHash below.
# NOTE(review): "experimetns" is a typo for "experiments"; the variable is read
# again when the CSV path is built, so rename both places together.
experimetnsFileName = "experiments.csv"

parameters

In [2]:
# Locations (absolute paths on the training host).
experimentsPath = "/raid/elhamod/Fish/official_experiments/"  # root folder where experiment results are produced
dataPath = "/raid/elhamod/Fish/Curated4"  # root folder of the image data
experimentName = "Easy"

# Hash of the trial whose trained model is analysed (an HGNN model).
trial_hash = "268af0509863b63158fbca64174789deb89ce5648a76506daf7f8db5"

num_workers = 8  # worker processes for the DataLoader built further down
patchsize = 49  # forwarded as box_width to the saliency/occlusion step

cuda = 1  # index of the GPU to run on

cuda

In [3]:
# set cuda
# Make the GPU index chosen in the parameters cell the current device.
# Nothing happens (and nothing is printed) on a host without CUDA.
if torch.cuda.is_available():
    torch.cuda.set_device(cuda)
    print("using cuda", cuda)

using cuda 1


Load experiment

In [4]:
# Parser for the configuration of the selected experiment.
config_parser = ConfigParser(experimentsPath, dataPath, experimentName)

# Folder that holds everything produced for this experiment.
experimentPathAndName = os.path.join(experimentsPath, experimentName)

# Builds the datasets/loaders once its parameters are set further down.
datasetManager = dataLoader.datasetManager(experimentPathAndName, dataPath)

# Count the configured experiments by walking the iterator once.
# NOTE(review): if getExperiments() returns a one-shot iterator, paramsIterator
# is exhausted after this line -- confirm before reusing it.
paramsIterator = config_parser.getExperiments()
number_of_experiments = sum(1 for _ in paramsIterator)

load dataset and model

In [5]:
# Get experiment parameters
experimentsFileNameAndPath = os.path.join(experimentsPath, experimetnsFileName)
if not os.path.exists(experimentsFileNameAndPath):
    # The original message talked about the trial hash although the check is on the file.
    raise Exception("Experiments file " + experimentsFileNameAndPath + " not found!")
experiments_df = pd.read_csv(experimentsFileNameAndPath)

# Select the row of the requested trial. Fail with an explicit message instead
# of the bare IndexError that `[0]` raises on an empty selection.
experimentRecord = experiments_df[experiments_df["trialHash"] == trial_hash]
if experimentRecord.empty:
    raise Exception("Experiment " + trial_hash + " not found!")
experiment_params = experimentRecord.to_dict('records')[0]

# This is needed to get saliency map for a model that does not do back propagation.
experiment_params['noSpeciesBackprop'] = False

# An empty CSV cell is read back as NaN, a filled one as a str. pd.isna accepts
# both, whereas math.isnan raises TypeError when handed a str.
if pd.isna(experiment_params['suffix']):
    experiment_params['suffix'] = None
print(experiment_params)


# Create the model
datasetManager.updateParams(config_parser.fixPaths(experiment_params))
train_loader, validation_loader, test_loader = datasetManager.getLoaders()
fineList = test_loader.dataset.csv_processor.getFineList()
coarseList = test_loader.dataset.csv_processor.getCoarseList()
numberOffine = len(fineList)
numberOfcoarse = len(coarseList)
# Number of output classes per head, as expected by CNN.create_model.
architecture = {
    "fine": numberOffine,
    "coarse" : numberOfcoarse
}
model = CNN.create_model(architecture, experiment_params, device=cuda)

{'experimentName': 'Easy', 'modelName': 'models/268af0509863b63158fbca64174789deb89ce5648a76506daf7f8db5', 'datasetName': nan, 'experimentHash': 'fbf4c275e857df725e65ca95f294905a0966e68874b376dc8d1d1da9', 'trialHash': '268af0509863b63158fbca64174789deb89ce5648a76506daf7f8db5', 'image_path': 'Easy_50', 'suffix': None, 'img_res': 448, 'augmented': True, 'batchSize': 64, 'learning_rate': 0.0001, 'numOfTrials': 5, 'modelType': 'HGNN', 'lambda': 0.01, 'pretrained': True, 'tl_model': 'ResNet18', 'link_layer': 'avgpool', 'adaptive_smoothing': True, 'adaptive_lambda': 0.01, 'adaptive_alpha': 0.9, 'fc_layers': 1, 'unsupervisedOnTest': nan, 'noSpeciesBackprop': False, 'phylogeny_loss': nan, 'phylogeny_loss_epsilon': nan}
Creating datasets...
Creating datasets... Done.
Creating loaders...
Creating loaders... Done.


Load model of a specific trial

In [6]:
# Locate the checkpoint of the selected trial and restore it into `model`.
modelName = experimentRecord["modelName"].iloc[0]
trialName = os.path.join(experimentPathAndName, modelName)

# NOTE: `df` returned here is replaced by a fresh summary frame further down.
df, epochs, time_elapsed = CNN.loadModel(model, trialName, device=cuda)

Model wrapper

In [7]:
# Wrap the restored model (flashtorch_modefied.CNN_wrapper) and build the
# SaliencyMap helper on the test dataset; both objects are used as globals by
# getAverageCorrectProb below.
wrapped_model = CNN_wrapper(model, experiment_params, test_loader.dataset)
saliencyMap = SaliencyMap(test_loader.dataset, wrapped_model, experimentPathAndName, trial_hash, experiment_params)

Iterate and give score

In [8]:
from PIL import Image

import gc


def getAverageCorrectProb(loader, layerName, box_width, topk, df):
    """Score every image served by `loader` and record per-image results in `df`.

    For each image the model output for `layerName` is obtained, passed through
    a softmax when `layerName` is "fine" or "coarse", and the probability at the
    image's ground-truth label is collected.

    Relies on the notebook globals `saliencyMap` and `wrapped_model`.

    Args:
        loader: DataLoader whose batches are dicts with 'fileNameFull',
            'fileName' and `layerName` entries and whose dataset exposes
            `csv_processor`.
        layerName: name of the output to evaluate; also the batch key that
            holds the ground-truth labels.
        box_width: forwarded to `saliencyMap.GetSaliencyMap`; unused when
            `topk` is 0.
        topk: when > 0 the output is the second value returned by
            `saliencyMap.GetSaliencyMap(..., topk=topk)` (presumably computed
            after occluding the top-k salient patches -- confirm in
            flashtorch_modefied); when 0 the unmodified image is run through
            `wrapped_model`.
        df: per-image DataFrame indexed by file name. Images that are not in
            the index yet are added as rows carrying their fine/coarse labels.

    Returns:
        Tuple of (average probability of the correct label as a float, the
        updated DataFrame). The average is NaN when the loader yields no image.
    """
    colName = layerName + "/box" + "/iter" + str(topk)
    csv_processor = loader.dataset.csv_processor
    # Looked up once; assumed to return the same list on every call.
    fineList = csv_processor.getFineList()

    isSpecies = (layerName == "fine")
    activationOutputs = {
        "fine": isSpecies,
        "coarse": not isSpecies
    }

    probs = []
    per_image = {}  # fileName_base -> values recorded for that image

    with tqdm(total=len(loader.dataset)) as bar:
        for batch in loader:
            fileNames = batch['fileNameFull']
            fileName_bases = batch['fileName']
            lbls = batch[layerName]

            for i, fileName in enumerate(fileNames):
                fileName_base = fileName_bases[i]
                if topk > 0:
                    _, A = saliencyMap.GetSaliencyMap(fileName, layerName, box_width=box_width, maxCovered=True, topk=topk, plot=False, use_gpu=True, generate_all_steps=False)
                else:
                    # Close the file handle as soon as the tensor has been built.
                    with Image.open(fileName) as raw_image:
                        model_input = saliencyMap.getTransformedImage(raw_image, False, True)
                    wrapped_model.setOutputsOfInterest(activationOutputs)

                    if torch.cuda.is_available():
                        model_input = model_input.cuda()
                    A = wrapped_model(model_input)

                if layerName in ("coarse", "fine"):
                    A = torch.nn.Softmax(dim=1)(A)

                # Always drop the autograd graph (cpu() is a no-op for CPU tensors);
                # otherwise every stored probability keeps its graph alive on CPU-only runs.
                A = A.detach().cpu()
                prob_ = A[0][lbls[i]]
                probs.append(prob_)

                fine = csv_processor.getFineLabel(fileName_base)
                coarse = csv_processor.getCoarseLabel(fileName_base)
                # NOTE(review): the arg-max always indexes the *fine* list, which looks
                # wrong when layerName == "coarse" -- confirm before using that mode.
                pred_fine = fineList[int(torch.argmax(A[0]))]
                right_genus = csv_processor.getCoarseFromFine(pred_fine) == csv_processor.getCoarseFromFine(fine)

                per_image[fileName_base] = {
                    "fine": fine,
                    "coarse": coarse,
                    "prob": prob_.item(),
                    "predictedFine": pred_fine,
                    "withinSameGenus": right_genus,
                }

                bar.update()

    # Add all unseen images in a single concat (DataFrame.append no longer exists
    # in pandas 2.x, and growing the frame row by row is quadratic).
    missing = [name for name in per_image if name not in df.index]
    if missing:
        new_rows = pd.DataFrame(
            {
                "fine": [per_image[name]["fine"] for name in missing],
                "coarse": [per_image[name]["coarse"] for name in missing],
            },
            index=pd.Index(missing, name="fileName"),
        )
        df = pd.concat([df, new_rows])

    # Write each result column in one assignment. The previous
    # `df.loc[row][col] = value` form is chained indexing and may silently write
    # to a temporary copy. Rows that were not scored in this call keep "".
    for field in ("prob", "predictedFine", "withinSameGenus"):
        df[colName, field] = [
            per_image[name][field] if name in per_image else ""
            for name in df.index
        ]

    average = (sum(probs) / len(probs)).item() if probs else float("nan")
    return average, df


def add_occlusion_result(df, loader, func, label, patchsize, iterations, df2):
    """Run one evaluation and append its score to the summary table.

    Args:
        df: summary DataFrame collected so far (may be empty).
        loader: passed through to `func`.
        func: callable invoked as `func(loader, label, patchsize, iterations, df2)`
            that returns `(score, updated_df2)`.
        label: stored in the "label" column and passed to `func`.
        patchsize: stored in the "patch size" column and passed to `func`.
        iterations: stored in the "iterations" column and passed to `func`.
        df2: per-image table handed to `func`.

    Returns:
        Tuple of (summary with one extra row, the table returned by `func`).
    """
    result, df2 = func(loader, label, patchsize, iterations, df2)
    summary_row = pd.DataFrame({
        "label": [label],
        "patch size": [patchsize],
        "iterations": [iterations],
        "average correct probability": [result]
    })
    # DataFrame.append was removed in pandas 2.0. pd.concat is its replacement
    # and, like append, keeps each frame's own index (every new row has index 0).
    return pd.concat([df, summary_row]), df2

In [9]:
# Re-wrap the test dataset in a loader that serves batches of 100 images.
test_loader = torch.utils.data.DataLoader(
    test_loader.dataset,
    batch_size=100,
    num_workers=num_workers,
)

# Summary table: one row is added per evaluation run.
df = pd.DataFrame()
# Per-image table, indexed by file name.
df2 = pd.DataFrame(columns=['fileName', 'fine', 'coarse']).set_index('fileName')

In [10]:
# Baseline: iterations=0 takes the plain forward-pass branch of
# getAverageCorrectProb (GetSaliencyMap is not called).
df, df2 = add_occlusion_result(df, test_loader, getAverageCorrectProb, "fine", patchsize, 0, df2)

HBox(children=(FloatProgress(value=0.0, max=380.0), HTML(value='')))




In [11]:
# iterations=1: the score comes from GetSaliencyMap with topk=1
# (presumably one occluded patch per image -- confirm in flashtorch_modefied).
df, df2 = add_occlusion_result(df, test_loader, getAverageCorrectProb, "fine", patchsize, 1, df2)

HBox(children=(FloatProgress(value=0.0, max=380.0), HTML(value='')))




In [12]:
# iterations=2: same evaluation with topk=2.
df, df2 = add_occlusion_result(df, test_loader, getAverageCorrectProb, "fine", patchsize, 2, df2)

HBox(children=(FloatProgress(value=0.0, max=380.0), HTML(value='')))




In [13]:
# iterations=3: same evaluation with topk=3.
df, df2 = add_occlusion_result(df, test_loader, getAverageCorrectProb, "fine", patchsize, 3, df2)

HBox(children=(FloatProgress(value=0.0, max=380.0), HTML(value='')))




In [14]:
# iterations=4: same evaluation with topk=4.
df, df2 = add_occlusion_result(df, test_loader, getAverageCorrectProb, "fine", patchsize, 4, df2)

HBox(children=(FloatProgress(value=0.0, max=380.0), HTML(value='')))




In [15]:
# Save the summary table in the trial's model folder. reset_index() turns the
# index (0 for every row, since each run contributes a single-row frame) into
# an ordinary "index" column of the CSV.
# NOTE(review): the per-image table df2 is not written anywhere in this part of
# the notebook -- confirm whether it should be saved as well.
df.reset_index().to_csv(os.path.join(experimentPathAndName, "models", trial_hash, 'occlusion_summary.csv'))

In [16]:
# Display the summary: average correct-class probability per iteration count.
df

Unnamed: 0,label,patch size,iterations,average correct probability
0,fine,49,0,0.481954
0,fine,49,1,0.369082
0,fine,49,2,0.307433
0,fine,49,3,0.256257
0,fine,49,4,0.21529
