Setup

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import torch
import CNN

# Disabled hook into the project's config_plots module (presumably shared
# plot styling — confirm before re-enabling).
# from config_plots import global_settings
# global_settings()
import warnings
# NOTE: this suppresses every warning raised through the `warnings` module for
# the rest of the kernel session, including deprecation warnings emitted by
# the libraries imported in this notebook.
warnings.filterwarnings("ignore")

Hyperparameters

In [2]:
# Global experiment settings shared by the cells below.
# Base experiment name: it is handed to ConfigParser as-is and is extended
# with the "multi-trial" sub-directory a few lines further down.
experimentName = "50_11_hier_species_twice_phase7"
# Forwarded to dataLoader.datasetManager when the dataset manager is created.
showListOfSpecies = False

from configParser import ConfigParser, getModelName
# The parser is built from the *base* name, i.e. before "multi-trial" is appended.
config_parser = ConfigParser(experimentName)

import os
# NOTE: experimentName is rebound here. Every later use (statistics collectors,
# dataset manager, trial names) sees the joined "<base>/multi-trial" value.
experimentName = os.path.join(experimentName, "multi-trial")
import TrialStatistics
# Two statistics collectors: the default one and one tagged "genus".
# NOTE(review): the default collector is used for species-level results in the
# training cell — confirm the default target inside TrialStatistics.
ts = TrialStatistics.TrialStatistics(experimentName)
ts_genus = TrialStatistics.TrialStatistics(experimentName, "genus")

import dataLoader

CUDA support

In [3]:
# CUDA support
# When a CUDA device is available, select device 0 and make CUDA float tensors
# the default tensor type for everything created afterwards. Without CUDA the
# cell does nothing and prints nothing.
if torch.cuda.is_available():
    torch.cuda.set_device(0) # first CUDA device (index 0)
    # NOTE(review): torch.set_default_tensor_type is deprecated in recent
    # PyTorch releases (2.1+) — check the installed version before upgrading.
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    print("We are using cuda")

We are using cuda


Iterate

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, f1_score
from confusion_matrix_plotter import plot_confusion_matrix2, generate_classification_report
import numpy as np
import os
import progressbar

# Run (or reload) every trial of every hyper-parameter combination and record
# its metrics. Species-level results go to `ts`, genus-level predictions go to
# `ts_genus`.

# The hyper-parameter iterator is exhausted by counting it, so a fresh one is
# requested for the actual run.
number_of_experiments = sum(1 for _ in config_parser.getHyperpIter())
paramsIterator = config_parser.getHyperpIter()
experiment_index = 0

datasetManager = dataLoader.datasetManager(experimentName, showListOfSpecies)
with progressbar.ProgressBar(max_value=number_of_experiments) as bar:
    for experiment_params in paramsIterator:
        # The bar reports how many experiments have been completed so far.
        bar.update(experiment_index)
        # A falsy trial count (e.g. 0 or None) means "run a single trial".
        experiment_params["numOfTrials"] = experiment_params["numOfTrials"] or 1

        print("experiment ", experiment_index+1, "/", number_of_experiments, ": ", experiment_params)

        # Load images for this configuration and derive the output sizes.
        datasetManager.updateParams(experiment_params)
        dataset = datasetManager.getDataset()
        speciesList = dataset.getSpeciesList()
        numberOfSpecies = len(speciesList)
        numberOfGenus = len(dataset.getGenusList())

        for i in range(experiment_params["numOfTrials"]):
            trialName = os.path.join(experimentName, getModelName(experiment_params, i))

            # Train/Load model: a saved model file for this trial is reused,
            # otherwise the model is trained from scratch.
            architecture = {
                "species": numberOfSpecies,
                "genus" : numberOfGenus
            }
            model = CNN.create_model(architecture, experiment_params)
            train_loader, validation_loader, test_loader = datasetManager.getLoaders()
            if os.path.exists(CNN.getModelFile(trialName)):
                df, epochs, time_elapsed = CNN.loadModel(model, trialName)
                print("Model {0} loaded!".format(trialName))
            else:
                df, epochs, time_elapsed = CNN.trainModel(train_loader, validation_loader, experiment_params, model, trialName, test_loader)

            # Species predictions on the test split (macro-averaged F1).
            species_pred, species_lbl = CNN.getLoaderPredictions(test_loader, model, experiment_params)
            ts.addTrialPredictions(experiment_params, species_pred, species_lbl, numberOfSpecies)
            macro_f1_species = f1_score(species_lbl.cpu(), species_pred.cpu(), average='macro')

            # Top-3 / top-5 accuracy needs the per-class probabilities.
            species_prob, species_prob_lbl = CNN.getLoaderPredictionProbabilities(test_loader, model, experiment_params)
            topk = CNN.top_k_acc(species_prob, species_prob_lbl, topk=(3,5))

            # Genus predictions on the test split (macro-averaged F1).
            genus_pred, genus_lbl = CNN.getLoaderPredictions(test_loader, model, experiment_params, 'genus')
            ts_genus.addTrialPredictions(experiment_params, genus_pred, genus_lbl, numberOfGenus)
            macro_f1_genus = f1_score(genus_lbl.cpu(), genus_pred.cpu(), average='macro')

            # Species predictions on the validation split (macro-averaged F1).
            val_pred, val_lbl = CNN.getLoaderPredictions(validation_loader, model, experiment_params)
            macro_f1_val = f1_score(val_lbl.cpu(), val_pred.cpu(), average='macro')

            # Record the scalar outcomes of this trial; every loader-based
            # metric below is computed on the test split.
            ts.addTrial(experiment_params,
                    {'loss': CNN.getCrossEntropyFromLoader(test_loader, model, experiment_params),
                     'average best guess prob': CNN.getAvgProbBestGuessFromLoader(test_loader, model, experiment_params),
                     'average correct guess prob': CNN.getAvgProbCorrectGuessFromLoader(test_loader, model, experiment_params),
                     'entropy': CNN.getAvgEntropyFromLoader(test_loader, model, experiment_params),
                     'accuracy': CNN.getAccuracyFromLoader(test_loader, model, experiment_params),
                     'macro_f1_species': macro_f1_species,
                     'macro_f1_genus': macro_f1_genus,
                     'time': time_elapsed,
                     'epochs': epochs,
                     'macro f1 validation': macro_f1_val,
                     'top-3': topk[0].cpu().numpy(),
                     'top-5': topk[1].cpu().numpy(),
                    }, i)

        experiment_index = experiment_index + 1

N/A% (0 of 1) |                          | Elapsed Time: 0:00:00 ETA:  --:--:--

experiment  1 / 1 :  {'image_path': '/data/BGNN_data/INHS_cropped', 'suffix': '50_11', 'training_count': 0.64, 'validation_count': 0.16, 'batchSize': 2650, 'n_epochs': 10000, 'learning_rate': 0.001, 'numOfTrials': 3, 'patience': 200, 'useHeirarchy': True, 'downsample': True, 'downsampleOutput': 0, 'takeFromIntermediate': True, 'takeFromIntermediateOutput': 200, 'lambda': -1, 'unsupervisedOnTest': False, 'fc_layers': 1, 'resnet': '18', 'normalizeFromResnet': True, 'dummy': 0}
Creating dataset...
Loading dataset...
Going through image files


100% (550 of 550) |######################| Elapsed Time: 0:00:00 Time:  0:00:00


Creating dataset... Done.


  0% (3 of 550) |                        | Elapsed Time: 0:00:00 ETA:   0:00:24

Loading saved indices...
file /data/BGNN_data/INHS_cropped/50_11/50_11_hier_species_twice_phase7/multi-trial/tc0.640000_vc0.160000/trainingIndex.pkl read
trainingIndex.pkl 352
file /data/BGNN_data/INHS_cropped/50_11/50_11_hier_species_twice_phase7/multi-trial/tc0.640000_vc0.160000/valIndex.pkl read
valIndex.pkl 88
file /data/BGNN_data/INHS_cropped/50_11/50_11_hier_species_twice_phase7/multi-trial/tc0.640000_vc0.160000/testIndex.pkl read
testIndex.pkl 110
Creating loaders...
Creating loaders... Done.
Training started...
Transfrom images...


100% (550 of 550) |######################| Elapsed Time: 0:00:42 Time:  0:00:42
  0% (30 of 10000) |                     | Elapsed Time: 0:01:58 ETA:   5:09:02

Analyze

In [None]:
# Display the statistics collected in `ts` during the training cell, then save them.
# NOTE(review): output format and save location are defined in TrialStatistics
# and are not visible from this notebook.
ts.showStatistics()
ts.saveStatistics()

In [None]:
# Same show/save pair as the previous cell, this time passing False.
# NOTE(review): the meaning of the flag is defined in TrialStatistics —
# presumably it switches between aggregated and per-trial output; confirm.
ts.showStatistics(False)
ts.saveStatistics(False)

In [None]:
# Species-level report: for each hyper-parameter combination print the
# confusion matrix and the F1 table gathered in `ts`.
paramsIterator = config_parser.getHyperpIter()
for experiment_params in paramsIterator:
    # A falsy trial count is treated as a single trial.
    if not experiment_params["numOfTrials"]:
        experiment_params["numOfTrials"] = 1

    print("experiment: ", experiment_params)

    # Point the shared dataset manager at this configuration before reading from it.
    datasetManager.updateParams(experiment_params)
    dataset = datasetManager.getDataset()
    speciesList = dataset.getSpeciesList()

    ts.printTrialConfusionMatrix(experiment_params, speciesList, True)
    ts.printF1table(experiment_params, dataset)

In [None]:
# Genus-level report: same as the species report, but only for configurations
# with "useHeirarchy" enabled, and reading from `ts_genus`.
paramsIterator = config_parser.getHyperpIter()
for experiment_params in paramsIterator:
    # A falsy trial count is treated as a single trial.
    if not experiment_params["numOfTrials"]:
        experiment_params["numOfTrials"] = 1

    # Configurations with the hierarchy flag off are skipped.
    if not experiment_params["useHeirarchy"]:
        continue

    print("experiment: ", experiment_params)

    # Point the shared dataset manager at this configuration before reading from it.
    datasetManager.updateParams(experiment_params)
    dataset = datasetManager.getDataset()
    genusList = dataset.getGenusList()

    ts_genus.printTrialConfusionMatrix(experiment_params, genusList, True)
    ts_genus.printF1table(experiment_params, dataset)

In [None]:
# Presumably a scatter plot relating the recorded 'accuracy' and 'time' values
# across trials; both are keys stored via ts.addTrial in the training cell.
# NOTE(review): axis order and the meaning of the trailing False are defined
# in TrialStatistics.trialScatter — confirm there.
ts.trialScatter('accuracy', 'time', False)