Set experiment parameters here

In [1]:
# Root folder under which experiment results are produced.
experimentsPath = "/raid/elhamod/Fish/official_experiments/"

# Root folder where the data lives.
dataPath = "/raid/elhamod/Fish"

# Name of the experiment to run; joined onto experimentsPath further down.
experimentName = "dummy_experiment"

# CUDA device index handed to torch.cuda.set_device. Set to None to skip the
# CUDA selection (the notebook then prints "using cpu").
device = 1

# Forwarded unchanged to CNN.trainModel as its detailed_reporting argument.
detailed_reporting = True

In [2]:
# Name of the CSV file (inside experimentsPath) that collects one row per trial.
experimentsFileName = "experiments.csv"

# Backward-compatible alias: the original, misspelled identifier is still read
# by the cell that builds the full path to the experiments file.
experimetnsFileName = experimentsFileName

In [3]:
import matplotlib.pyplot as plt
import torch
import sys
import os
import pandas as pd
from tqdm import tqdm
from tqdm.auto import trange

sys.path.insert(1, '../misc')
import config_plots
import TrialStatistics
import CNN
import dataLoader
from configParser import ConfigParser, getModelName, getDatasetName
config_plots.global_settings()

In [4]:
# Folder that belongs to this particular experiment.
experimentPathAndName = os.path.join(experimentsPath, experimentName)

# Pick the compute device: a None device means no CUDA device is selected.
if device is None:
    print("using cpu")
else:
    print("using cuda", device)
    torch.cuda.set_device(device)

# Parser that yields the parameter sets of this experiment.
config_parser = ConfigParser(experimentsPath, dataPath, experimentName)

# Full path of the experiments CSV file.
experimentsFileNameAndPath = os.path.join(experimentsPath, experimetnsFileName)

# Walk through the experiments once just to count them; the count is used as
# the total of the outer progress bar.
paramsIterator = config_parser.getExperiments()
number_of_experiments = sum(1 for _ in paramsIterator)
experiment_index = 0

# Loop through experiments.
#
# For every parameter set yielded by config_parser.getExperiments():
#   1. build the dataset loaders,
#   2. run `numOfTrials` trials, training a model when no saved model file
#      exists for that trial,
#   3. insert or replace the trial's row in the experiments CSV file.
with tqdm(total=number_of_experiments, desc="experiment") as bar:
    for experiment_params in config_parser.getExperiments():
        print(experiment_params)
        experimentHash = TrialStatistics.getTrialName(experiment_params)

        # load images
        datasetManager = dataLoader.datasetManager(experimentPathAndName, dataPath)
        datasetManager.updateParams(config_parser.fixPaths(experiment_params))
        train_loader, validation_loader, test_loader = datasetManager.getLoaders()

        # Output sizes are taken from the label lists of the training set.
        csv_processor = train_loader.dataset.csv_processor
        architecture = {
            "fine": len(csv_processor.getFineList()),
            "coarse": len(csv_processor.getCoarseList())
        }

        # Loop through n trials
        for i in trange(experiment_params["numOfTrials"], desc="trial"):
            modelName = getModelName(experiment_params, i)
            trialName = os.path.join(experimentPathAndName, modelName)
            trialHash = TrialStatistics.getTrialName(experiment_params, i)

            # Identification columns first, followed by every experiment
            # parameter (a parameter with the same key would override them).
            row_information = {
                'experimentName': experimentName,
                'modelName': modelName,
                'datasetName': getDatasetName(config_parser.fixPaths(experiment_params)),
                'experimentHash': experimentHash,
                'trialHash': trialHash
            }
            row_information = {**row_information, **experiment_params}
            print(row_information)

            # Train/Load model
            model = CNN.create_model(architecture, experiment_params, device=device)

            if os.path.exists(CNN.getModelFile(trialName)):
                # NOTE(review): the saved weights are not loaded here; the trial
                # is only skipped. Confirm that is intended.
                print("Model {0} found!".format(trialName))
            else:
                # Start from shared initial weights when the experiment has them.
                initModelPath = CNN.getInitModelFile(experimentPathAndName)
                if os.path.exists(initModelPath):
                    model.load_state_dict(torch.load(initModelPath))
                    print("Init Model {0} found!".format(initModelPath))
                CNN.trainModel(train_loader, validation_loader, experiment_params, model, trialName, test_loader, device=device, detailed_reporting=detailed_reporting)

            # Add to experiments file
            if os.path.exists(experimentsFileNameAndPath):
                experiments_df = pd.read_csv(experimentsFileNameAndPath)
            else:
                experiments_df = pd.DataFrame()

            # Drop a previous row of the same (experimentName, modelName) pair.
            # One combined boolean mask replaces the chained df[a][b] lookup,
            # which made pandas reindex the second mask and emit a warning.
            if experiments_df.empty:
                record_exists = False
            else:
                same_trial = (
                    (experiments_df['modelName'] == modelName)
                    & (experiments_df['experimentName'] == experimentName)
                )
                record_exists = bool(same_trial.any())
                if record_exists:
                    experiments_df = experiments_df[~same_trial]

            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # supported equivalent and keeps the column order of both frames.
            new_row_df = pd.DataFrame(row_information, index=[0])
            experiments_df = pd.concat([experiments_df, new_row_df], ignore_index=True)
            experiments_df.to_csv(experimentsFileNameAndPath, header=True, index=False)

        bar.update()

        experiment_index += 1



experiment:   0%|          | 0/2 [00:00<?, ?it/s]
getting statistics:   0%|          | 0/729 [00:00<?, ?it/s][A
getting statistics:   0%|          | 1/729 [00:00<02:00,  6.03it/s][A

using cuda 1
{'image_path': 'Official_Easy_30', 'suffix': '', 'img_res': 448, 'augmented': True, 'batchSize': 64, 'learning_rate': 0.0001, 'numOfTrials': 1, 'modelType': 'BB', 'lambda': 0.01, 'pretrained': True, 'tl_model': 'ResNet18', 'link_layer': 'avgpool', 'adaptive_smoothing': False, 'adaptive_lambda': 0.01, 'adaptive_alpha': 0.9, 'fc_layers': 1}
Creating datasets...
Couldn't read /raid/elhamod/Fish/Official_Easy_30/train/dataset_normlization.json



getting statistics:   0%|          | 3/729 [00:00<01:56,  6.24it/s][A
getting statistics:   1%|          | 4/729 [00:00<01:51,  6.48it/s][A
getting statistics:   1%|          | 5/729 [00:00<02:04,  5.82it/s][A
getting statistics:   1%|          | 8/729 [00:00<01:34,  7.60it/s][A
getting statistics:   1%|▏         | 10/729 [00:01<01:25,  8.43it/s][A
getting statistics:   2%|▏         | 12/729 [00:01<01:15,  9.53it/s][A
getting statistics:   2%|▏         | 14/729 [00:01<01:03, 11.29it/s][A
getting statistics:   2%|▏         | 16/729 [00:01<00:58, 12.12it/s][A
getting statistics:   2%|▏         | 18/729 [00:01<01:08, 10.33it/s][A
getting statistics:   3%|▎         | 20/729 [00:01<01:10, 10.09it/s][A
getting statistics:   3%|▎         | 22/729 [00:02<01:15,  9.41it/s][A
getting statistics:   3%|▎         | 25/729 [00:02<01:05, 10.82it/s][A
getting statistics:   4%|▎         | 27/729 [00:02<01:47,  6.52it/s][A
getting statistics:   4%|▍         | 28/729 [00:03<03:33,  3.28it/s

dataset has a mean: [0.7751760482788086, 0.7462451457977295, 0.6747339963912964] and std: [0.20897795259952545, 0.261340469121933, 0.3171098828315735]
file /raid/elhamod/Fish/Official_Easy_30/train/dataset_normlization.json written
Couldn't read /raid/elhamod/Fish/Official_Easy_30/train/PCA.pkl



stacking images:  18%|█▊        | 128/729 [00:09<00:46, 13.02it/s][A
stacking images:  35%|███▌      | 256/729 [00:21<00:38, 12.36it/s][A
stacking images:  53%|█████▎    | 384/729 [00:32<00:28, 12.01it/s][A
stacking images:  70%|███████   | 512/729 [00:39<00:16, 13.35it/s][A
stacking images:  88%|████████▊ | 640/729 [00:47<00:06, 14.37it/s][A
stacking images: 100%|██████████| 729/729 [00:53<00:00, 13.73it/s][A


Calculating PCA...
Calculating PCA done.
saving PCA
file /raid/elhamod/Fish/Official_Easy_30/train/PCA.pkl written
saving PCA done.
Creating datasets... Done.
Creating loaders...
Creating loaders... Done.


HBox(children=(FloatProgress(value=0.0, description='trial', max=1.0, style=ProgressStyle(description_width='i…

{'experimentName': 'dummy_experiment', 'modelName': 'models/b9911c77887e22522c24920e5be9b33a14be0349a98d7f3d095958cd', 'datasetName': 'datasplits/1e08e54afe7ff4a92819e8ab1af434554dd4da78f6c53664a3393d6f', 'experimentHash': '30f202fa55ecf4d31cfb4b906ee773da2476bc394aa2467256f6b08e', 'trialHash': 'b9911c77887e22522c24920e5be9b33a14be0349a98d7f3d095958cd', 'image_path': 'Official_Easy_30', 'suffix': '', 'img_res': 448, 'augmented': True, 'batchSize': 64, 'learning_rate': 0.0001, 'numOfTrials': 1, 'modelType': 'BB', 'lambda': 0.01, 'pretrained': True, 'tl_model': 'ResNet18', 'link_layer': 'avgpool', 'adaptive_smoothing': False, 'adaptive_lambda': 0.01, 'adaptive_alpha': 0.9, 'fc_layers': 1}




iteration:   0%|          | 0/4 [00:00<?, ?it/s][A[A

Training started...




iteration:   0%|          | 0/4 [00:43<?, ?it/s, min_val_loss=inf, train=0.254, val=0.228, val_loss=3.61][A[A

iteration:  25%|██▌       | 1/4 [00:43<02:10, 43.43s/it, min_val_loss=inf, train=0.254, val=0.228, val_loss=3.61][A[A

iteration:  25%|██▌       | 1/4 [01:34<02:10, 43.43s/it, min_val_loss=4.39, train=0.636, val=0.454, val_loss=3.51][A[A

iteration:  50%|█████     | 2/4 [01:34<01:31, 45.63s/it, min_val_loss=4.39, train=0.636, val=0.454, val_loss=3.51][A[A

iteration:  50%|█████     | 2/4 [02:26<01:31, 45.63s/it, min_val_loss=2.2, train=0.891, val=0.631, val_loss=3.44] [A[A

iteration:  75%|███████▌  | 3/4 [02:26<00:47, 47.74s/it, min_val_loss=2.2, train=0.891, val=0.631, val_loss=3.44][A[A

iteration:  75%|███████▌  | 3/4 [03:18<00:47, 47.74s/it, min_val_loss=1.59, train=0.973, val=0.681, val_loss=3.41][A[A

iteration: 100%|██████████| 4/4 [03:27<00:00, 51.92s/it, min_val_loss=1.59, train=0.973, val=0.681, val_loss=3.41][A[A
experiment:  50%|█████     | 1/2 [


{'image_path': 'Official_Easy_30', 'suffix': '', 'img_res': 448, 'augmented': True, 'batchSize': 64, 'learning_rate': 0.0001, 'numOfTrials': 1, 'modelType': 'HGNN', 'lambda': 0.01, 'pretrained': True, 'tl_model': 'ResNet18', 'link_layer': 'avgpool', 'adaptive_smoothing': False, 'adaptive_lambda': 0.01, 'adaptive_alpha': 0.9, 'fc_layers': 1}
Creating datasets...
Creating datasets... Done.
Creating loaders...
Creating loaders... Done.


HBox(children=(FloatProgress(value=0.0, description='trial', max=1.0, style=ProgressStyle(description_width='i…

{'experimentName': 'dummy_experiment', 'modelName': 'models/2d1677fd91b5b7eeb00e01cd44475e6beeef17e5b9ac6d6712eeb975', 'datasetName': 'datasplits/1e08e54afe7ff4a92819e8ab1af434554dd4da78f6c53664a3393d6f', 'experimentHash': 'b21aff7ff43c309e4e63d898766d2be960915421e12ff28c0d711708', 'trialHash': '2d1677fd91b5b7eeb00e01cd44475e6beeef17e5b9ac6d6712eeb975', 'image_path': 'Official_Easy_30', 'suffix': '', 'img_res': 448, 'augmented': True, 'batchSize': 64, 'learning_rate': 0.0001, 'numOfTrials': 1, 'modelType': 'HGNN', 'lambda': 0.01, 'pretrained': True, 'tl_model': 'ResNet18', 'link_layer': 'avgpool', 'adaptive_smoothing': False, 'adaptive_lambda': 0.01, 'adaptive_alpha': 0.9, 'fc_layers': 1}




iteration:   0%|          | 0/4 [00:00<?, ?it/s][A[A

Training started...




iteration:   0%|          | 0/4 [01:08<?, ?it/s, min_val_loss=inf, train=0.186, val=0.146, val_loss=3.59][A[A

iteration:  25%|██▌       | 1/4 [01:08<03:25, 68.42s/it, min_val_loss=inf, train=0.186, val=0.146, val_loss=3.59][A[A

iteration:  25%|██▌       | 1/4 [02:20<03:25, 68.42s/it, min_val_loss=6.85, train=0.537, val=0.392, val_loss=3.45][A[A

iteration:  50%|█████     | 2/4 [02:20<02:19, 69.66s/it, min_val_loss=6.85, train=0.537, val=0.392, val_loss=3.45][A[A

iteration:  50%|█████     | 2/4 [03:34<02:19, 69.66s/it, min_val_loss=2.55, train=0.871, val=0.593, val_loss=3.41][A[A

iteration:  75%|███████▌  | 3/4 [03:34<01:10, 70.83s/it, min_val_loss=2.55, train=0.871, val=0.593, val_loss=3.41][A[A

iteration:  75%|███████▌  | 3/4 [04:47<01:10, 70.83s/it, min_val_loss=1.69, train=0.971, val=0.659, val_loss=3.4] [A[A

iteration: 100%|██████████| 4/4 [04:57<00:00, 74.30s/it, min_val_loss=1.69, train=0.971, val=0.659, val_loss=3.4][A[A
experiment: 100%|██████████| 2/2 [





