## Notebook for creating experiment CSVs.

Allows the user to define the names of the experiment parameters (rows) and the values those rows should take. Most are constant across all experiments and are given as a single value. Others are varied across experiments: they are defined in lists and looped through in nested fashion to create every combination of desired experiments. In the final CSV, columns represent individual experiments, and rows are the parameters of those experiments. 

In [18]:
import pandas as pd
import numpy as np

# Row labels of the experiment table. The order of this list is the row
# order of the DataFrame built from it (it is used as the index).
experiment_df_rows = [
    "dbName",
    "num_trials",
    "maxIterations",
    "numLLEpochs",
    "fineTuneEpochs",
    "numDenseNeurons",
    "labelSmoothing",
    "dropoutRate",
    "LLLearningRate",
    "fineTuneLearningRate",
    "batchSize",
    "weightClasses",
    "augmentImages",
    "useValidation",
    "earlyStoppingPatience",
    "writeFinalModel",
    "modelType",
    "remoteDB",
    "evaluationSize",
    "TVPoolSize",
    "validationRatio",
    "predictionSize",
    "citizenWeight",
    "confidentThreshold",
    "numALSamplesPerIter",
    "activeLearningLabeler",
    "alSelectionCriteria",
    "SSLType",
    "classNoise",
    "classImbalance",
    "optimizer",
]


# Constants: one value each, shared by every experiment column.
num_trials = 2
maxIterations = 1
numLLEpochs = 50
fineTuneLearningRate = 0
fineTuneEpochs = 0
labelSmoothing = 0
dropoutRate = 0.4
batchSize = 32
TVPoolSize = 12000
validationRatio = 0.2
augmentImages = "FALSE"
useValidation = "TRUE"
earlyStoppingPatience = 4
writeFinalModel = "FALSE"
# NOTE(review): remoteDB is a Python bool while the other flags are the
# strings "TRUE"/"FALSE" -- confirm the consumer of the CSV accepts both.
remoteDB = True
# Assigned exactly once. It used to be set to "intel_images_1" here and then
# overwritten further down, so only "intel_images_2" ever took effect.
db_name = "intel_images_2"
evaluationSize = 2500
predictionSize = 32
citizenWeight = 0
confidentThreshold = 0
numALSamplesPerIter = 0
activeLearningLabeler = "simulated"
alSelectionCriteria = "random"
SSLType = "none"
weightClasses = "TRUE"
classNoise = [0, 0, 0, 0, 0, 0]
classImbalance = [1, 1, 1, 1, 1, 1]
optimizer = 'sgd'
numDenseNeurons = 256 * 2

# Varying: every combination of these lists becomes one experiment column.
LLLearningRates = [1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9]
modelTypes = ['inception', 'densenet']




In [19]:
experiment_base_name = "sgd_intel_param_search"

# Values that are identical in every experiment, keyed by the row name they
# belong to. Keying by name (rather than relying on list position) means a
# reordered or newly added entry in experiment_df_rows cannot silently put a
# value on the wrong row.
constant_params = {
    "dbName": db_name,
    "num_trials": num_trials,
    "maxIterations": maxIterations,
    "numLLEpochs": numLLEpochs,
    "fineTuneEpochs": fineTuneEpochs,
    "numDenseNeurons": numDenseNeurons,
    "labelSmoothing": labelSmoothing,
    "dropoutRate": dropoutRate,
    "fineTuneLearningRate": fineTuneLearningRate,
    "batchSize": batchSize,
    "weightClasses": weightClasses,
    "augmentImages": augmentImages,
    "useValidation": useValidation,
    "earlyStoppingPatience": earlyStoppingPatience,
    "writeFinalModel": writeFinalModel,
    "remoteDB": remoteDB,
    "evaluationSize": evaluationSize,
    "TVPoolSize": TVPoolSize,
    "validationRatio": validationRatio,
    "predictionSize": predictionSize,
    "citizenWeight": citizenWeight,
    "confidentThreshold": confidentThreshold,
    "numALSamplesPerIter": numALSamplesPerIter,
    "activeLearningLabeler": activeLearningLabeler,
    "alSelectionCriteria": alSelectionCriteria,
    "SSLType": SSLType,
    "classNoise": classNoise,
    "classImbalance": classImbalance,
    "optimizer": optimizer,
}

# Rows filled in per experiment by the loops below.
varying_param_names = {"LLLearningRate", "modelType"}

# Fail loudly if the row names and the supplied values have drifted apart.
supplied_names = set(constant_params) | varying_param_names
expected_names = set(experiment_df_rows)
if supplied_names != expected_names:
    raise ValueError(
        "experiment parameters do not match experiment_df_rows; "
        f"missing values for {sorted(expected_names - supplied_names)}, "
        f"unexpected values for {sorted(supplied_names - expected_names)}"
    )

# One row per parameter; one column is added per experiment.
experiment_df = pd.DataFrame(index=experiment_df_rows)

# Running experiment counter; also the numeric suffix of each column name.
on_experiment = 0

# Loops through every combination of the varying parameters.
for modelTypeX in modelTypes:
    for LLLearningRateX in LLLearningRates:
        # Formulates the experiment (column) name.
        experiment_name = experiment_base_name + "_" + str(on_experiment)

        params = dict(constant_params)
        params["LLLearningRate"] = LLLearningRateX
        params["modelType"] = modelTypeX

        # Emit the values in index order so they line up with the rows.
        data_col = [params[row_name] for row_name in experiment_df_rows]
        experiment_df[experiment_name] = data_col

        on_experiment += 1

                    

In [20]:
# Bare expression as the cell's last line: the notebook renders the table
# so it can be checked by eye before it is written to disk.
experiment_df

Unnamed: 0,sgd_intel_param_search_0,sgd_intel_param_search_1,sgd_intel_param_search_2,sgd_intel_param_search_3,sgd_intel_param_search_4,sgd_intel_param_search_5,sgd_intel_param_search_6,sgd_intel_param_search_7,sgd_intel_param_search_8,sgd_intel_param_search_9,sgd_intel_param_search_10,sgd_intel_param_search_11,sgd_intel_param_search_12,sgd_intel_param_search_13
dbName,intel_images_2,intel_images_2,intel_images_2,intel_images_2,intel_images_2,intel_images_2,intel_images_2,intel_images_2,intel_images_2,intel_images_2,intel_images_2,intel_images_2,intel_images_2,intel_images_2
num_trials,2,2,2,2,2,2,2,2,2,2,2,2,2,2
maxIterations,1,1,1,1,1,1,1,1,1,1,1,1,1,1
numLLEpochs,50,50,50,50,50,50,50,50,50,50,50,50,50,50
fineTuneEpochs,0,0,0,0,0,0,0,0,0,0,0,0,0,0
numDenseNeurons,512,512,512,512,512,512,512,512,512,512,512,512,512,512
labelSmoothing,0,0,0,0,0,0,0,0,0,0,0,0,0,0
dropoutRate,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4,0.4
LLLearningRate,0.001,0.0001,0.00001,0.000001,0.0,0.0,0.0,0.001,0.0001,0.00001,0.000001,0.0,0.0,0.0
fineTuneLearningRate,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [21]:
#saves final CSV
from pathlib import Path

# The file is named after experiment_base_name so it always matches the
# column names generated above instead of repeating the name as a literal.
output_dir = Path("./experiment_param_csvs")
# to_csv does not create missing directories, so make sure it exists first.
output_dir.mkdir(parents=True, exist_ok=True)
experiment_df.to_csv(output_dir / f"{experiment_base_name}.csv")