Citation for code: 

In [None]:
import os, shutil, time, csv,math,scipy,matplotlib
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
from pathlib import Path
import numpy as np
import pandas as pd
from scipy.stats import pearsonr,gaussian_kde
#displaying data
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow,figure
import matplotlib.image as mpimg
plt.rcParams['figure.dpi'] = 300 
%matplotlib inline
#TF imports
import tensorflow as tf
# Report the TensorFlow version and the GPUs it can see, then enable memory
# growth on those GPUs.
print(f"Tensorflow ver. {tf.__version__}")
physical_device = tf.config.experimental.list_physical_devices('GPU')
print(f'Device found : {physical_device}')
# set_memory_growth (rather than get_memory_growth, which only reads the flag)
# is the call that actually enables memory growth. Iterating over the list
# also copes with a CPU-only machine, where indexing [0] would raise
# IndexError, and applies the same setting to every GPU.
try:
    for gpu in physical_device:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Raised when the devices were already initialised (e.g. the cell is re-run).
    print(f'Could not set memory growth: {err}')
from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential,Model,load_model
from tensorflow.keras.layers import Input, Conv2D,Conv1D, Dense, Flatten, Dropout

### Variables and Functions

In [None]:
# CSV files read by process_csv_to_datasets for the train / validation / test splits.
Train_csv = r'D:\Training_Data_Creation\Pointer_files\Train_Tiles40.csv'
Val_csv = r'D:\Training_Data_Creation\Pointer_files\Val_Tiles40.csv'
Test_csv = r'D:\Training_Data_Creation\Pointer_files\Test_Tiles40.csv'

# Temporary weights file used as the ModelCheckpoint target; it gets
# overwritten by save_nn_for_log.
temp_weights = r'D:\Training_Data_Creation\01-logs\temp.h5'

# Seed handed to the training-set shuffle.
SEED = 71

In [None]:
def process_csv_to_datasets(incsv):
    """Read a CSV and return its scaled features and labels as float32 tensors.

    The 'Trueval' column is split off as the label and divided by 100 (the
    maximum mask pixel value). All remaining columns are divided by 65535
    (the maximum 16-bit integer value).

    Returns a ``(features, labels)`` tuple.
    """
    frame = pd.read_csv(incsv)
    raw_labels = frame.pop('Trueval')  # pop() also removes the column from the features
    features = tf.dtypes.cast(np.array(frame) / 65535, 'float32')
    labels = tf.dtypes.cast(np.array(raw_labels) / 100, 'float32')
    return features, labels
# Load each split and separate it into scaled feature values and labels.
# The resulting names (train/val/test and their *_labels) are module-level
# and are consumed by the tf.data pipeline cell further down.
train,train_labels=process_csv_to_datasets(Train_csv)
val,val_labels=process_csv_to_datasets(Val_csv)
test,test_labels=process_csv_to_datasets(Test_csv)

In [None]:
import ipynb.fs 
#Importing metric functions
from .defs.Thesis_Functions import dtime,calculate_metrics,time_and_metrics
#Importing plotting functions
from .defs.Thesis_Functions import make_scatter_from_results,plot_hist_save,Feature_importance
#Importing NN 
from .defs.Thesis_Functions import get_model_mae,save_nn_for_log,nnDset_to_Results

In [None]:
# Build the tf.data pipelines used by the dense neural network.
BUFFER_SIZE, BATCH_SIZE = 200000, 1000
# Training stream: shuffled with a fixed seed, repeated indefinitely and cut
# into batches of BATCH_SIZE samples (training in batches to maximize speed).
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train, train_labels))
    .shuffle(buffer_size=BUFFER_SIZE, seed=SEED)
    .repeat()
    .batch(BATCH_SIZE)
)
# Validation and test sets are each served as a single batch whose size is the
# number of individual samples.
val_dataset = tf.data.Dataset.from_tensor_slices((val, val_labels)).batch(len(val))
test_dataset = tf.data.Dataset.from_tensor_slices((test, test_labels)).batch(len(test))

## Model Tuning
#### This section searches for the optimal hyperparameters

In [None]:
NN_name='NN4'
def Dense_NN(lr,u,u2,u3,u4):
    """Build and compile a dense regression network with four hidden layers.

    Parameters
    ----------
    lr : learning rate passed to the Adam optimizer.
    u, u2, u3, u4 : number of units in hidden layers 1 to 4 (all ReLU).

    Returns
    -------
    The compiled Sequential model: 9 input features, one linear output unit,
    mean-absolute-error loss, with MAE and MSE tracked as metrics.
    """
    model = Sequential()
    # The first hidden layer also declares the 9-feature input shape.
    model.add(Dense(u, activation='relu', input_shape=[9]))
    for units in (u2, u3, u4):
        model.add(Dense(units, activation='relu'))
    model.add(Dense(1, activation='linear'))
    # tf.keras exposes the optimizer class as `Adam`; a lowercase `adam`
    # attribute is not available there and raises AttributeError.
    model.compile(loss="MeanAbsoluteError",
                  optimizer=keras.optimizers.Adam(learning_rate=lr),
                  metrics=[tf.keras.metrics.MeanAbsoluteError(),tf.keras.metrics.MeanSquaredError()])
    return model

In [None]:
def create_best_scatter (dataset,datasetstr):
    """Rebuild the best model of the current search and scatter-plot its results.

    The best run is the key of ``models_dic`` with the smallest MAE; its
    hyperparameters ``[spe, lr, u, u2, u3, u4]`` are read from ``hp_MAE_dic``
    under the smallest MAE key. The weights are loaded from that run's log
    folder and the results for ``dataset`` are passed to
    ``make_scatter_from_results`` together with ``datasetstr``.

    Parameters
    ----------
    dataset : dataset handed to ``nnDset_to_Results``.
    datasetstr : label for the dataset, forwarded to the plotting helper.
    """
    lowest = min(models_dic, key=models_dic.get)
    hparam = hp_MAE_dic[min(hp_MAE_dic)]
    # Steps-per-epoch (hparam[0]) is not needed to rebuild the network.
    lr, u, u2, u3, u4 = hparam[1:6]
    model = Dense_NN(lr, u, u2, u3, u4)
    model_weights_path = r"D:\Training_Data_Creation\01-logs\logs{}\{}".format(NN_name,lowest)
    files = os.listdir(model_weights_path)
    # os.listdir returns entries in arbitrary order, so a fixed index is not a
    # reliable way to find the weights. Pick the .h5 file by extension and only
    # fall back to the second entry when no .h5 file is present.
    h5_files = sorted(name for name in files if name.lower().endswith('.h5'))
    weights_file = h5_files[0] if h5_files else files[1]
    model.load_weights(os.path.join(model_weights_path, weights_file))
    print('Best Scatterplot {}'.format(lowest))
    make_scatter_from_results(nnDset_to_Results(model,dataset),datasetstr,'01-Best {} scatters'.format(NN_name),logpath)

def create_txt_and_prints():
    """Write the search summary to ``01-<NN_name>Logs.txt`` in ``logpath`` and print it.

    Reads the module-level ``models_dic`` (model name -> MAE), ``logpath``,
    ``NN_name``, ``startmain`` and ``endmain``. The file lists the overall
    runtime, the ten lowest-MAE model names, the ten highest-MAE model names
    and finally every model name. The two top-ten lists and the runtime are
    also printed.
    """
    txtfile = logpath + r'/01-{}Logs.txt'.format(NN_name)
    # Despite the "5" in their names, both lists hold up to ten entries.
    best5_models = sorted(models_dic, key=models_dic.get)[:10]
    worst5_models = sorted(models_dic, key=models_dic.get, reverse=True)[:10]

    def _write_names(handle, names):
        # One model name per line.
        for name in names:
            handle.write(str(name) + '\n')

    with open(txtfile, 'w') as f:
        f.write("Overall runtime is " + ("%.2f" % (endmain - startmain)) + ' sec\n')
        f.write('Order goes SPE, BS, LR, Units (first layer then rest), MAE \n Best 10 Models \n')
        _write_names(f, best5_models)
        f.write('\n Worst 10 Models \n')
        _write_names(f, worst5_models)
        f.write('\n All Models \n')
        _write_names(f, models_dic)

    print('Logs created, Best 10 models are:')
    for name in best5_models:
        print(name)
    print('\n Worst 10 models are:')
    for name in worst5_models:
        print(name)
    print('\n')
    print("Overall runtime is " + ("%.2f" % (endmain - startmain)) + ' sec')
def display_best_history():
    """Display the image saved in the log folder of the lowest-MAE model.

    The best run is the key of ``models_dic`` with the smallest MAE. Its
    folder is expected to hold an image next to the weights file
    (presumably the history plot written by ``plot_hist_save`` - confirm).
    """
    lowest = min(models_dic, key=models_dic.get)
    model_weights_path = r"D:\Training_Data_Creation\01-logs\logs{}\{}".format(NN_name,lowest)
    files = os.listdir(model_weights_path)
    # os.listdir returns entries in arbitrary order, so a fixed index may hit
    # the weights file instead of the image. Select by image extension and only
    # fall back to the first entry when no such file is found.
    images = sorted(name for name in files if name.lower().endswith(('.png', '.jpg', '.jpeg')))
    imgstr = images[0] if images else files[0]
    img = mpimg.imread(os.path.join(model_weights_path, imgstr))
    plt.imshow(img)

In [None]:
def assess_model(hparam,session_n):
    """Train one hyperparameter combination and log its MAE.

    Parameters
    ----------
    hparam : sequence ``[spe, lr, u, u2, u3, u4]`` (steps per epoch, learning
        rate and the units of the four hidden layers).
    session_n : run counter embedded in the generated model name.

    Returns
    -------
    The MAE reported by ``get_model_mae`` for the checkpointed weights on
    ``val_dataset``. The value is also stored in ``models_dic`` under the
    generated model name.
    """
    spe, lr, u, u2, u3, u4 = (hparam[i] for i in range(6))
    model = Dense_NN(lr, u, u2, u3, u4)
    start = time.time()
    # Stop after 10 epochs without val_loss improvement; checkpoint only the
    # best weights to the temporary file.
    callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10),
        tf.keras.callbacks.ModelCheckpoint(temp_weights, save_best_only=True, save_weights_only=True),
    ]
    model_history = model.fit(
        train_dataset,
        validation_data=val_dataset,
        validation_steps=1,
        callbacks=callbacks,
        steps_per_epoch=spe,
        epochs=500,
    )
    MAE = get_model_mae(model, temp_weights, val_dataset)
    end = time.time()
    runtime = "%.2f" % (end - start)
    # The name carries the MAE, the run number, the hyperparameters and the runtime.
    model_name = "{}-{}mae-{}r-{}p-{}sec".format(NN_name, str(round(MAE, 5)), session_n, str(hparam), runtime)
    save_nn_for_log(model, temp_weights, val_dataset, logpath, model_name)
    plot_hist_save(model, temp_weights, val_dataset, model_history, logpath, model_name)
    print('MAE is {} in {}sec'.format(MAE, runtime))
    models_dic[model_name] = MAE
    return MAE

In [None]:
# Exhaustive grid search over the layer sizes (learning rate and steps per
# epoch have a single candidate each). Every combination is trained by
# assess_model, which also records the run in models_dic.
session_n=0
startmain = time.time()
# Maps MAE -> hyperparameter list; runs with an identical MAE overwrite each other.
hp_MAE_dic={}
hp_dic={'hp_lr':[.001],'hp_spe':[100],
        'hp_u':[32,24,16,8],'hp_u2':[32,24,16,8],'hp_u3':[32,24,16,8],'hp_u4':[32,24,16,8]}
# Maps model name -> MAE; filled inside assess_model.
models_dic={}
# Log folder of this search, named after the current NN_name.
logpath=r"D:\Training_Data_Creation\01-logs\logs{}".format(NN_name)
for spe in hp_dic['hp_spe']:
    for lr in hp_dic['hp_lr']:
        for u in hp_dic['hp_u']:
            for u2 in hp_dic['hp_u2']:
                for u3 in hp_dic['hp_u3']:
                    for u4 in hp_dic['hp_u4']:
                        hparam=[spe,lr,u,u2,u3,u4]
                        print('--- Session {}: Testing {} spe, {} bs, {} lr and {},{},{},{} units'.format(session_n,spe,BATCH_SIZE,lr,u,u2,u3,u4))
                        MAE=assess_model(hparam,session_n)
                        # Close the current matplotlib figure before the next run.
                        plt.close()
                        hp_MAE_dic[MAE]=hparam
                        session_n+=1
# startmain/endmain are read by create_txt_and_prints to report the overall runtime.
endmain = time.time()

In [None]:
'''Functions work on most recently assessed model'''
# Both helpers read the module-level models_dic / hp_MAE_dic / logpath left
# behind by the most recent grid-search cell.
create_txt_and_prints()
create_best_scatter (val_dataset,'Val')

In [None]:
# Show the image stored in the log folder of the lowest-MAE run.
display_best_history()

## Model Tuning with LR Scheduler
#### This section searches for the optimal hyperparameters when a learning-rate scheduler is used

In [None]:
lr = .001
def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch`` given the incoming ``lr``.

    Epochs 0 to 20 return ``lr`` unchanged. Afterwards ``lr`` is divided by
    ``1 + 0.1 * int(epoch / 20)``, so the divisor grows by 0.1 every 20 epochs.

    NOTE(review): when used through Keras' LearningRateScheduler the ``lr``
    argument is presumably the optimizer's current rate, in which case the
    division compounds from epoch to epoch - confirm this is intended.
    """
    if epoch > 20:
        punish_val = int(epoch / 20)
        return lr / ((punish_val * .1 + 1))
    return lr

In [None]:
lr = .001
# Evaluate the schedule for epochs 0-99 with a constant input learning rate
# and plot the resulting curve.
x = list(range(0, 100))
y = []
for i in x:
    y.append(scheduler(i, lr))
plt.plot(x, y, color='black')
plt.xlabel('Epoch', size=14)
plt.ylabel('Learning Rate', size=14)

In [None]:
def assess_model_schedule(hparam,session_n):
    """Train one hyperparameter combination with the LR scheduler and log its MAE.

    Parameters
    ----------
    hparam : sequence ``[spe, u, u2, u3, u4]`` (steps per epoch and the units
        of the four hidden layers).
    session_n : run counter embedded in the generated model name.

    Returns
    -------
    The MAE reported by ``get_model_mae`` for the checkpointed weights on
    ``val_dataset``. The value is also stored in ``models_dic`` under the
    generated model name.
    """
    spe, u, u2, u3, u4 = (hparam[i] for i in range(5))
    model = Dense_NN(u, u2, u3, u4)
    start = time.time()
    # Learning rate follows `scheduler`; stop after 15 epochs without val_loss
    # improvement; checkpoint only the best weights to the temporary file.
    callbacks = [
        tf.keras.callbacks.LearningRateScheduler(scheduler),
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15),
        tf.keras.callbacks.ModelCheckpoint(temp_weights, save_best_only=True, save_weights_only=True),
    ]
    model_history = model.fit(
        train_dataset,
        validation_data=val_dataset,
        validation_steps=1,
        callbacks=callbacks,
        steps_per_epoch=spe,
        epochs=500,
    )
    MAE = get_model_mae(model, temp_weights, val_dataset)
    end = time.time()
    runtime = "%.2f" % (end - start)
    # The name carries the MAE, the run number, the hyperparameters and the runtime.
    model_name = "{}-{}mae-{}r-{}p-{}sec".format(NN_name, str(round(MAE, 5)), session_n, str(hparam), runtime)
    save_nn_for_log(model, temp_weights, val_dataset, logpath, model_name)
    plot_hist_save(model, temp_weights, val_dataset, model_history, logpath, model_name)
    print('MAE is {} in {}sec'.format(MAE, runtime))
    models_dic[model_name] = MAE
    return MAE
def create_best_scatter (dataset,datasetstr):
    """Rebuild the best scheduler-search model and scatter-plot its results.

    The best run is the key of ``models_dic`` with the smallest MAE; its
    hyperparameters ``[spe, u, u2, u3, u4]`` are read from ``hp_MAE_dic``
    under the smallest MAE key. The weights are loaded from that run's log
    folder and the results for ``dataset`` are passed to
    ``make_scatter_from_results`` together with ``datasetstr``.

    Parameters
    ----------
    dataset : dataset handed to ``nnDset_to_Results``.
    datasetstr : label for the dataset, forwarded to the plotting helper.
    """
    lowest = min(models_dic, key=models_dic.get)
    hparam = hp_MAE_dic[min(hp_MAE_dic)]
    # Steps-per-epoch (hparam[0]) is not needed to rebuild the network.
    u, u2, u3, u4 = hparam[1:5]
    model = Dense_NN(u, u2, u3, u4)
    model_weights_path = r"D:\Training_Data_Creation\01-logs\logs{}\{}".format(NN_name,lowest)
    files = os.listdir(model_weights_path)
    # os.listdir returns entries in arbitrary order, so a fixed index is not a
    # reliable way to find the weights. Pick the .h5 file by extension and only
    # fall back to the second entry when no .h5 file is present.
    h5_files = sorted(name for name in files if name.lower().endswith('.h5'))
    weights_file = h5_files[0] if h5_files else files[1]
    model.load_weights(os.path.join(model_weights_path, weights_file))
    print('Best Scatterplot {}'.format(lowest))
    make_scatter_from_results(nnDset_to_Results(model,dataset),datasetstr,'01-Best {} scatters'.format(NN_name),logpath)


In [None]:
NN_name = 'NN4_1000bs_relu_LRscheduler'
def Dense_NN(u,u2,u3,u4):
    """Build and compile the four-hidden-layer dense network for the scheduler search.

    ``u``, ``u2``, ``u3`` and ``u4`` are the units of hidden layers 1 to 4
    (all ReLU). The model takes 9 input features, ends in one linear unit and
    is compiled with MAE loss, MAE/MSE metrics and an Adam optimizer created
    with its default arguments.

    Author's note: the last tested; good for more data.
    """
    hidden_units = (u, u2, u3, u4)
    model = Sequential()
    for position, units in enumerate(hidden_units):
        if position == 0:
            # Only the first layer declares the input shape.
            model.add(Dense(units, activation='relu', input_shape=[9]))
        else:
            model.add(Dense(units, activation='relu'))
    model.add(Dense(1, activation='linear'))
    tracked_metrics = [tf.keras.metrics.MeanAbsoluteError(), tf.keras.metrics.MeanSquaredError()]
    model.compile(loss="MeanAbsoluteError",
                  optimizer=keras.optimizers.Adam(),
                  metrics=tracked_metrics)
    return model

In [None]:
# Exhaustive grid search over steps per epoch and layer sizes for the
# scheduler variant. Every combination is trained by assess_model_schedule,
# which also records the run in models_dic.
session_n=0
startmain = time.time()
# Maps MAE -> hyperparameter list; runs with an identical MAE overwrite each other.
hp_MAE_dic={}
hp_dic={'hp_spe':[150,200],
        'hp_u':[40,32,24,16],'hp_u2':[40,32,24,16],'hp_u3':[40,32,24,16],'hp_u4':[40,32,24,16]}
# Maps model name -> MAE; filled inside assess_model_schedule.
models_dic={}
# Log folder of this search, named after the current NN_name.
logpath=r"D:\Training_Data_Creation\01-logs\logs{}".format(NN_name)
for spe in hp_dic['hp_spe']:
    for u in hp_dic['hp_u']:
        for u2 in hp_dic['hp_u2']:
            for u3 in hp_dic['hp_u3']:
                for u4 in hp_dic['hp_u4']:
                    hparam=[spe,u,u2,u3,u4]
                    print('--- Session {}: Testing {} spe, {} bs, and {},{},{},{} units'.format(session_n,spe,BATCH_SIZE,u,u2,u3,u4))
                    MAE=assess_model_schedule(hparam,session_n)
                    # Close the current matplotlib figure before the next run.
                    plt.close()
                    hp_MAE_dic[MAE]=hparam
                    session_n+=1
# startmain/endmain are read by create_txt_and_prints to report the overall runtime.
endmain = time.time()

In [None]:
'''Functions work on most recently assessed model'''
# Both helpers read the module-level models_dic / hp_MAE_dic / logpath left
# behind by the most recent grid-search cell.
create_txt_and_prints()
create_best_scatter (val_dataset,'Val')

## Assess Different Models
#### For evaluating test dataset and other datasets after model selection

In [None]:
NN_name = 'NN4'
def Dense_NN(lr,u,u2,u3,u4):
    """Build and compile the four-hidden-layer dense network used for assessment.

    ``lr`` is the learning rate passed to the Adam optimizer; ``u``, ``u2``,
    ``u3`` and ``u4`` are the units of hidden layers 1 to 4 (all ReLU). The
    model takes 9 input features, ends in one linear unit and is compiled
    with MAE loss and MAE/MSE metrics.

    Author's note: the last tested; good for more data.
    """
    layer_stack = [Dense(u, activation='relu', input_shape=[9])]
    layer_stack.extend(Dense(width, activation='relu') for width in (u2, u3, u4))
    layer_stack.append(Dense(1, activation='linear'))

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)
    model.compile(
        loss="MeanAbsoluteError",
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        metrics=[tf.keras.metrics.MeanAbsoluteError(), tf.keras.metrics.MeanSquaredError()],
    )
    return model

In [None]:
# Hyperparameters of the selected model and the weights file saved for it.
lr, u, u2, u3, u4 = 0.001, 16, 16, 32, 40
path_to_weights = r'D:\Training_Data_Creation\01-logs\Best_dnn\01SELECTEDNN4_1000bs_2-0.05139mae-1012r-[225, 0.001, 16, 16, 32, 40]p-66.64sec\Weights-0.05139_11182022_070752.h5'
def load_model_and_assess(nndset,dsetstr):
    """Load the selected model and scatter-plot its results for ``nndset``.

    The network is rebuilt from the module-level ``lr, u, u2, u3, u4``, the
    weights at ``path_to_weights`` are loaded and the model summary is
    printed. The results from ``nnDset_to_Results`` are then passed to
    ``make_scatter_from_results`` with ``dsetstr`` as the dataset label.
    """
    network = Dense_NN(lr, u, u2, u3, u4)
    network.load_weights(path_to_weights)
    network.summary()
    results = nnDset_to_Results(network, nndset)
    scatter_title = '01-Best {} scatters'.format(NN_name)
    make_scatter_from_results(results, dsetstr, scatter_title, r'D:\Training_Data_Creation\Results_Scatter\Dense_NN')

In [None]:
# Assess the selected model on the validation dataset.
load_model_and_assess(val_dataset,'val')