In [1]:
"""
Created on Wed Aug  4 17:50:06 2021

@author: amol
"""

# set TF GPU memory growth so that it doesn't hog everything at once
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

from main import *
debug_mode = True  # faster experiments for debugging

# util for getting objects' fields' names
field_names = lambda x: list(vars(x).keys())

In [2]:
#Prepare the DataFrame that will be used downstream
dp = DataPreparer()
dp.createPCAs()
dp.sparsePCAs()
dp.zmixOrthogonalPCAs()
df = dp.getDataframe()

# TODO: add PCA from linear model first
df.to_csv('PCA_data.csv', index=False)

# currently passing dp eventually we want to abstract all the constants into 1 class
dm = DataManager(df, dp)

In [None]:
'''
1. Run the GP Experiments
'''
import time

start=time.time()
exprExec = run_gp_experiments(dm, debug_mode=debug_mode)
print(f'duration: {time.time()-start}')

In [None]:
'''
2. Run the Simple DNN Experiments
'''

exprExec = run_simple_dnn_experiments(dm, debug_mode=debug_mode)

In [3]:
'''
4. Run the PCDNN_v2 Experiments
'''

%debug exprExec = run_pcdnn_v2_experiments(dm, debug_mode=debug_mode)

NOTE: Enter 'c' at the ipdb>  prompt to continue execution.
> [0;32m<string>[0m(1)[0;36m<module>[0;34m()[0m

ipdb> q


In [3]:
""" prepare PCDNNV2 for loading (from prior experiments) """

exprExec = PCDNNV2ExperimentExecutor()
exprExec.debug_mode = debug_mode

exprExec.setModelFactory(PCDNNV2ModelFactory())

Parent DNNModelFactory Instantiated


In [None]:
""" prepare PCDNNV1 for loading (from prior experiments) """

exprExec = PCDNNV1ExperimentExecutor()
exprExec.debug_mode = debug_mode

exprExec.setModelFactory(PCDNNV1ModelFactory())

In [None]:
""" prepare SimpleDNN for loading (from prior experiments) """

exprExec = DNNExperimentExecutor()
exprExec.debug_mode = debug_mode

exprExec.setModelFactory(SimpleDNNModelFactory())

In [4]:
dataType = 'randomequaltraintestsplit' #'frameworkincludedtrainexcludedtest'
inputType = 'AllSpecies'
dataSetMethod = f'{inputType}_{dataType}'
opscaler = 'PositiveLogNormal' #"MinMaxScaler"
ZmixPresent = 'Y'
concatenateZmix = 'Y'
kernel_constraint = 'N'
kernel_regularizer = 'N'
activity_regularizer = 'N'
noOfCpv = 4
noOfNeurons = 53

exprExec.modelFactory.loss='mse'
exprExec.modelFactory.activation_func='relu'
exprExec.modelFactory.dropout_rate=0.5
exprExec.debug_mode = False
exprExec.epochs_override = 10
exprExec.n_models_override = 1

# initialize experiment executor...
exprExec.dm = dm
exprExec.df_experimentTracker = pd.DataFrame()
exprExec.modelType = 'PCDNNV2'

exprExec.executeSingleExperiment(noOfNeurons,dataSetMethod,dataType,inputType,ZmixPresent,noOfCpv,concatenateZmix,kernel_constraint,
                                 kernel_regularizer,activity_regularizer,opscaler=opscaler)

--------------------self.build_and_compile_pcdnn_v2_model----------------------
53 4 N N N
Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
species_input (InputLayer)      [(None, 53)]         0                                            
__________________________________________________________________________________________________
zmix (InputLayer)               [(None, 1)]          0                                            
__________________________________________________________________________________________________
linear_embedding (Functional)   (None, 4)            216         species_input[0][0]              
__________________________________________________________________________________________________
concatenated_zmix_linear_embedd (None, 5)            0           zmix[0][0]                       
   

ipdb> n
> [0;32m/home/dwyerdei/rom_project2/src/experiment_executor/error_manager.py[0m(16)[0;36mcomputeError[0;34m()[0m
[0;32m     14 [0;31m        [0mevaluation_df_1[0m [0;34m=[0m [0mpd[0m[0;34m.[0m[0mDataFrame[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     15 [0;31m[0;34m[0m[0m
[0m[0;32m---> 16 [0;31m        [0mevaluation_df_1[0m[0;34m[[0m[0;34m'souener'[0m[0;34m][0m [0;34m=[0m [0mY_test[0m[0;34m.[0m[0mflatten[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     17 [0;31m[0;34m[0m[0m
[0m[0;32m     18 [0;31m        [0mevaluation_df_1[0m[0;34m[[0m[0;34m'souener_pred'[0m[0;34m][0m [0;34m=[0m [0mY_pred[0m[0;34m.[0m[0mflatten[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> n
> [0;32m/home/dwyerdei/rom_project2/src/experiment_executor/error_manager.py[0m(18)[0;36mcomputeError[0;34m()[0m
[0;32m     16 [0;31m        [0mevaluation_df_1[0m[0;34m[[0m[0;34m'souener'[

BdbQuit: 

In [5]:
import numpy as np
print(np.log(2189882659.968396)/np.log(10))
print(np.log(2099652636.0322459)/np.log(10))
print(np.log(199414035386.85098)/np.log(10))
#vars(exprExec.dm.outputScaler.log_col_transformers[0])
#exprExec.df_err

print(np.exp(np.sqrt(0.0370)))


9.340420844751762
9.322147451527908
11.299755722062626
1.2120993310693264


In [None]:
good_model = exprExec.model
print(good_model)

In [None]:
import keras
help(keras.models.Model.fit)

In [None]:
import model_analyzer.model_analysis as model_analysis
import importlib; importlib.reload(model_analysis)

bestModel, experimentSettings = exprExec.modelFactory.openBestModel()
print(f'\nexperimentSettings: {experimentSettings}')
print(f'\nbestModel.input_shape: {bestModel.input_shape}')
inspector = model_analysis.ModelInspector(exprExec.modelFactory, dm)

In [None]:
n_repeats = 5 if debug_mode else 20
inspector.plot_permutation_feature_importance(n_repeats=n_repeats)

In [None]:
inspector.plot_partial_dependence()

### Print & Record Linear Embeddings Output

In [None]:
import numpy as np

def inspect_PCA(X):
    PCA_dict = {}
    PCA_dict['X'] = X
    PCA_dict['Var'] = PCA_dict['X'].var(axis=0)
    
    # sort by explained variance
    sort_idx = np.argsort(PCA_dict['Var'])[::-1]
    PCA_dict['X'] = PCA_dict['X'][:, sort_idx]
    PCA_dict['Var'] = PCA_dict['Var'][sort_idx]
    PCA_dict['CumVar'] = np.add.accumulate(PCA_dict['Var'])
    return PCA_dict


X, Y, rom, zmix = dm.getAllData()
FullDataset = inspect_PCA(X)
try:    
    linearAutoEncoder = exprExec.modelFactory.getLinearEncoder()
    X = linearAutoEncoder.predict(FullDataset['X'])
    DNN_PCA = inspect_PCA(X)
except KeyError:
    print('No Linear Auto Encoder!')

In [None]:
#dm.createDataset
import matplotlib.pyplot as plt
import copy
dm_PurePCA = copy.deepcopy(dm)
dm_PurePCA.createTrainTestData(dataSetMethod='PurePCA_randomequalflamesplit',
                               numCpvComponents=2, ipscaler=None, opscaler=None)

X,Y,rom,zmix = dm_PurePCA.getAllData()
PurePCA = inspect_PCA(X)
print(PurePCA['Var'])
print(DNN_PCA['Var'])

plt.plot(PurePCA['CumVar']/FullDataset['Var'].sum(), color='r')
plt.plot(DNN_PCA['CumVar']/FullDataset['Var'].sum(), color='b')
plt.title('PCA Total Variance Comparison')