In [1]:
"""
Created on Wed Aug  4 17:50:06 2021

@author: amol
"""

# set TF GPU memory growth so that it doesn't hog everything at once
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
# Enable memory growth on every visible GPU: TensorFlow expects the setting to
# be the same across GPUs, and iterating (instead of indexing [0]) keeps this
# cell from raising IndexError on a CPU-only machine.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

import os
import numpy as np
from main import *
debug_mode = True  # faster experiments for debugging

# util for getting objects' fields' names
field_names = lambda x: list(vars(x).keys())

In [2]:
# Prepare the DataFrame that will be used downstream.
# NOTE(review): the three *PCAs() calls presumably add derived columns to the
# preparer's internal frame before getDataframe() reads it -- confirm in main.
dp = DataPreparer()
dp.createPCAs()
dp.sparsePCAs()
dp.zmixOrthogonalPCAs()
df = dp.getDataframe()

# The preparer itself is passed alongside the frame for now; eventually all
# the constants should be abstracted into a single class.
dm = DataManager(df, dp)

In [None]:
"""
bestModel, experimentSettings = exprExec.modelFactory.openBestModel()
dm.createTrainTestData(experimentSettings['dataSetMethod'], experimentSettings['noOfCpv'],
                       experimentSettings['ipscaler'], experimentSettings['opscaler'])
dm.input_data_cols
"""

In [6]:
# Run the model experiments for a single architecture.
model = 'PCDNN_V2'
# fail fast if the identifier is not one of the valid possible models
assert model in ('PCDNN_V2', 'PCDNN_V1', 'SIMPLE_DNN', 'GP')
exprExec = run_model_experiments(dm, debug_mode=debug_mode, models=model)



Parent DNNModelFactory Instantiated
Parent DNNModelFactory Instantiated
------------------ AllSpeciesAndZmix ------------------
--------------------self.build_and_compile_pcdnn_v2_model----------------------
53 4 Y Y Y Y
Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 species_input (InputLayer)     [(None, 53)]         0           []                               
                                                                                                  
 zmix (InputLayer)              [(None, 1)]          0           []                               
                                                                                                  
 linear_embedding (Functional)  (None, 4)            216         ['species_input[0][0]']          
                                                                     

Model: "linear_embedding"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 species_input (InputLayer)  [(None, 53)]              0         
                                                                 
 linear_embedding (Dense)    (None, 4)                 216       
                                                                 
Total params: 216
Trainable params: 216
Non-trainable params: 0
_________________________________________________________________
                MAE           MSE      MAPE
count  1.000000e+00  1.000000e+00   1.00000
mean   1.319457e+09  9.429482e+18  33.14007
std             NaN           NaN       NaN
min    1.319457e+09  9.429482e+18  33.14007
25%    1.319457e+09  9.429482e+18  33.14007
50%    1.319457e+09  9.429482e+18  33.14007
75%    1.319457e+09  9.429482e+18  33.14007
max    1.319457e+09  9.429482e+18  33.14007
self.modelType: PCDNNV2 dataType: randomequaltrainte

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

KeyboardInterrupt: 

In [None]:
""" prepare SimpleDNN for loading (from prior experiments) """

# Rebinds the notebook-wide `exprExec`; whichever "prepare ..." cell ran last
# decides which executor the later cells operate on.
exprExec = DNNExperimentExecutor()
# carry over the notebook-level debug flag
exprExec.debug_mode = debug_mode

# attach the factory matching this executor type
exprExec.setModelFactory(SimpleDNNModelFactory())

In [None]:
""" prepare PCDNNV1 for loading (from prior experiments) """

# Rebinds the notebook-wide `exprExec`; whichever "prepare ..." cell ran last
# decides which executor the later cells operate on.
exprExec = PCDNNV1ExperimentExecutor()
# carry over the notebook-level debug flag
exprExec.debug_mode = debug_mode

# attach the factory matching this executor type
exprExec.setModelFactory(PCDNNV1ModelFactory())

In [3]:
""" prepare PCDNNV2 for loading (from prior experiments) """

# Rebinds the notebook-wide `exprExec`; whichever "prepare ..." cell ran last
# decides which executor the later cells operate on.
# NOTE(review): the rapid-testing cell sets modelType = 'PCDNNV2', so it
# presumably expects this executor to be the active one -- confirm.
exprExec = PCDNNV2ExperimentExecutor()
# carry over the notebook-level debug flag
exprExec.debug_mode = debug_mode

# attach the factory matching this executor type
exprExec.setModelFactory(PCDNNV2ModelFactory())

Parent DNNModelFactory Instantiated
Parent DNNModelFactory Instantiated




## Rapid Model Testing
### (requires running the "prepare PCDNNV2 for loading" cell first)

In [14]:
# fix seeds
import random
import numpy as np
random.seed(0)
np.random.seed(0)
tf.random.set_seed(0)

# --- experiment parameters for a single run ---
dataType = 'randomequaltraintestsplit' #'frameworkincludedtrainexcludedtest'
inputType = 'AllSpeciesAndZmix'
# composite key of the form '<inputType>_<dataType>'
dataSetMethod = f'{inputType}_{dataType}'
opscaler = "MinMaxScaler"# 'PositiveLogNormal'
ipscaler = "MinMaxScaler"# 'PositiveLogNormal'
# options are passed as the string 'Y'
# NOTE(review): presumably 'N' disables an option -- confirm in main
ZmixPresent = 'Y'
concatenateZmix = 'Y'
kernel_constraint = 'Y'
kernel_regularizer = 'Y'
activity_regularizer = 'Y'
noOfCpv = 4
noOfNeurons = 53

# --- model factory / executor settings ---
exprExec.modelFactory.loss='mse'
exprExec.modelFactory.activation_func='relu'
exprExec.modelFactory.dropout_rate=0.25
exprExec.modelFactory.skip_layers = False
exprExec.modelFactory.batch_norm = False
exprExec.modelFactory.basic = False
# only the executor's flag is switched off here; the notebook-level
# `debug_mode` variable keeps its earlier value
exprExec.debug_mode = False
exprExec.epochs_override = 30
exprExec.batch_size = 32
exprExec.n_models_override = 1

# initialize experiment executor...
exprExec.dm = dm
# NOTE(review): `pd` is not imported by any visible cell; presumably it comes
# from `from main import *` -- confirm
exprExec.df_experimentTracker = pd.DataFrame()
exprExec.modelType = 'PCDNNV2'

# the returned object is kept as `history`; the val_R2 plotting cell reads
# history.history from it
history = exprExec.executeSingleExperiment(noOfNeurons,dataSetMethod,dataType,inputType,ZmixPresent=ZmixPresent,
                                           noOfCpv=noOfCpv,concatenateZmix=concatenateZmix,kernel_constraint=kernel_constraint,
                                           kernel_regularizer=kernel_regularizer,activity_regularizer=activity_regularizer,
                                           opscaler=opscaler, ipscaler=ipscaler)

--------------------self.build_and_compile_pcdnn_v2_model----------------------
53 4 Y Y Y Y
Model: "model_20"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 species_input (InputLayer)     [(None, 53)]         0           []                               
                                                                                                  
 zmix (InputLayer)              [(None, 1)]          0           []                               
                                                                                                  
 linear_embedding (Functional)  (None, 4)            216         ['species_input[0][0]']          
                                                                                                  
 concatenated_zmix_linear_embed  (None, 5)           0           ['zmix[0][0]',                  

Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30


Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Total Absolute Error:  3.0807549727465697*10^13
Mean Absolute Error:  3.748333097392104*10^9
Mean Percentage Error:  1.4635035788207105*10^3
Total Squared Error:  4.14911912210024*10^23
Mean Squared Error:  5.048204309648667*10^19
Number of Points:  8219
Model: "linear_embedding"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 species_input (InputLayer)  [(None, 53)]              0         
                                                                 
 linear_embedding (Dense)    (None, 4)                 216       
                                                                 
Total params: 216
Trainable params: 216
Non-trainable params: 0
_________________________________________________________________
                MAE           MSE         MAPE
count  1.000000e+00  1.000

In [None]:
"""
import os, pickle
os.system('mkdir base_code_model')
with open('base_code_model/custom_objects.pickle', 'wb') as f:
        pickle.dump(exprExec.modelFactory.concreteClassCustomObject, f)

for i in range(200):
    model = exprExec.modelFactory.rebuild_model()
    model.save(f'base_code_model/base_code_model{i}.h5')
    
"""

## Results Plotting & Analysis

In [None]:
loss = 727684307.3417714 # Update Me!

def print_scientific_notation(number):
    """Print ``number`` in the form ``<mantissa>*10^<power>``.

    Args:
        number: Real scalar to display. Zero and negative values are accepted.

    Returns:
        None. The formatted text is written to stdout.
    """
    import math  # local import keeps this cell self-contained

    if number == 0:
        # log10(0) is undefined; show zero with exponent 0
        power = 0
    else:
        # floor (rather than int() truncation) gives the right exponent for
        # |number| < 1, and log10 avoids the rounding error of log(x)/log(10)
        # at exact powers of ten (e.g. 1000 -> 3, not 2)
        power = math.floor(math.log10(abs(number)))
    # format the argument itself, not the notebook-level `loss` variable
    print(f"Scientific Notation: {(number/10**power)}*10^{power}")
print_scientific_notation(loss)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# NOTE(review): the [30:] slice drops the first 30 recorded epochs; if the run
# that produced `history` had 30 epochs or fewer, the series is empty and the
# figure is blank -- confirm the intended offset.
# np.maximum(..., 0) clips negative values to zero before plotting.
plt.plot(np.maximum(history.history['val_R2'][30:], 0))
plt.title('val_R^2 vs epochs')
plt.ylabel('percent')
plt.xlabel('epochs')
plt.show()

In [None]:
import model_analyzer.model_analysis as model_analysis
import importlib; importlib.reload(model_analysis)

# Load the stored best model together with the settings dict it was saved with.
bestModel, experimentSettings = exprExec.modelFactory.openBestModel()
# Rebuild the train/test data using the dataset method, CPV count and scalers
# recorded in those settings.
dm.createTrainTestData(experimentSettings['dataSetMethod'],experimentSettings['noOfCpv'], experimentSettings['ipscaler'], experimentSettings['opscaler'])

print(f'\nexperimentSettings: {experimentSettings}')
print(f'\nbestModel.input_shape: {bestModel.input_shape}')
# The inspector is given the factory and the data manager (not bestModel itself).
inspector = model_analysis.ModelInspector(exprExec.modelFactory, dm)

In [None]:
# fewer permutation repeats when the notebook-level debug flag is on
if debug_mode:
    n_repeats = 5
else:
    n_repeats = 20
inspector.plot_permutation_feature_importance(n_repeats=n_repeats)

In [None]:
inspector.plot_partial_dependence()

### Print & Record Linear Embeddings Output (NOTE: Deprecated)

In [None]:
import numpy as np

def inspect_PCA(X):
    """Order the columns of ``X`` by descending variance and summarise them.

    Args:
        X: 2-D array supporting ``.var(axis=0)`` and ``X[:, idx]`` indexing.

    Returns:
        dict with keys, in this order:
            'X'      -- ``X`` with columns reordered by descending variance
            'Var'    -- per-column variances in that same order
            'CumVar' -- running sum of 'Var'
    """
    column_var = X.var(axis=0)

    # indices of the columns from largest to smallest variance
    descending = np.argsort(column_var)[::-1]
    ordered_var = column_var[descending]

    return {
        'X': X[:, descending],
        'Var': ordered_var,
        'CumVar': np.add.accumulate(ordered_var),
    }


X, Y, rom, zmix = dm.getAllData()
FullDataset = inspect_PCA(X)
# NOTE(review): FullDataset['X'] has its columns re-sorted by variance inside
# inspect_PCA, so the encoder below receives the features in a different
# column order than `X` from getAllData() -- confirm this is intended.
# NOTE(review): if KeyError is raised, DNN_PCA is never assigned and any later
# cell that reads DNN_PCA fails with NameError.
try:    
    linearAutoEncoder = exprExec.modelFactory.getLinearEncoder()
    # X is rebound here to the encoder's output
    X = linearAutoEncoder.predict(FullDataset['X'])
    DNN_PCA = inspect_PCA(X)
except KeyError:
    print('No Linear Auto Encoder!')

In [None]:
#dm.createDataset
import matplotlib.pyplot as plt
import copy
dm_PurePCA = copy.deepcopy(dm)
dm_PurePCA.createTrainTestData(dataSetMethod='PurePCA_randomequalflamesplit',
                               numCpvComponents=2, ipscaler=None, opscaler=None)

X,Y,rom,zmix = dm_PurePCA.getAllData()
PurePCA = inspect_PCA(X)
print(PurePCA['Var'])
print(DNN_PCA['Var'])

plt.plot(PurePCA['CumVar']/FullDataset['Var'].sum(), color='r')
plt.plot(DNN_PCA['CumVar']/FullDataset['Var'].sum(), color='b')
plt.title('PCA Total Variance Comparison')