In [4]:
"""
Created on Wed Aug  4 17:50:06 2021

@author: amol
"""

# set TF GPU memory growth so that it doesn't hog everything at once
import tensorflow as tf
# Enable memory growth on every visible GPU rather than only the first one.
# Iterating also makes this a no-op on CPU-only machines, where indexing
# physical_devices[0] would raise IndexError on an empty list.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

import os
import numpy as np
from main import *
# Notebook-wide toggle: faster experiments for debugging.
debug_mode = True


def field_names(x):
    """Return the names of the attributes stored on ``x`` (via ``vars``) as a list."""
    return [attr_name for attr_name in vars(x)]

In [5]:
# Prepare the DataFrame that will be used downstream.
# The three *PCAs() steps are invoked on the DataPreparer before getDataframe()
# is called; presumably each one adds its own PCA-derived columns to the frame
# -- TODO confirm against DataPreparer in `main`.
dp = DataPreparer()
dp.createPCAs()
dp.sparsePCAs()
dp.zmixOrthogonalPCAs()
df = dp.getDataframe()

# DataManager currently receives the DataPreparer itself in addition to the frame.
# TODO: eventually abstract all the constants into one class so `dp` need not be passed.
dm = DataManager(df, dp)

In [None]:
"""
bestModel, experimentSettings = exprExec.modelFactory.openBestModel()
dm.createTrainTestData(experimentSettings['dataSetMethod'], experimentSettings['noOfCpv'],
                       experimentSettings['ipscaler'], experimentSettings['opscaler'])
dm.input_data_cols
"""

In [4]:
'''
Run the Model Experiments
'''
# Model type to run; must be one of the valid possible models listed below.
model = 'PCDNN_V2'
valid_models = ('PCDNN_V2', 'PCDNN_V1', 'SIMPLE_DNN', 'GP')
assert model in valid_models

# NOTE(review): debug_mode is hard-coded to False here instead of using the
# notebook-level `debug_mode` flag -- confirm that this is intentional.
exprExec = run_model_experiments(dm, models=model, debug_mode=False)



Parent DNNModelFactory Instantiated
Parent DNNModelFactory Instantiated
------------------ AllSpeciesAndZmix ------------------
--------------------self.build_and_compile_pcdnn_v2_model----------------------
53 4 Y Y Y Y
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 species_input (InputLayer)     [(None, 53)]         0           []                               
                                                                                                  
 zmix (InputLayer)              [(None, 1)]          0           []                               
                                                                                                  
 linear_embedding (Functional)  (None, 4)            216         ['species_input[0][0]']          
                                                                       

KeyboardInterrupt: 

In [None]:
""" prepare SimpleDNN for loading (from prior experiments) """

# Rebinds the notebook-wide `exprExec`. The PCDNNV1 and PCDNNV2 "prepare" cells
# assign the same name, so run only the cell for the model type you want.
exprExec = DNNExperimentExecutor()
# Carry the notebook-level debug flag over to the executor.
exprExec.debug_mode = debug_mode

# Attach the matching model factory; presumably it is what locates/loads the
# saved models from prior experiments -- TODO confirm.
exprExec.setModelFactory(SimpleDNNModelFactory())

In [None]:
""" prepare PCDNNV1 for loading (from prior experiments) """

# Rebinds the notebook-wide `exprExec`. The SimpleDNN and PCDNNV2 "prepare" cells
# assign the same name, so run only the cell for the model type you want.
exprExec = PCDNNV1ExperimentExecutor()
# Carry the notebook-level debug flag over to the executor.
exprExec.debug_mode = debug_mode

# Attach the matching model factory; presumably it is what locates/loads the
# saved models from prior experiments -- TODO confirm.
exprExec.setModelFactory(PCDNNV1ModelFactory())

In [6]:
""" prepare PCDNNV2 for loading (from prior experiments) """

# Rebinds the notebook-wide `exprExec`. The SimpleDNN and PCDNNV1 "prepare" cells
# assign the same name, so run only the cell for the model type you want.
exprExec = PCDNNV2ExperimentExecutor()
# Carry the notebook-level debug flag over to the executor.
exprExec.debug_mode = debug_mode

# Attach the matching model factory; presumably it is what locates/loads the
# saved models from prior experiments -- TODO confirm.
exprExec.setModelFactory(PCDNNV2ModelFactory())

Parent DNNModelFactory Instantiated
Parent DNNModelFactory Instantiated




## Rapid Model Testing
### (requires first running the "prepare PCDNNV2 for loading" cell above)

In [7]:
# fix seeds: Python's `random`, NumPy and TensorFlow all get the same seed
import random
import numpy as np
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# data set selection
dataType = 'randomequaltraintestsplit' #'frameworkincludedtrainexcludedtest'
inputType = 'AllSpeciesAndZmix'
dataSetMethod = '_'.join((inputType, dataType))
opscaler = "MinMaxScaler"# 'PositiveLogNormal'

# 'Y' flags and sizes forwarded to executeSingleExperiment below
ZmixPresent = 'Y'
concatenateZmix = 'Y'
kernel_constraint = 'Y'
kernel_regularizer = 'Y'
activity_regularizer = 'Y'
noOfCpv = 4
noOfNeurons = 53

# model-factory settings
exprExec.modelFactory.loss = 'mae'
exprExec.modelFactory.activation_func = 'relu'
exprExec.modelFactory.dropout_rate = 0.5

# executor overrides (debug_mode is forced off here, whatever the notebook-level flag says)
exprExec.debug_mode = False
exprExec.epochs_override = 100
exprExec.batch_size = 32
exprExec.n_models_override = 1

# initialize experiment executor...
exprExec.dm = dm
exprExec.df_experimentTracker = pd.DataFrame()
exprExec.modelType = 'PCDNNV2'

# NOTE(review): the recorded output of this cell ends with
# "AttributeError: 'PCDNNV2ExperimentExecutor' object has no attribute 'control'";
# presumably the executor expects a `control` attribute to be set before this
# call -- confirm against the executor class.
experiment_kwargs = dict(
    ZmixPresent=ZmixPresent,
    noOfCpv=noOfCpv,
    concatenateZmix=concatenateZmix,
    kernel_constraint=kernel_constraint,
    kernel_regularizer=kernel_regularizer,
    activity_regularizer=activity_regularizer,
    opscaler=opscaler,
)
history = exprExec.executeSingleExperiment(noOfNeurons, dataSetMethod, dataType, inputType,
                                           **experiment_kwargs)

--------------------self.build_and_compile_pcdnn_v2_model----------------------
53 4 Y Y Y Y
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 species_input (InputLayer)     [(None, 53)]         0           []                               
                                                                                                  
 zmix (InputLayer)              [(None, 1)]          0           []                               
                                                                                                  
 linear_embedding (Functional)  (None, 4)            216         ['species_input[0][0]']          
                                                                                                  
 concatenated_zmix_linear_embed  (None, 5)           0           ['zmix[0][0]',                   
 

AttributeError: 'PCDNNV2ExperimentExecutor' object has no attribute 'control'

In [8]:
import os
import pickle

# Output directory for the pickled custom objects and the saved base models.
BASE_MODEL_DIR = 'base_code_model'
# Number of freshly rebuilt models to save.
N_BASE_MODELS = 200

# os.makedirs(..., exist_ok=True) replaces the shell `mkdir`: it is portable and
# does not fail or emit an error when the cell is re-run and the directory exists.
os.makedirs(BASE_MODEL_DIR, exist_ok=True)

# Store the factory's custom objects next to the models.
# NOTE(review): presumably these are needed to deserialize the .h5 files -- confirm.
with open(os.path.join(BASE_MODEL_DIR, 'custom_objects.pickle'), 'wb') as f:
    pickle.dump(exprExec.modelFactory.concreteClassCustomObject, f)

# Rebuild the model N_BASE_MODELS times and save each instance to its own file.
# Note: this rebinds the notebook-level name `model` (a string in the
# "Run the Model Experiments" cell) to the last rebuilt model object.
for i in range(N_BASE_MODELS):
    model = exprExec.modelFactory.rebuild_model()
    model.save(os.path.join(BASE_MODEL_DIR, f'base_code_model{i}.h5'))
    

53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y
53 4 Y Y Y Y

In [14]:
# Scratch cell: prints the built-in help text for pickle.dump (shown in the
# output below). It does not change any experiment state.
help(pickle.dump)

Help on built-in function dump in module _pickle:

dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None)
    Write a pickled representation of obj to the open file object file.
    
    This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
    be more efficient.
    
    The optional *protocol* argument tells the pickler to use the given
    protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
    protocol is 4. It was introduced in Python 3.4, and is incompatible
    with previous versions.
    
    Specifying a negative protocol version selects the highest protocol
    version supported.  The higher the protocol used, the more recent the
    version of Python needed to read the pickle produced.
    
    The *file* argument must have a write() method that accepts a single
    bytes argument.  It can thus be a file object opened for binary
    writing, an io.BytesIO instance, or any other custom object that meets
    this interface.
   

## Results Plotting & Analysis

In [None]:
loss = 727684307.3417714 # Update Me!

def print_scientific_notation(number):
    """Print ``number`` as ``<mantissa>*10^<power>`` with ``1 <= |mantissa| < 10``.

    Parameters
    ----------
    number : int or float
        Finite value to format. Zero is printed with power 0; negative values
        keep their sign on the mantissa.

    Returns
    -------
    None
        The formatted text is written to stdout.
    """
    if number == 0:
        # log10(0) is -inf; zero has no meaningful exponent, so use 0.
        power = 0
    else:
        magnitude = abs(number)
        # floor (not int-truncation) so values below 1 get the right negative exponent
        power = int(np.floor(np.log10(magnitude)))
        # guard against log10 rounding just below/above an exact power of ten
        if magnitude / 10.0**power >= 10:
            power += 1
        elif magnitude / 10.0**power < 1:
            power -= 1
    # use the argument itself, not the notebook-level `loss` variable
    print(f"Scientific Notation: {(number/10**power)}*10^{power}")
print_scientific_notation(loss)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
plt.plot(np.maximum(history.history['val_R2'][30:], 0))
plt.title('val_R^2 vs epochs')
plt.ylabel('percent')
plt.xlabel('epochs')
plt.show()

In [None]:
import importlib
import model_analyzer.model_analysis as model_analysis

# Reload so that edits made to model_analysis since the first import are picked up.
importlib.reload(model_analysis)

# Open the best saved model and rebuild the train/test data with the settings
# that were stored alongside it.
bestModel, experimentSettings = exprExec.modelFactory.openBestModel()
dm.createTrainTestData(
    experimentSettings['dataSetMethod'],
    experimentSettings['noOfCpv'],
    experimentSettings['ipscaler'],
    experimentSettings['opscaler'],
)

print(f'\nexperimentSettings: {experimentSettings}')
print(f'\nbestModel.input_shape: {bestModel.input_shape}')

# Inspector used by the feature-importance / partial-dependence cells.
inspector = model_analysis.ModelInspector(exprExec.modelFactory, dm)

In [None]:
# Use fewer permutation repeats when the notebook-level debug flag is set.
if debug_mode:
    n_repeats = 5
else:
    n_repeats = 20
inspector.plot_permutation_feature_importance(n_repeats=n_repeats)

In [None]:
# Partial-dependence plots from the ModelInspector created in the model-analysis cell.
inspector.plot_partial_dependence()

### Print & Record Linear Embeddings Output (NOTE: Deprecated)

In [None]:
import numpy as np

def inspect_PCA(X):
    """Summarise the per-column variance of a 2-D array.

    Columns are reordered from highest to lowest variance.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array; variance is taken along axis 0 (one value per column).

    Returns
    -------
    dict
        ``'X'``      -- copy of ``X`` with columns in descending-variance order,
        ``'Var'``    -- the per-column variances in that same order,
        ``'CumVar'`` -- running sum of ``'Var'``.
    """
    column_variance = X.var(axis=0)

    # column order: largest variance first
    descending_order = np.argsort(column_variance)[::-1]
    sorted_variance = column_variance[descending_order]

    return {
        'X': X[:, descending_order],
        'Var': sorted_variance,
        'CumVar': np.add.accumulate(sorted_variance),
    }


X, Y, rom, zmix = dm.getAllData()
FullDataset = inspect_PCA(X)

# Reset first so a DNN_PCA left over from an earlier run of this cell can never
# be mistaken for the result of the current model.
DNN_PCA = None
try:
    # Only the encoder lookup is guarded: a KeyError here is what the
    # "No Linear Auto Encoder!" message is about.
    linearAutoEncoder = exprExec.modelFactory.getLinearEncoder()
except KeyError:
    print('No Linear Auto Encoder!')
else:
    # Errors from predict()/inspect_PCA() now propagate instead of being
    # reported as a missing encoder.
    X = linearAutoEncoder.predict(FullDataset['X'])
    DNN_PCA = inspect_PCA(X)

In [None]:
#dm.createDataset
import matplotlib.pyplot as plt
import copy

# Work on a deep copy so the notebook-wide `dm` is not re-split.
dm_PurePCA = copy.deepcopy(dm)
pure_pca_settings = dict(
    dataSetMethod='PurePCA_randomequalflamesplit',
    numCpvComponents=2,
    ipscaler=None,
    opscaler=None,
)
dm_PurePCA.createTrainTestData(**pure_pca_settings)

X, Y, rom, zmix = dm_PurePCA.getAllData()
PurePCA = inspect_PCA(X)
print(PurePCA['Var'])
print(DNN_PCA['Var'])

# Both cumulative-variance curves are normalised by the summed variance of the
# full dataset: red = PurePCA, blue = DNN_PCA.
total_variance = FullDataset['Var'].sum()
plt.plot(PurePCA['CumVar'] / total_variance, color='r')
plt.plot(DNN_PCA['CumVar'] / total_variance, color='b')
plt.title('PCA Total Variance Comparison')