In [1]:
from cerebro.backend import SparkBackend
from cerebro.keras import SparkEstimator

# datas storage for intermediate data and model artifacts.
from cerebro.storage import LocalStore, HDFSStore

# Model selection/AutoML methods.
from cerebro.tune import GridSearch, RandomSearch, TPESearch

# Utility functions for specifying the search space.
from cerebro.tune import hp_choice, hp_uniform, hp_quniform, hp_loguniform, hp_qloguniform

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from pyspark.sql import SparkSession
import numpy as np
import os

os.environ["PYSPARK_PYTHON"] = '/usr/bin/python3.6'
os.environ["PYSPARK_DRIVER_PYTHON"] = '/usr/bin/python3.6'

from pyspark import SparkConf

conf = SparkConf().setAppName('cluster') \
    .setMaster('spark://10.10.1.1:7077') \
    .set('spark.task.cpus', '16')
spark = SparkSession.builder.config(conf=conf).getOrCreate()
spark.sparkContext.addPyFile("cerebro.zip")

work_dir = '/var/nfs/'
backend = SparkBackend(spark_context=spark.sparkContext, num_workers=4)
store = LocalStore(prefix_path=work_dir + 'test/')

df = spark.read.format("libsvm") \
    .option("numFeatures", "784") \
    .load(work_dir + "data/mnist.scale")
# spark = SparkSession \
#     .builder \
#     .appName("Cerebro Example") \
#     .getOrCreate()

# ...
# work_dir = '/Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/'
# backend = SparkBackend(spark_context=spark.sparkContext, num_workers=1)
# store = LocalStore(prefix_path=work_dir + 'test/')

# df = spark.read.format("libsvm") \
#     .option("numFeatures", "784") \
#     .load("/Users/zijian/Desktop/ucsd/cse234/project/mnist/mnist.scale")

from pyspark.ml.feature import OneHotEncoderEstimator

encoder = OneHotEncoderEstimator(dropLast=False)
encoder.setInputCols(["label"])
encoder.setOutputCols(["label_OHE"])

encoder_model = encoder.fit(df)
encoded = encoder_model.transform(df)

feature_columns=['features']
label_columns=['label_OHE']
train_df, test_df = encoded.randomSplit([0.8, 0.2], seed=100)

CEREBRO => Time: 2021-12-04 04:48:20, Running 4 Workers


In [2]:
from keras_tuner.engine import hyperparameters
from keras_tuner import HyperParameters
import autokeras as ak
from cerebro.nas.hphpmodel import HyperHyperModel

img_shape = (28, 28, 1)

input_node = ak.ImageInput()
output_node = ak.ConvBlock(
    kernel_size=hyperparameters.Fixed('kernel_size', value=3),
    num_blocks=hyperparameters.Fixed('num_blocks', value=1),
    num_layers=hyperparameters.Fixed('num_layers', value=2),
)(input_node)
output_node = ak.ClassificationHead()(output_node)
am = HyperHyperModel(input_node, output_node, seed=2000)

am.resource_bind(
    backend=backend, 
    store=store,
    feature_columns=feature_columns,
    label_columns=label_columns,
    evaluation_metric='accuracy', 
)

am.tuner_bind(
    tuner="greedy", 
#     tuner="randomsearch",
    hyperparameters=None, 
    objective="val_accuracy",
    max_trials=2,
    overwrite=True,
    exploration=0.3,
)

In [3]:
am.test_tuner_space(df=train_df)
am.tuner.search_space_summary()

Search space summary
Default search space size: 12
conv_block_1/kernel_size (Fixed)
{'conditions': [], 'value': 3}
conv_block_1/separable (Boolean)
{'default': False, 'conditions': []}
conv_block_1/max_pooling (Boolean)
{'default': True, 'conditions': []}
conv_block_1/dropout (Choice)
{'default': 0, 'conditions': [], 'values': [0.0, 0.25, 0.5], 'ordered': True}
conv_block_1/num_blocks (Fixed)
{'conditions': [], 'value': 1}
conv_block_1/num_layers (Fixed)
{'conditions': [], 'value': 2}
conv_block_1/filters_0_0 (Choice)
{'default': 32, 'conditions': [], 'values': [16, 32, 64, 128, 256, 512], 'ordered': True}
conv_block_1/filters_0_1 (Choice)
{'default': 32, 'conditions': [], 'values': [16, 32, 64, 128, 256, 512], 'ordered': True}
classification_head_1/spatial_reduction_1/reduction_type (Choice)
{'default': 'flatten', 'conditions': [], 'values': ['flatten', 'global_max', 'global_avg'], 'ordered': False}
classification_head_1/dropout (Choice)
{'default': 0, 'conditions': [], 'values': [0.0

In [4]:
tuner = am.tuner

tuner.oracle.hyperparameters.values

{'conv_block_1/kernel_size': 3,
 'conv_block_1/separable': False,
 'conv_block_1/max_pooling': True,
 'conv_block_1/dropout': 0,
 'conv_block_1/num_blocks': 1,
 'conv_block_1/num_layers': 2,
 'conv_block_1/filters_0_0': 32,
 'conv_block_1/filters_0_1': 32,
 'classification_head_1/spatial_reduction_1/reduction_type': 'flatten',
 'classification_head_1/dropout': 0,
 'optimizer': 'adam',
 'learning_rate': 0.001}

In [5]:
tuner.oracle.cold_start(5)
tuner.oracle.initial_hps

[{'conv_block_1/kernel_size': 3,
  'conv_block_1/separable': True,
  'conv_block_1/max_pooling': True,
  'conv_block_1/dropout': 0.0,
  'conv_block_1/num_blocks': 1,
  'conv_block_1/num_layers': 2,
  'conv_block_1/filters_0_0': 256,
  'conv_block_1/filters_0_1': 32,
  'classification_head_1/spatial_reduction_1/reduction_type': 'flatten',
  'classification_head_1/dropout': 0.25,
  'optimizer': 'adam',
  'learning_rate': 2e-05},
 {'conv_block_1/kernel_size': 3,
  'conv_block_1/separable': False,
  'conv_block_1/max_pooling': False,
  'conv_block_1/dropout': 0.5,
  'conv_block_1/num_blocks': 1,
  'conv_block_1/num_layers': 2,
  'conv_block_1/filters_0_0': 256,
  'conv_block_1/filters_0_1': 32,
  'classification_head_1/spatial_reduction_1/reduction_type': 'global_avg',
  'classification_head_1/dropout': 0.25,
  'optimizer': 'adam_weight_decay',
  'learning_rate': 0.0001},
 {'conv_block_1/kernel_size': 3,
  'conv_block_1/separable': True,
  'conv_block_1/max_pooling': True,
  'conv_block_1/

In [6]:
trials = tuner.oracle.create_trials(tuner.parallelsim, tuner.tuner_id)

In [8]:
for trial in trials:
    print(trial.status)

RUNNING
RUNNING
STOPPED
STOPPED


In [7]:
tuner.begin_trials(trials)


Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
conv_block_1/ke...|3                 |?                 
conv_block_1/se...|True              |?                 
conv_block_1/ma...|True              |?                 
conv_block_1/dr...|0                 |?                 
conv_block_1/nu...|1                 |?                 
conv_block_1/nu...|2                 |?                 
conv_block_1/fi...|256               |?                 
conv_block_1/fi...|32                |?                 
classification_...|flatten           |?                 
classification_...|0.25              |?                 
optimizer         |adam              |?                 
learning_rate     |2e-05             |?                 


Search: Running Trial #2

Hyperparameter    |Value             |Best Value So Far 
conv_block_1/ke...|3                 |?                 
conv_block_1/se...|False             |?                 
conv_block_1/ma...|False         

KeyError: '61e3485d6e3c868625fc240e5be5466b'

In [14]:
tuner.begin_trials(trials)


Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
optimizer         |adam              |?                 
learning_rate     |0.001             |?                 
batch_size        |32                |?                 
conv_block_1/ke...|3                 |?                 
conv_block_1/se...|True              |?                 
conv_block_1/ma...|False             |?                 
conv_block_1/dr...|0.5               |?                 
conv_block_1/nu...|1                 |?                 
conv_block_1/nu...|2                 |?                 
conv_block_1/fi...|64                |?                 
conv_block_1/fi...|32                |?                 
classification_...|global_avg        |?                 
classification_...|0                 |?                 


Search: Running Trial #2

Hyperparameter    |Value             |Best Value So Far 
optimizer         |adam              |?                 
learning_rate     |0.0001        

In [7]:
hp_names = tuner.oracle._select_hps()
hp_names

['conv_block_1/filters_0_0']

In [8]:
values = tuner.oracle._generate_hp_values(hp_names)
values

{'optimizer': 'adam',
 'learning_rate': 0.001,
 'batch_size': 32,
 'conv_block_1/kernel_size': 3,
 'conv_block_1/separable': False,
 'conv_block_1/max_pooling': True,
 'conv_block_1/dropout': 0,
 'conv_block_1/num_blocks': 1,
 'conv_block_1/num_layers': 2,
 'conv_block_1/filters_0_0': 512,
 'conv_block_1/filters_0_1': 32,
 'classification_head_1/spatial_reduction_1/reduction_type': 'flatten',
 'classification_head_1/dropout': 0}

In [7]:
response = tuner.oracle.populate_space(111)
response

{'status': 'RUNNING',
 'values': {'optimizer': 'adam',
  'learning_rate': 0.001,
  'batch_size': 32,
  'conv_block_3/kernel_size': 3,
  'conv_block_3/separable': False,
  'conv_block_3/max_pooling': True,
  'conv_block_3/dropout': 0,
  'conv_block_3/num_blocks': 1,
  'conv_block_3/num_layers': 2,
  'conv_block_3/filters_0_0': 128,
  'conv_block_3/filters_0_1': 32,
  'classification_head_3/spatial_reduction_1/reduction_type': 'flatten',
  'classification_head_3/dropout': 0}}

In [8]:
import keras_tuner
from cerebro.nas.tuners.greedy import Trie, TrieNode
trie = Trie()
best_hps = tuner.oracle._get_best_hps()
for hp in best_hps.space:
    # Not picking the fixed hps for generating new values.
    if best_hps.is_active(hp) and not isinstance(
        hp, keras_tuner.engine.hyperparameters.Fixed
    ):
        trie.insert(hp.name)
all_nodes = trie.nodes
trie

<cerebro.nas.tuners.greedy.Trie at 0x183244ad0>

In [9]:
trie.get_hp_names(all_nodes[0])

['optimizer',
 'learning_rate',
 'batch_size',
 'conv_block_3/separable',
 'conv_block_3/max_pooling',
 'conv_block_3/dropout',
 'conv_block_3/filters_0_0',
 'conv_block_3/filters_0_1',
 'classification_head_3/spatial_reduction_1/reduction_type',
 'classification_head_3/dropout']

In [12]:
probabilities = np.array([1 / node.num_leaves for node in all_nodes])
sum_p = np.sum(probabilities)
probabilities = probabilities / sum_p
node = np.random.choice(all_nodes, p=probabilities)
print(trie.get_hp_names(node))

tuner.oracle._generate_hp_values(trie.get_hp_names(node))

['conv_block_3/separable']


{'optimizer': 'adam',
 'learning_rate': 0.001,
 'batch_size': 32,
 'conv_block_3/kernel_size': 3,
 'conv_block_3/separable': True,
 'conv_block_3/max_pooling': True,
 'conv_block_3/dropout': 0,
 'conv_block_3/num_blocks': 1,
 'conv_block_3/num_layers': 2,
 'conv_block_3/filters_0_0': 32,
 'conv_block_3/filters_0_1': 32,
 'classification_head_3/spatial_reduction_1/reduction_type': 'flatten',
 'classification_head_3/dropout': 0}

In [None]:
# Early Stage

In [3]:
cuz_hps = HyperParameters()
cuz_hps.Choice('learning_rate', values=[0.1,0.01])
cuz_hps.Choice('batch_size', values=[32,64,128])

am.tuner_bind(tuner="randomsearch", hyperparameters=cuz_hps)
am.test_tuner_space(df=train_df)

am.tuner.search_space_summary()

INFO:tensorflow:Reloading Oracle from existing project ./test/oracle.json
INFO:tensorflow:Reloading Tuner from ./test/tuner0.json


2021-11-21 18:01:29.584433: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-11-21 18:01:29.584715: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Search space summary
Default search space size: 30
structured_data_block_1/normalize (Boolean)
{'default': False, 'conditions': []}
structured_data_block_1/dense_block_1/use_batchnorm (Boolean)
{'default': False, 'conditions': []}
structured_data_block_1/dense_block_1/num_layers (Choice)
{'default': 2, 'conditions': [], 'values': [1, 2, 3], 'ordered': True}
structured_data_block_1/dense_block_1/units_0 (Choice)
{'default': 32, 'conditions': [], 'values': [16, 32, 64, 128, 256, 512, 1024], 'ordered': True}
structured_data_block_1/dense_block_1/dropout (Choice)
{'default': 0.0, 'conditions': [], 'values': [0.0, 0.25, 0.5], 'ordered': True}
structured_data_block_1/dense_block_1/units_1 (Choice)
{'default': 32, 'conditions': [], 'values': [16, 32, 64, 128, 256, 512, 1024], 'ordered': True}
structured_data_block_2/normalize (Boolean)
{'default': False, 'conditions': []}
structured_data_block_2/dense_block_2/use_batchnorm (Boolean)
{'default': False, 'conditions': []}
structured_data_block_2

In [9]:
tuner = am.tuner

trials = tuner.oracle.create_trials(2, tuner.tuner_id)
for trial in trials:
    print(trial.hyperparameters.values)

{'learning_rate': 0.1, 'batch_size': 128, 'structured_data_block_1/normalize': False, 'structured_data_block_1/dense_block_1/use_batchnorm': True, 'structured_data_block_1/dense_block_1/num_layers': 2, 'structured_data_block_1/dense_block_1/units_0': 512, 'structured_data_block_1/dense_block_1/dropout': 0.25, 'structured_data_block_1/dense_block_1/units_1': 16, 'classification_head_1/dropout': 0.0, 'optimizer': 'sgd', 'structured_data_block_1/dense_block_1/units_2': 16, 'structured_data_block_2/normalize': True, 'structured_data_block_2/dense_block_2/use_batchnorm': True, 'structured_data_block_2/dense_block_2/num_layers': 3, 'structured_data_block_2/dense_block_2/units_0': 64, 'structured_data_block_2/dense_block_2/dropout': 0.0, 'structured_data_block_2/dense_block_2/units_1': 32, 'structured_data_block_3/normalize': False, 'structured_data_block_3/dense_block_3/use_batchnorm': True, 'structured_data_block_3/dense_block_3/num_layers': 1, 'structured_data_block_3/dense_block_3/units_0

In [10]:
kt_trial = tuner.oracle.create_trial(tuner.tuner_id)
kt_trial.hyperparameters.values

{'learning_rate': 0.01,
 'batch_size': 64,
 'structured_data_block_1/normalize': False,
 'structured_data_block_1/dense_block_1/use_batchnorm': True,
 'structured_data_block_1/dense_block_1/num_layers': 1,
 'structured_data_block_1/dense_block_1/units_0': 256,
 'structured_data_block_1/dense_block_1/dropout': 0.25,
 'structured_data_block_1/dense_block_1/units_1': 128,
 'classification_head_1/dropout': 0.0,
 'optimizer': 'adam',
 'structured_data_block_1/dense_block_1/units_2': 512,
 'structured_data_block_2/normalize': False,
 'structured_data_block_2/dense_block_2/use_batchnorm': False,
 'structured_data_block_2/dense_block_2/num_layers': 1,
 'structured_data_block_2/dense_block_2/units_0': 256,
 'structured_data_block_2/dense_block_2/dropout': 0.25,
 'structured_data_block_2/dense_block_2/units_1': 128,
 'structured_data_block_3/normalize': False,
 'structured_data_block_3/dense_block_3/use_batchnorm': False,
 'structured_data_block_3/dense_block_3/num_layers': 2,
 'structured_data_

In [12]:
am.model_selection.evaluation_metric

'loss'

In [10]:
model = tuner.hypermodel.build(kt_trial.hyperparameters)

In [11]:
model.optimizer

<autokeras.keras_layers.AdamWeightDecay at 0x17f9b9810>

In [12]:
tuner.hypermodel.hypermodel._get_loss()

{'classification_head_1': <tensorflow.python.keras.losses.CategoricalCrossentropy at 0x17f754b10>}

In [13]:
tuner.hypermodel.hypermodel._get_metrics().values()

dict_values([['accuracy']])

In [14]:
tuner.hypermodel.hypermodel.batch_size

32

In [15]:
model.get_config()

{'name': 'model',
 'layers': [{'class_name': 'InputLayer',
   'config': {'batch_input_shape': (None, 1),
    'dtype': 'float64',
    'sparse': False,
    'ragged': False,
    'name': 'input_1'},
   'name': 'input_1',
   'inbound_nodes': []},
  {'class_name': 'Custom>MultiCategoryEncoding',
   'config': {'name': 'multi_category_encoding',
    'trainable': True,
    'dtype': 'float32',
    'encoding': ListWrapper(['none'])},
   'name': 'multi_category_encoding',
   'inbound_nodes': [[['input_1', 0, 0, {}]]]},
  {'class_name': 'Dense',
   'config': {'name': 'dense',
    'trainable': True,
    'dtype': 'float32',
    'units': 512,
    'activation': 'linear',
    'use_bias': True,
    'kernel_initializer': {'class_name': 'GlorotUniform',
     'config': {'seed': None}},
    'bias_initializer': {'class_name': 'Zeros', 'config': {}},
    'kernel_regularizer': None,
    'bias_regularizer': None,
    'activity_regularizer': None,
    'kernel_constraint': None,
    'bias_constraint': None},
   'n

In [38]:
model.metrics_names

[]

In [16]:
tf.keras.utils.get_custom_objects()

{'Custom>CastToFloat32': autokeras.keras_layers.CastToFloat32,
 'Custom>ExpandLastDim': autokeras.keras_layers.ExpandLastDim,
 'Custom>MultiCategoryEncoding': autokeras.keras_layers.MultiCategoryEncoding,
 'Custom>BertTokenizer': autokeras.keras_layers.BertTokenizer,
 'Custom>BertEncoder': autokeras.keras_layers.BertEncoder,
 'Custom>AdamWeightDecay': autokeras.keras_layers.AdamWeightDecay,
 'Custom>WarmUp': autokeras.keras_layers.WarmUp,
 'Custom>gelu': <function autokeras.keras_layers.gelu(x)>,
 'Custom>OnDeviceEmbedding': autokeras.keras_layers.OnDeviceEmbedding,
 'Custom>PositionEmbedding': autokeras.keras_layers.PositionEmbedding,
 'Custom>SelfAttentionMask': autokeras.keras_layers.SelfAttentionMask,
 'Custom>Transformer': autokeras.keras_layers.Transformer,
 'Custom>MultiHeadAttention': autokeras.keras_layers.MultiHeadAttention,
 'Custom>DenseEinsum': autokeras.keras_layers.DenseEinsum,
 'Custom>MaskedSoftmax': autokeras.keras_layers.MaskedSoftmax}

In [19]:
xtmp = [x_train[:,i,np.newaxis] for i in range(x_train.shape[1])]
dataset, validation_data = am._convert_to_dataset(
            x=xtmp, y=y_train, validation_data=None, batch_size=32
        )
ests = tuner.trials2estimators(trials, dataset)

2021-11-20 22:00:19.559444: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)


In [20]:
ests[0].getCustomObjects()

{'Custom>CastToFloat32': autokeras.keras_layers.CastToFloat32,
 'Custom>ExpandLastDim': autokeras.keras_layers.ExpandLastDim,
 'Custom>MultiCategoryEncoding': autokeras.keras_layers.MultiCategoryEncoding,
 'Custom>BertTokenizer': autokeras.keras_layers.BertTokenizer,
 'Custom>BertEncoder': autokeras.keras_layers.BertEncoder,
 'Custom>AdamWeightDecay': autokeras.keras_layers.AdamWeightDecay,
 'Custom>WarmUp': autokeras.keras_layers.WarmUp,
 'Custom>gelu': <function autokeras.keras_layers.gelu(x)>,
 'Custom>OnDeviceEmbedding': autokeras.keras_layers.OnDeviceEmbedding,
 'Custom>PositionEmbedding': autokeras.keras_layers.PositionEmbedding,
 'Custom>SelfAttentionMask': autokeras.keras_layers.SelfAttentionMask,
 'Custom>Transformer': autokeras.keras_layers.Transformer,
 'Custom>MultiHeadAttention': autokeras.keras_layers.MultiHeadAttention,
 'Custom>DenseEinsum': autokeras.keras_layers.DenseEinsum,
 'Custom>MaskedSoftmax': autokeras.keras_layers.MaskedSoftmax}

In [21]:
ests[0].get_model_shapes()

([[-1, 1], [-1, 1], [-1, 1], [-1, 1]], [[-1, 1]])

In [22]:
model = ests[0].getModel()
model

<tensorflow.python.keras.engine.functional.Functional at 0x17f926090>

In [23]:
model.inputs

[<KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'input_1')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'input_2')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'input_3')>,
 <KerasTensor: shape=(None, 1) dtype=float64 (created by layer 'input_4')>]

In [24]:
model.outputs

[<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'classification_head_1')>]

In [25]:
ests[0].getMetrics()

[['accuracy']]

In [26]:
["val_" + name for name in ests[0].getMetrics()]

TypeError: can only concatenate str (not "list") to str

In [27]:
[name for name in ests[0].getMetrics()]

[['accuracy']]

In [28]:
tuner.hypermodel.hypermodel._get_metrics()

{'classification_head_1': ['accuracy']}

In [29]:
tuner.hypermodel.hypermodel._get_metrics()

In [1]:
from cerebro.backend import SparkBackend
from cerebro.keras import SparkEstimator

# datas storage for intermediate data and model artifacts.
from cerebro.storage import LocalStore, HDFSStore

# Model selection/AutoML methods.
from cerebro.tune import GridSearch, RandomSearch, TPESearch

# Utility functions for specifying the search space.
from cerebro.tune import hp_choice, hp_uniform, hp_quniform, hp_loguniform, hp_qloguniform

import tensorflow as tf
# tf.config.run_functions_eagerly(True)

from pyspark.sql import SparkSession


spark = SparkSession \
    .builder \
    .appName("Cerebro Iris") \
    .getOrCreate()

...

backend = SparkBackend(spark_context=spark.sparkContext, num_workers=1)
store = LocalStore(prefix_path='/Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/experiments')

from pyspark.ml.feature import OneHotEncoderEstimator

df = spark.read.csv("/Users/zijian/Desktop/ucsd/cse234/project/cerebro-system/Iris_clean.csv", header=True, inferSchema=True)

encoder = OneHotEncoderEstimator(dropLast=False)
encoder.setInputCols(["Species"])
encoder.setOutputCols(["Species_OHE"])

encoder_model = encoder.fit(df)
encoded = encoder_model.transform(df)

feature_columns=['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
label_columns=['Species_OHE']

# Initialize input DataFrames.
# You can download sample dataset from https://apache.googlesource.com/spark/+/master/data/mllib/sample_libsvm_data.txt

train_df, test_df = encoded.randomSplit([0.8, 0.2])

21/11/22 03:02:21 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
21/11/22 03:02:21 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


CEREBRO => Time: 2021-11-22 03:02:22, Running 1 Workers


In [2]:
import numpy as np

x_train = np.array(train_df.select(feature_columns).collect())
y_train = np.array(train_df.select(label_columns).collect())
x_train = [x_train[:,i,np.newaxis] for i in range(x_train.shape[1])]
y_train = np.squeeze(y_train,1)

In [3]:
inputs = [tf.keras.Input(shape=(1,)) for col in feature_columns]
concat = tf.keras.layers.Concatenate()(inputs)
output1 = tf.keras.layers.Dense(32, activation=tf.nn.relu)(concat)
output2 = tf.keras.layers.Dense(32, activation=tf.nn.relu)(output1)
output = tf.keras.layers.Dense(3, activation=tf.nn.softmax)(output2)
model = tf.keras.Model(inputs, output)

2021-11-22 03:02:26.737898: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-11-22 03:02:26.738147: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
model.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy'],
)

In [5]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 1)]          0                                            
______________________________________________________________________________________________

In [6]:
hist = model.fit(x_train, y_train, batch_size=16, epochs=10, verbose=1)

2021-11-22 03:02:26.936090: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [37]:
x_test = np.array(test_df.select(feature_columns).collect())
y_test = np.array(test_df.select(label_columns).collect())
x_test = [x_test[:,i,np.newaxis] for i in range(x_test.shape[1])]
y_test = np.squeeze(y_test,1)

model.evaluate(x_test, y_test)



[0.12223644554615021, 0.9655172228813171]

In [1]:
import json

In [10]:
m = {'model_0_1638446850': {
  'train_loss': [2.302694082260132],
  'train_accuracy': [0.09375],
  'val_loss': [2.3033695220947266],
  'val_accuracy': [0.0625]},
    'model_0_1638446851': {
  'train_loss': [2.302694082260132],
  'train_accuracy': [0.09375],
  'val_loss': [2.3033695220947266],
  'val_accuracy': [0.0625]}
    }
with open("/users/Zijian/test.txt", "w") as file:
    file.write(json.dumps(m))