In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


# Make numpy printouts easier to read.
np.set_printoptions(precision=3, suppress=True)

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

print(tf.__version__)

from scipy.stats import pearsonr, spearmanr
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "serif",
    "font.serif": ["Palatino"],
    "font.size": 20,
})

2.4.0-rc0


In [2]:
diagsN7m5 = pd.read_csv('../../data/tobias/EC3_data/diagsN7m5.csv',names=np.arange(10000)).T
fidelsN7m5 = pd.read_csv('../../data/tobias/EC3_data/fidelsN7m5.csv',names=np.arange(10000)).T

N_instance, N_state = diagsN7m5.shape

diagsN7m5 = (diagsN7m5 - diagsN7m5.to_numpy().min())
diagsN7m5 /= (diagsN7m5.to_numpy().max() - diagsN7m5.to_numpy().min())

In [3]:
#convert fidelities into one_hot representation

In [4]:
uni_fidels = np.sort(fidelsN7m5[0].unique())
N_unique = uni_fidels.shape[0]

In [5]:
one_hot_fidels = pd.DataFrame()

In [6]:
for uni_fidel in uni_fidels:
    one_hot_fidels[uni_fidel] = 1*(fidelsN7m5 == uni_fidel).to_numpy().flatten()

In [7]:
one_hot_fidels

Unnamed: 0,0.405344,0.446338,0.504930,0.529460,0.542748,0.579066,0.596918,0.601784,0.617170,0.620147,...,0.656487,0.662125,0.665196,0.694049,0.696092,0.717018,0.761217,0.765388,0.774484,0.801876
0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9996,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
9997,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
9998,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
#add state number as second channel, ordered by energy, normalized to (0,1)

In [9]:
diags_CNN = diagsN7m5.to_numpy().reshape((-1,N_state,1))
state_CNN = np.linspace(0,1,N_state).repeat(N_instance).reshape(N_state,N_instance).T.reshape((-1,N_state,1))

diags_CNN_twoChannel = np.append(diags_CNN,state_CNN,axis=2)


In [10]:
X_train, X_test, Y_train, Y_test = train_test_split(diags_CNN_twoChannel, \
                                                     one_hot_fidels,\
                                                     test_size=.2)



In [11]:
remaining = 1

X_train_reduced = np.zeros((X_train.shape[0],N_state*remaining,2))
choice = np.arange(N_state)>=N_state*(1-remaining)
for i in range(X_train.shape[0]):
    np.random.shuffle(choice)
    X_train_reduced[i,:] = X_train[i,choice]

X_test_reduced = np.zeros((X_test.shape[0],N_state*remaining,2))
choice = np.arange(N_state)>=N_state*(1-remaining)
for i in range(X_test.shape[0]):
    np.random.shuffle(choice)
    X_test_reduced[i,:] = X_test[i,choice]

In [12]:
#CNN

In [24]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [25]:
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp

In [53]:
HP_CONV = hp.HParam('conv', hp.Discrete([6]))
HP_POOL = hp.HParam('pool', hp.Discrete([16]))
HP_DROPOUT = hp.HParam('drop', hp.RealInterval(0.0, 0.1))
HP_DENSE = hp.HParam('dense', hp.Discrete([16]))
HP_LR = hp.HParam('lr', hp.RealInterval(0.001,.002))

METRIC_ACCURACY = 'accuracy'
METRIC_LOSS = 'loss'

with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        hparams=[HP_CONV, HP_POOL, HP_DROPOUT, HP_DENSE, HP_LR],
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
    )

In [54]:
def model_HP(hparams):
    tf.keras.backend.clear_session()
    model = keras.Sequential(name='CNN_classifier')
    model.add(layers.Conv1D(1*hparams[HP_CONV], (3,), activation='relu', padding='same', input_shape=(N_state*remaining, 2)))
    model.add(layers.Conv1D(2*hparams[HP_CONV], (3,), activation='relu', padding='same'))
    model.add(layers.Conv1D(4*hparams[HP_CONV], (3,), activation='relu', padding='same'))
    model.add(layers.AveragePooling1D((hparams[HP_POOL],)))
    model.add(layers.Flatten())
    model.add(layers.Dropout(hparams[HP_DROPOUT]))
    model.add(layers.Dense(hparams[HP_DENSE], activation='relu'))
    model.add(layers.Dense(hparams[HP_DENSE], activation='relu'))
    model.add(layers.Dense(N_unique, activation=tf.keras.activations.softmax))

    model.compile(loss='CategoricalCrossentropy',optimizer=tf.keras.optimizers.Adam(hparams[HP_LR]),metrics=['accuracy'])

    hist = model.fit(X_train_reduced, Y_train, \
             epochs=10,verbose=0,\
             validation_split=.2,batch_size=128)
    
    return hist#[-1]


In [55]:
def run(run_dir, hparams):
    with tf.summary.create_file_writer(run_dir).as_default():
        hp.hparams(hparams)  # record the values used in this trial
        hist = model_HP(hparams)
        epochs = np.arange(hist.params['epochs'])
        accuracy = hist.history['val_accuracy']
        loss = hist.history['val_loss']
        for i,epoch in enumerate(epochs):
            tf.summary.scalar(METRIC_ACCURACY, accuracy[i], step=epoch)
            tf.summary.scalar(METRIC_LOSS, loss[i], step=epoch)

In [56]:
import itertools

In [57]:
conv = HP_CONV.domain.values
pool = HP_POOL.domain.values
drop = np.linspace(HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value,2)
dense = HP_DENSE.domain.values
lr = np.linspace(HP_LR.domain.min_value, HP_LR.domain.max_value,2)

In [58]:
HP_iterator = itertools.product(conv,pool,drop,dense,lr)

In [59]:
session_num = 0
for HP_iter in HP_iterator:
    hparams = {
                HP_CONV: HP_iter[0],
                HP_POOL: HP_iter[1],
                HP_DROPOUT: HP_iter[2],
                HP_DENSE: HP_iter[3],
                HP_LR: HP_iter[4],
              }
    run_name = "run-%d" % session_num
    print('--- Starting trial: %s' % run_name)
    print({h.name: hparams[h] for h in hparams})
    run('logs/hparam_tuning/' + run_name, hparams)
    session_num += 1

--- Starting trial: run-0
{'conv': 6, 'pool': 16, 'drop': 0.0, 'dense': 16, 'lr': 0.001}
--- Starting trial: run-1
{'conv': 6, 'pool': 16, 'drop': 0.0, 'dense': 16, 'lr': 0.002}
--- Starting trial: run-2
{'conv': 6, 'pool': 16, 'drop': 0.1, 'dense': 16, 'lr': 0.001}
--- Starting trial: run-3
{'conv': 6, 'pool': 16, 'drop': 0.1, 'dense': 16, 'lr': 0.002}


In [61]:
%tensorboard --logdir logs/hparam_tuning

In [23]:
model.compile(loss='CategoricalCrossentropy',optimizer=tf.keras.optimizers.Adam(.001),metrics=['accuracy'])

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=100)
    
hist = model.fit(X_train_reduced, Y_train, \
             epochs=2000,verbose=0,\
             validation_split=.2,batch_size=128,\
             callbacks=[stop_early])

NameError: name 'model' is not defined

In [None]:
plt.semilogy(hist.history['loss'], label='loss')
plt.semilogy(hist.history['val_loss'], label = 'val loss')
plt.xlabel('Epoch')
plt.legend()

In [None]:
plt.plot(hist.history['accuracy'], label='accuracy')
plt.plot(hist.history['val_accuracy'], label = 'val accuracy')
plt.xlabel('Epoch')
plt.legend()

In [None]:
#apply trained CNN to test data, convert test results to dataframe of groups of unique values

In [None]:
df_pivot = pd.DataFrame({'true':Y_test.to_numpy().argmax(axis=1),\
             'predict':model.predict(X_test_reduced).argmax(axis=1)})
df_pivot = df_pivot.groupby(df_pivot.columns.tolist()).size().reset_index()
df_pivot = df_pivot.pivot("true", "predict", 0)

In [None]:
ax = sns.heatmap(df_pivot)