In [917]:
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf

from tensorflow.keras import layers
import time
from tensorflow import data
import pandas as pd
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense,ZeroPadding2D, BatchNormalization, Activation, Layer, ReLU, LeakyReLU,Conv2D,AveragePooling2D,UpSampling2D,Reshape,Flatten
from tensorflow.keras.models import Model
from keras import backend


In [918]:
BATCH_SIZE = 962


In [919]:
data = np.load('finDat.npy')

In [920]:
train_dataset = tf.data.Dataset.from_tensor_slices(data).shuffle(60000).batch(962)


In [921]:
DSHAPE = 284

In [922]:
def make_generator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(256, activation='relu', use_bias=False, input_shape=(DSHAPE,)))
    model.add(layers.BatchNormalization())

    model.add(layers.Dropout(0.3))

    model.add(layers.Dense(128,activation='relu'))


    model.add(layers.Dense(64,activation='relu'))
    model.add(layers.Dense(64,activation='relu'))

    model.add(layers.Dense(DSHAPE, activation='relu', use_bias=False, input_shape=(DSHAPE,)))


    

    return model

In [923]:
generator = make_generator_model()

noise = tf.random.normal([1, DSHAPE])
generated_image = generator(noise, training=False)


In [924]:
def make_discriminator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(256, activation='relu', use_bias=False, input_shape=(DSHAPE,)))
    model.add(layers.BatchNormalization())

    model.add(layers.Dropout(0.5))

    model.add(layers.Dense(128,activation='relu'))
    model.add(layers.Dropout(0.5))
   
    model.add(layers.Dense(128,activation='relu'))
    model.add(layers.Dropout(0.3))

    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    return model

In [925]:
discriminator = make_discriminator_model()

In [926]:
def discriminator_loss(real_output, fake_output):
    real_loss = 0.5*tf.math.reduce_mean((real_output)**2) + 0.5*tf.math.reduce_mean(fake_output**2)
    return real_loss

In [927]:
def generator_loss(fake_output):
    return tf.math.reduce_mean((fake_output)**2)

In [928]:
generator_optimizer = tf.keras.optimizers.legacy.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.legacy.Adam(1e-4)

In [929]:
checkpoint_dir = './training_checkpointsL2'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=generator,
                                 discriminator=discriminator)

In [930]:
EPOCHS = 200
noise_dim = DSHAPE
num_examples_to_generate = 50

# You will reuse this seed overtime (so it's easier)
# to visualize progress in the animated GIF)
seed = tf.random.normal([num_examples_to_generate, noise_dim])

In [931]:
train_dataset

<BatchDataset element_spec=TensorSpec(shape=(None, 284), dtype=tf.float32, name=None)>

In [932]:
def train_step(images, gen_losses, disc_losses):
    noise = tf.random.normal([BATCH_SIZE, noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)

        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        gen_loss = generator_loss(fake_output)

        disc_loss = discriminator_loss(real_output, fake_output)
        gen_losses = gen_losses.append(gen_loss.numpy())
        disc_losses = disc_losses.append(disc_loss.numpy())
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

In [933]:

def train(dataset, epochs, gen_losses, disc_losses, gloss, dloss):
  for epoch in range(epochs):
    start = time.time()


    for image_batch in dataset:
        train_step(image_batch, gen_losses,disc_losses)
    print("gen_loss =" + str(gen_losses[-1]))
    print("disc_loss =" + str(disc_losses[-1]))
    gloss.append(gen_losses[-1])
    dloss.append(disc_losses[-1])

    # Produce images for the GIF as you go
    x = generate_and_save_images(generator,
                             epoch + 1,
                             seed)

    # Save the model every 15 epochs
    if (epoch + 1) % 15 == 0:
      checkpoint.save(file_prefix = checkpoint_prefix)

    print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))

  # Generate after the final epoch
  saved = generate_and_save_images(generator,
                           epochs,
                           seed)
  return saved

In [934]:
def generate_and_save_images(model, epoch, test_input):
  # Notice `training` is set to False.
  # This is so all layers run in inference mode (batchnorm).
  predictions = model(test_input, training=False)
  return predictions


In [935]:
gen_losses = []
disc_losses = []
gloss = []
dloss = []

In [936]:
final = train(train_dataset,EPOCHS, gen_losses, disc_losses, gloss, dloss)


gen_loss =0.060624305
disc_loss =0.071061075
Time for epoch 1 is 0.34661078453063965 sec
gen_loss =0.026564864
disc_loss =0.039759662
Time for epoch 2 is 0.25810790061950684 sec
gen_loss =0.012241357
disc_loss =0.021960262
Time for epoch 3 is 0.29386305809020996 sec
gen_loss =0.0060195937
disc_loss =0.01424025
Time for epoch 4 is 0.2868809700012207 sec
gen_loss =0.0032154853
disc_loss =0.008702753
Time for epoch 5 is 0.28747105598449707 sec
gen_loss =0.0018650821
disc_loss =0.006474263
Time for epoch 6 is 0.27715301513671875 sec
gen_loss =0.0011696984
disc_loss =0.004635112
Time for epoch 7 is 0.264340877532959 sec
gen_loss =0.0007883267
disc_loss =0.0039081727
Time for epoch 8 is 0.2923448085784912 sec
gen_loss =0.0005678054
disc_loss =0.002928422
Time for epoch 9 is 0.29558587074279785 sec
gen_loss =0.00043019326
disc_loss =0.0024883032
Time for epoch 10 is 0.2567570209503174 sec
gen_loss =0.0003389917
disc_loss =0.0020479509
Time for epoch 11 is 0.2790830135345459 sec
gen_loss =0.00

gen_loss =3.6833812e-06
disc_loss =4.3756085e-05
Time for epoch 91 is 0.2905540466308594 sec
gen_loss =3.6011465e-06
disc_loss =5.006539e-05
Time for epoch 92 is 0.2612910270690918 sec
gen_loss =3.5198946e-06
disc_loss =3.3515982e-05
Time for epoch 93 is 0.31652402877807617 sec
gen_loss =3.4404848e-06
disc_loss =3.835274e-05
Time for epoch 94 is 0.29595494270324707 sec
gen_loss =3.3671115e-06
disc_loss =4.232136e-05
Time for epoch 95 is 0.30974793434143066 sec
gen_loss =3.2978858e-06
disc_loss =4.4236516e-05
Time for epoch 96 is 0.31353116035461426 sec
gen_loss =3.2296496e-06
disc_loss =2.6855003e-05
Time for epoch 97 is 0.32170796394348145 sec
gen_loss =3.1630968e-06
disc_loss =6.0110084e-05
Time for epoch 98 is 0.28559017181396484 sec
gen_loss =3.0966726e-06
disc_loss =3.8193728e-05
Time for epoch 99 is 0.2631418704986572 sec
gen_loss =3.0324927e-06
disc_loss =3.0122808e-05
Time for epoch 100 is 0.26233792304992676 sec
gen_loss =2.9694247e-06
disc_loss =3.604828e-05
Time for epoch 10

gen_loss =8.6808114e-07
disc_loss =1.666309e-05
Time for epoch 179 is 0.2709071636199951 sec
gen_loss =8.578456e-07
disc_loss =1.2999809e-05
Time for epoch 180 is 0.34367799758911133 sec
gen_loss =8.476975e-07
disc_loss =1.9473124e-05
Time for epoch 181 is 0.27929234504699707 sec
gen_loss =8.3715275e-07
disc_loss =9.540394e-06
Time for epoch 182 is 0.26207590103149414 sec
gen_loss =8.273636e-07
disc_loss =1.6374857e-05
Time for epoch 183 is 0.2610440254211426 sec
gen_loss =8.1731457e-07
disc_loss =1.3931424e-05
Time for epoch 184 is 0.26208019256591797 sec
gen_loss =8.074557e-07
disc_loss =1.1312338e-05
Time for epoch 185 is 0.26386094093322754 sec
gen_loss =7.9768137e-07
disc_loss =1.1424041e-05
Time for epoch 186 is 0.2575092315673828 sec
gen_loss =7.8830504e-07
disc_loss =1.9393648e-05
Time for epoch 187 is 0.2724730968475342 sec
gen_loss =7.787814e-07
disc_loss =9.431491e-06
Time for epoch 188 is 0.3042411804199219 sec
gen_loss =7.696315e-07
disc_loss =1.2417861e-05
Time for epoch 

In [937]:
final.eval

<bound method _EagerTensorBase.eval of <tf.Tensor: shape=(50, 284), dtype=float32, numpy=
array([[0.07925244, 0.        , 0.3401261 , ..., 0.        , 0.        ,
        0.11596388],
       [0.21729568, 0.10847286, 0.        , ..., 0.        , 0.        ,
        0.37117183],
       [0.7245279 , 0.        , 0.28576404, ..., 0.        , 0.        ,
        0.7793383 ],
       ...,
       [0.2528721 , 0.18030162, 0.02239932, ..., 0.        , 0.27466744,
        0.5849763 ],
       [0.29583615, 0.        , 0.38475183, ..., 0.        , 0.        ,
        0.30047476],
       [0.24639826, 0.03883468, 0.10360632, ..., 0.        , 0.0037256 ,
        0.2618022 ]], dtype=float32)>>

In [938]:
col = np.load('columns.npy', allow_pickle=True)

In [939]:
col

array(['formula_similarity', 'totreldiff', 'formula_frac',
       'correct_formula_frac', 'tc', 'sc_class_unique_sc',
       'num_elements_sc', 'lata_2', 'latb_2', 'latc_2', 'band_gap_2',
       'density_2', 'e_above_hull_2', 'efermi_2', 'encut_2', 'energy_2',
       'energy_per_atom_2', 'final_energy_2', 'final_energy_per_atom_2',
       'formation_energy_per_atom_2', 'has_bandstructure_2',
       'is_ordered_2', 'nsites_2', 'ntask_ids_2', 'total_magnetization_2',
       'cell_volume_2', 'is_magnetic_2', 'exchange_symmetry_2',
       'num_unique_magnetic_sites_2',
       'total_magnetization_normalized_vol_2',
       'total_magnetization_normalized_formula_units_2',
       'num_magnetic_sites_2', 'true_total_magnetization_2',
       'synth_doped', 'crystal_temp_2', 'no_crystal_temp_given_2',
       'cubic', 'hexagonal', 'monoclinic', 'orthorhombic', 'tetragonal',
       'triclinic', 'trigonal', 'primitive', 'base-centered',
       'body-centered', 'face-centered', 'weight', 'Unnamed: 

In [940]:
col.shape

(284,)

In [941]:
generated = pd.DataFrame(final, columns=col)

In [942]:
save = generated.to_pickle("klgen_data.pkl")


In [943]:

plt.figure(figsize=(10,8))
plt.plot(list(range(0,EPOCHS)),gloss)
plt.plot(list(range(0,EPOCHS)),dloss)
plt.title('Model Loss over Epochs')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['generator loss','discriminator loss'])
plt.savefig('Learning_curve GAN least squares')
plt.show
plt.close()