In [109]:
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf

from tensorflow.keras import layers
import time
from tensorflow import data
import pandas as pd
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense,ZeroPadding2D, BatchNormalization, Activation, Layer, ReLU, LeakyReLU,Conv2D,AveragePooling2D,UpSampling2D,Reshape,Flatten
from tensorflow.keras.models import Model
from keras import backend


In [110]:
BATCH_SIZE = 962


In [111]:
data = np.load('finDat.npy')

In [112]:
train_dataset = tf.data.Dataset.from_tensor_slices(data).shuffle(60000).batch(962)


In [114]:
DSHAPE = 143

In [115]:
def make_generator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(256, activation='relu', use_bias=False, input_shape=(DSHAPE,)))
    model.add(layers.BatchNormalization())

    model.add(layers.Dropout(0.3))

    model.add(layers.Dense(128,activation='relu'))


    model.add(layers.Dense(64,activation='relu'))
    model.add(layers.Dense(64,activation='relu'))

    model.add(layers.Dense(DSHAPE, activation='relu', use_bias=False, input_shape=(DSHAPE,)))


    

    return model

In [116]:
generator = make_generator_model()

noise = tf.random.normal([1, DSHAPE])
generated_image = generator(noise, training=False)


In [117]:
def make_discriminator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(256, activation='relu', use_bias=False, input_shape=(DSHAPE,)))
    model.add(layers.BatchNormalization())

    model.add(layers.Dropout(0.5))

    model.add(layers.Dense(128,activation='relu'))
    model.add(layers.Dropout(0.5))
   
    model.add(layers.Dense(128,activation='relu'))
    model.add(layers.Dropout(0.3))

    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1, activation='relu'))
    return model

In [118]:
discriminator = make_discriminator_model()

In [119]:
def discriminator_loss(real_output, fake_output):
    real_loss = 0.5*tf.math.reduce_mean((real_output)**2) + 0.5*tf.math.reduce_mean(fake_output**2)
    return real_loss

In [120]:
def generator_loss(fake_output):
    return tf.math.reduce_mean((fake_output)**2)

In [121]:
generator_optimizer = tf.keras.optimizers.legacy.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.legacy.Adam(1e-4)

In [122]:
checkpoint_dir = './training_checkpointsL2'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=generator,
                                 discriminator=discriminator)

In [123]:
EPOCHS = 200
noise_dim = DSHAPE
num_examples_to_generate = 50

# You will reuse this seed overtime (so it's easier)
# to visualize progress in the animated GIF)
seed = tf.random.normal([num_examples_to_generate, noise_dim])

In [124]:
train_dataset

<BatchDataset element_spec=TensorSpec(shape=(None, 143), dtype=tf.float32, name=None)>

In [125]:
def train_step(images, gen_losses, disc_losses):
    noise = tf.random.normal([BATCH_SIZE, noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)

        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        gen_loss = generator_loss(fake_output)

        disc_loss = discriminator_loss(real_output, fake_output)
        gen_losses = gen_losses.append(gen_loss.numpy())
        disc_losses = disc_losses.append(disc_loss.numpy())
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss , discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

In [126]:

def train(dataset, epochs, gen_losses, disc_losses, gloss, dloss):
  for epoch in range(epochs):
    start = time.time()


    for image_batch in dataset:
        train_step(image_batch, gen_losses,disc_losses)
    print("gen_loss =" + str(gen_losses[-1]))
    print("disc_loss =" + str(disc_losses[-1]))
    gloss.append(gen_losses[-1])
    dloss.append(disc_losses[-1])

    # Produce images for the GIF as you go
    x = generate_and_save_images(generator,
                             epoch + 1,
                             seed)

    # Save the model every 15 epochs
    if (epoch + 1) % 15 == 0:
      checkpoint.save(file_prefix = checkpoint_prefix)

    print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))

  # Generate after the final epoch
  saved = generate_and_save_images(generator,
                           epochs,
                           seed)
  return saved

In [127]:
def generate_and_save_images(model, epoch, test_input):
  # Notice `training` is set to False.
  # This is so all layers run in inference mode (batchnorm).
  predictions = model(test_input, training=False)
  return predictions


In [128]:
gen_losses = []
disc_losses = []
gloss = []
dloss = []

In [129]:
final = train(train_dataset,EPOCHS, gen_losses, disc_losses, gloss, dloss)


gen_loss =0.10736032
disc_loss =0.12233751
Time for epoch 1 is 0.48154211044311523 sec
gen_loss =0.03712591
disc_loss =0.066346265
Time for epoch 2 is 0.10625791549682617 sec
gen_loss =0.011511226
disc_loss =0.039168555
Time for epoch 3 is 0.09639573097229004 sec
gen_loss =0.0032547964
disc_loss =0.022727866
Time for epoch 4 is 0.09784102439880371 sec
gen_loss =0.000724386
disc_loss =0.019202895
Time for epoch 5 is 0.09609174728393555 sec
gen_loss =0.00016477112
disc_loss =0.011166222
Time for epoch 6 is 0.09731936454772949 sec
gen_loss =4.6093846e-05
disc_loss =0.0053862287
Time for epoch 7 is 0.0948488712310791 sec
gen_loss =2.2160555e-05
disc_loss =0.008494082
Time for epoch 8 is 0.09573030471801758 sec
gen_loss =1.1156347e-05
disc_loss =0.006986176
Time for epoch 9 is 0.0986790657043457 sec
gen_loss =5.531006e-06
disc_loss =0.00270519
Time for epoch 10 is 0.09329509735107422 sec
gen_loss =2.1793542e-06
disc_loss =0.0030976746
Time for epoch 11 is 0.09653401374816895 sec
gen_loss =5

gen_loss =0.0
disc_loss =1.5389209e-05
Time for epoch 101 is 0.0943453311920166 sec
gen_loss =0.0
disc_loss =1.8790499e-07
Time for epoch 102 is 0.09557199478149414 sec
gen_loss =0.0
disc_loss =0.000105934894
Time for epoch 103 is 0.09369015693664551 sec
gen_loss =0.0
disc_loss =5.645769e-06
Time for epoch 104 is 0.09481287002563477 sec
gen_loss =0.0
disc_loss =2.154166e-05
Time for epoch 105 is 0.18477797508239746 sec
gen_loss =0.0
disc_loss =0.00013371377
Time for epoch 106 is 0.11040997505187988 sec
gen_loss =0.0
disc_loss =9.28457e-05
Time for epoch 107 is 0.10278606414794922 sec
gen_loss =0.0
disc_loss =9.066258e-06
Time for epoch 108 is 0.1104729175567627 sec
gen_loss =0.0
disc_loss =0.00024715028
Time for epoch 109 is 0.11578202247619629 sec
gen_loss =0.0
disc_loss =3.4146158e-06
Time for epoch 110 is 0.11699700355529785 sec
gen_loss =0.0
disc_loss =2.5909635e-06
Time for epoch 111 is 0.10072088241577148 sec
gen_loss =0.0
disc_loss =1.1644116e-05
Time for epoch 112 is 0.09630179

In [137]:
final.eval

<bound method _EagerTensorBase.eval of <tf.Tensor: shape=(50, 143), dtype=float32, numpy=
array([[0.        , 0.45634902, 0.4358928 , ..., 0.        , 0.7442949 ,
        0.01004054],
       [0.        , 0.        , 0.        , ..., 0.        , 0.4741718 ,
        0.        ],
       [0.        , 0.        , 0.14577867, ..., 0.        , 0.30105567,
        0.        ],
       ...,
       [0.06583896, 0.41397482, 0.42327443, ..., 0.        , 0.44163573,
        0.        ],
       [0.15938419, 0.2949909 , 0.2957939 , ..., 0.        , 0.5751798 ,
        0.        ],
       [0.        , 0.39786127, 0.32274055, ..., 0.        , 0.3726675 ,
        0.        ]], dtype=float32)>>

In [138]:
col = np.load('columns.npy', allow_pickle=True)

In [139]:
col

array(['formula_similarity', 'totreldiff', 'formula_frac',
       'num_elements_sc', 'lata_2', 'latb_2', 'latc_2', 'band_gap_2',
       'density_2', 'e_above_hull_2', 'efermi_2', 'encut_2', 'energy_2',
       'energy_per_atom_2', 'final_energy_2', 'final_energy_per_atom_2',
       'formation_energy_per_atom_2', 'nsites_2', 'ntask_ids_2',
       'total_magnetization_2', 'cell_volume_2', 'exchange_symmetry_2',
       'num_unique_magnetic_sites_2',
       'total_magnetization_normalized_vol_2',
       'total_magnetization_normalized_formula_units_2',
       'num_magnetic_sites_2', 'true_total_magnetization_2',
       'Reason for exclusion', 'crystal_temp_2', 'cubic', 'hexagonal',
       'monoclinic', 'orthorhombic', 'tetragonal', 'triclinic',
       'trigonal', 'primitive', 'base-centered', 'body-centered',
       'face-centered', 'H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F',
       'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca',
       'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 

In [140]:
col.shape

(143,)

In [141]:
generated = pd.DataFrame(final, columns=col)

In [142]:
save = generated.to_pickle("klgen_data.pkl")


In [143]:

plt.figure(figsize=(10,8))
plt.plot(list(range(0,EPOCHS)),gloss)
plt.plot(list(range(0,EPOCHS)),dloss)
plt.title('Model Loss over Epochs')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['generator loss','discriminator loss'])
plt.savefig('Learning_curve GAN least squares')
plt.show
plt.close()