In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import initializers
from keras.layers import Dense
from keras.models import Sequential
%matplotlib inline
import matplotlib.pyplot as plt
import sys
import json

fcc_elements = ["Ag", "Al", "Au", "Cu", "Ir", "Ni", "Pb", "Pd", "Pt", "Rh", "Th", "Yb"]
bcc_elements = ["Ba", "Cr", "Cs", "Eu", "Fe", "Li", "Mn", "Mo", "Na", "Nb", "Rb", "Ta", "V", "W" ]
hcp_elements = ["Be", "Ca", "Cd", "Co", "Dy", "Er", "Gd", "Hf", "Ho", "Lu", "Mg", "Re", 
                "Ru", "Sc", "Tb", "Ti", "Tl", "Tm", "Y", "Zn", "Zr"]
others = ["Si", "Ge"] # "Si" and "Ge" are Face-centered diamond-cubic;

elements = fcc_elements + others + bcc_elements + hcp_elements

querable_mendeleev = ["atomic_number", "atomic_volume", "boiling_point", "en_ghosh",  "evaporation_heat", "heat_of_formation",
                     "lattice_constant", "melting_point", "specific_heat"]
querable_pymatgen = ["atomic_mass", "atomic_radius", "electrical_resistivity","molar_volume", "bulk_modulus", "youngs_modulus",
                     "average_ionic_radius", "density_of_solid", "coefficient_of_linear_thermal_expansion"]
querable_values = querable_mendeleev + querable_pymatgen

Using TensorFlow backend.


In [2]:
# Get the data

with open("all_values.csv", "r") as f:
    all_values = json.load(f)

# Pandas Dataframe
df = pd.DataFrame(all_values, columns=querable_values)

# We will patch some of the values that are not available in the datasets.

# Value for the CTE of Cesium
index_Cs = df.index[df['atomic_number'] == 55]
df.iloc[index_Cs, df.columns.get_loc("coefficient_of_linear_thermal_expansion")] = 0.000097 
# Value from: David R. Lide (ed), CRC Handbook of Chemistry and Physics, 84th Edition. CRC Press. Boca Raton, Florida, 2003

# Value for the CTE of Rubidium
index_Rb = df.index[df['atomic_number'] == 37]
df.iloc[index_Rb, df.columns.get_loc("coefficient_of_linear_thermal_expansion")] = 0.000090 
# Value from: https://www.azom.com/article.aspx?ArticleID=1834

# Value for the Evaporation Heat of Ruthenium
index_Ru = df.index[df['atomic_number'] == 44]
df.iloc[index_Ru, df.columns.get_loc("evaporation_heat")] = 595 # kJ/mol 
# Value from: https://www.webelements.com/ruthenium/thermochemistry.html

# Value for the Bulk Modulus of Zirconium
index_Zr = df.index[df['atomic_number'] == 40]
df.iloc[index_Zr, df.columns.get_loc("bulk_modulus")] = 94 # GPa 
# Value from: https://materialsproject.org/materials/mp-131/

# Value for the Bulk Modulus of Germanium
index_Ge = df.index[df['atomic_number'] == 32]
df.iloc[index_Ge, df.columns.get_loc("bulk_modulus")] = 77.2 # GPa 
# Value from: https://www.crystran.co.uk/optical-materials/germanium-ge

# Value for the Young's Modulus of Germanium
index_Ge = df.index[df['atomic_number'] == 32]
df.iloc[index_Ge, df.columns.get_loc("youngs_modulus")] = 102.7 # GPa 
# Value from: https://www.crystran.co.uk/optical-materials/germanium-ge

In [3]:
# First, we'll create the heatmap again
all_labels = df['youngs_modulus'].tolist()

# make a list of all the inputs
all_inputs = df.values.tolist()

# Make a list of the young's modulus column so that we can append it to the end
youngs_modulus = list(df['youngs_modulus'])

# Drop young's modulus column
df = df.drop('youngs_modulus', axis = 1)

# Create a new young's modulus column, this time at the end
df["youngs_modulus"] = youngs_modulus

# create a list of all the labels
labels = df.columns.tolist()

# Check that it's at the end
df.head()

Unnamed: 0,atomic_number,atomic_volume,boiling_point,en_ghosh,evaporation_heat,heat_of_formation,lattice_constant,melting_point,specific_heat,atomic_mass,atomic_radius,electrical_resistivity,molar_volume,bulk_modulus,average_ionic_radius,density_of_solid,coefficient_of_linear_thermal_expansion,youngs_modulus
0,47,10.3,2485.0,0.147217,254.1,284.9,4.09,1235.1,0.237,107.8682,1.6,1.63e-08,10.27,100.0,1.086667,10490.0,1.9e-05,83.0
1,13,10.0,2740.0,0.150078,284.1,330.9,4.05,933.5,0.9,26.981539,1.25,2.7e-08,10.0,76.0,0.675,2700.0,2.3e-05,70.0
2,79,10.2,3080.0,0.26137,340.0,368.2,4.08,1337.58,0.129,196.966569,1.35,2.2e-08,10.21,220.0,1.07,19300.0,1.4e-05,78.0
3,29,7.1,2840.0,0.151172,304.6,337.4,3.61,1356.6,0.385,63.546,1.35,1.72e-08,7.11,140.0,0.82,8920.0,1.7e-05,130.0
4,77,8.54,4403.0,0.25106,604.0,669.0,3.84,2683.0,0.133,192.217,1.35,4.7e-08,8.52,320.0,0.765,22650.0,6e-06,528.0


In [4]:
# Here, I will make lists of all the variables I would like to optmize
# The loop can then be modified to loop through any of these lists

# These are the values for the validation split
# The original value was 0.10
val_list = [0.12, 0.11, 0.10, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04]

# These are the values that will be tested for the rms learning rate
# The original values used was 0.002
rms_list = [0.005, 0.003, 0.0025, 0.002, 0.0015, 0.001, 0.0005, 0.0002, 0.0001]

# These are the values that will be tested for the number of nodes for the first layer
# The original value was 32
nodes_1 = list(range(10,33))

# These are the values that will be tested for the number of nodes for the second layer
# The original value was 64
nodes_2 = list(range(10,75))

# I will also test how many layers is optimal, but this doesn't require a loop

In [5]:
# RMS Learning Rate Test

# How long the loop will be
the_end = 30

# How long the loop will be for rms
rms_end = len(rms_list)

# Create an empty list to which we'll add our final values
rms_values = []

# Open a file where will write our output to
v_file = open("rms_data_shuffled.csv", "w")

# Counting variables
i = 0
j = 0

In [6]:
# Validation Test
while j < the_end:
    i = 0
    while i < rms_end:
        # open file
        v_file = open("rms_data.csv", "a")
   
        # Reassign the dataframe
        sf = df
    
        # Drop youngs modulus
        sf = df.drop('youngs_modulus', axis = 1)

        all_values = [list(sf.iloc[x]) for x in range(len(all_values))]

        # SETS

        # List of lists are turned into Numpy arrays to facilitate calculations in steps to follow (Normalization).
        all_values = np.array(all_values, dtype = float) 
        #print("Shape of Values:", all_values.shape)
        all_labels = np.array(all_labels, dtype = float)
        #print("Shape of Labels:", all_labels.shape)

        # Uncomment the line below to shuffle the dataset (we do not do this here to ensure consistent results for every run)
        # order = np.argsort(np.random.random(all_labels.shape)) # This numpy argsort returns the indexes that would be used to shuffle a list
        order = np.arange(49)
        all_values = all_values[order]
        all_labels = all_labels[order]

        # Training Set
        train_labels = all_labels[:44]
        train_values = all_values[:44]

        # Testing Set
        test_labels = all_labels[-5:]
        test_values = all_values[-5:]

        # NORMALIZATION
        mean = np.mean(train_values, axis = 0) # mean
        std = np.std(train_values, axis = 0) # standard deviation

        train_values = (train_values - mean) / std # input scaling
        test_values = (test_values - mean) / std # input scaling

        # DEFINITION OF THE MODEL

        # The weights of our neural network will be initialized in a random manner, using a seed allows for reproducibility
        kernel_init = initializers.RandomNormal(seed=0)

        model = Sequential()
        model.add(Dense(32, activation='relu', input_shape=(train_values.shape[1], ), kernel_initializer=kernel_init))
        model.add(Dense(64, activation='relu', kernel_initializer=kernel_init))
        model.add(Dense(1, kernel_initializer=kernel_init))

        # DEFINITION OF THE OPTIMIZER

        optimizer = tf.train.RMSPropOptimizer(rms_list[i]) # Root Mean Squared Propagation

        # This line matches the optimizer to the model and states which metrics will evaluate the model's accuracy
        model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
        # model.summary()

        class PrintEpNum(keras.callbacks.Callback): # This is a function for the Epoch Counter
            def on_epoch_end(self, epoch, logs):
                sys.stdout.flush()
                sys.stdout.write("Current Epoch: " + str(epoch+1) + '\r') # Updates current Epoch Number

        mae_es= keras.callbacks.EarlyStopping(monitor='mean_absolute_error', patience=10, verbose=1, mode='auto', restore_best_weights=True)    
        EPOCHS = 10000 # Number of EPOCHS

        # HISTORY Object which contains how the model learned
        # Training Values (Properties), Training Labels (Known Young's Moduli)
        history = model.fit(train_values, train_labels, batch_size = train_values.shape[0], 
                        epochs = EPOCHS, verbose = False, validation_split = 0.10, callbacks=[mae_es, PrintEpNum()]) 


        [loss_train, mae_train] = model.evaluate(train_values, train_labels, verbose=0)
        [loss_test, mae_test] = model.evaluate(test_values, test_labels, verbose=0)

        # Here is where we append the dropped variable, mae test, and train values
        rms_values.append([rms_list[i],round(mae_train, 3), round(mae_test, 3)])
    
        # Display the current iteration
        print("The current iteration is \n" ,i)

        # write to a file
        v_file.write(" ".join(str(x) for x in rms_values[i]))
        v_file.write("\n")
    
        # Display the same information being written to the file
        print(" ".join(str(x) for x in rms_values[i]))

        # counting variable
        i = i + 1
    
        # Close the file    
        v_file.close()
    
    # Counting variable
    j = j + 1

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Restoring model weights from the end of the best epoch.
Epoch 00538: early stopping
The current iteration is 
 0
0.005 16.169 41.465
Restoring model weights from the end of the best epoch.
Epoch 00722: early stopping
The current iteration is 
 1
0.003 13.0 37.084
Restoring model weights from the end of the best epoch.
Epoch 00879: early stopping
The current iteration is 
 2
0.0025 11.132 27.243
Restoring model weights from the end of the best epoch.
Epoch 00942: early stopping
The current iteration is 
 3
0.002 11.098 18.42
Restoring model weights from the end of the best epoch.
Epoch 01122: early stopping
The current iteration is 
 4
0.0015 11.366 14.077
Restoring model weights from the end of the best epoch.
Epoch 01567: early stopping
The current iteration is 
 5
0.001 10.298 24.152
Restoring model weights from the end of the best epoch.
Epoch 02709: early stopping

KeyboardInterrupt: 

In [8]:
# RMS Learning Rate Test

# How long the loop will be
the_end = 30

# How long the loop will be for rms
rms_end = len(rms_list)

# Create an empty list to which we'll add our final values
rms_values = []

# Open a file where will write our output to
v_file = open("rms_data_shuffled.csv", "w")

# Counting variables
i = 0
j = 0

In [9]:
# Validation Test
while i < rms_end:
    j = 0
    while j < the_end:
        # open file
        v_file = open("rms_data_shuffled.csv", "a")
   
        # Reassign the dataframe
        sf = df
    
        # Drop youngs modulus
        sf = df.drop('youngs_modulus', axis = 1)

        all_values = [list(sf.iloc[x]) for x in range(len(all_values))]

        # SETS

        # List of lists are turned into Numpy arrays to facilitate calculations in steps to follow (Normalization).
        all_values = np.array(all_values, dtype = float) 
        #print("Shape of Values:", all_values.shape)
        all_labels = np.array(all_labels, dtype = float)
        #print("Shape of Labels:", all_labels.shape)

        # Uncomment the line below to shuffle the dataset (we do not do this here to ensure consistent results for every run)
        order = np.argsort(np.random.random(all_labels.shape)) # This numpy argsort returns the indexes that would be used to shuffle a list
        #order = np.arange(49)
        all_values = all_values[order]
        all_labels = all_labels[order]

        # Training Set
        train_labels = all_labels[:44]
        train_values = all_values[:44]

        # Testing Set
        test_labels = all_labels[-5:]
        test_values = all_values[-5:]

        # NORMALIZATION
        mean = np.mean(train_values, axis = 0) # mean
        std = np.std(train_values, axis = 0) # standard deviation

        train_values = (train_values - mean) / std # input scaling
        test_values = (test_values - mean) / std # input scaling

        # DEFINITION OF THE MODEL

        # The weights of our neural network will be initialized in a random manner, using a seed allows for reproducibility
        kernel_init = initializers.RandomNormal(seed=0)

        model = Sequential()
        model.add(Dense(32, activation='relu', input_shape=(train_values.shape[1], ), kernel_initializer=kernel_init))
        model.add(Dense(64, activation='relu', kernel_initializer=kernel_init))
        model.add(Dense(1, kernel_initializer=kernel_init))

        # DEFINITION OF THE OPTIMIZER

        optimizer = tf.train.RMSPropOptimizer(rms_list[i]) # Root Mean Squared Propagation

        # This line matches the optimizer to the model and states which metrics will evaluate the model's accuracy
        model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
        # model.summary()

        class PrintEpNum(keras.callbacks.Callback): # This is a function for the Epoch Counter
            def on_epoch_end(self, epoch, logs):
                sys.stdout.flush()
                sys.stdout.write("Current Epoch: " + str(epoch+1) + '\r') # Updates current Epoch Number

        mae_es= keras.callbacks.EarlyStopping(monitor='mean_absolute_error', patience=10, verbose=1, mode='auto', restore_best_weights=True)    
        EPOCHS = 10000 # Number of EPOCHS

        # HISTORY Object which contains how the model learned
        # Training Values (Properties), Training Labels (Known Young's Moduli)
        history = model.fit(train_values, train_labels, batch_size = train_values.shape[0], 
                        epochs = EPOCHS, verbose = False, validation_split = 0.10, callbacks=[mae_es, PrintEpNum()]) 


        [loss_train, mae_train] = model.evaluate(train_values, train_labels, verbose=0)
        [loss_test, mae_test] = model.evaluate(test_values, test_labels, verbose=0)

        # Here is where we append the dropped variable, mae test, and train values
        rms_values.append([rms_list[i],round(mae_train, 3), round(mae_test, 3)])
    
        # Display the current iteration
        print("The current iteration of j is \n" ,j)

        # write to a file
        v_file.write(" ".join(str(x) for x in rms_values[i]))
        v_file.write("\n")
    
        # Display the same information being written to the file
        print(" ".join(str(x) for x in rms_values[i]))

        # Close the file    
        v_file.close()
        
        j = j + 1

# Display the current iteration
print("The current iteration of i is \n" ,i)
i = i + 1

Restoring model weights from the end of the best epoch.
Epoch 00438: early stopping
The current iteration is 
 0
0.005 13.021 17.831
Restoring model weights from the end of the best epoch.
Epoch 00392: early stopping
The current iteration is 
 1
0.005 13.021 17.831
Restoring model weights from the end of the best epoch.
Epoch 00374: early stopping
The current iteration is 
 2
0.005 13.021 17.831
Restoring model weights from the end of the best epoch.
Epoch 00049: early stopping
The current iteration is 
 3
0.005 13.021 17.831
Restoring model weights from the end of the best epoch.
Epoch 00268: early stopping
The current iteration is 
 4
0.005 13.021 17.831
Restoring model weights from the end of the best epoch.
Epoch 00362: early stopping
The current iteration is 
 5
0.005 13.021 17.831
Restoring model weights from the end of the best epoch.
Epoch 00153: early stopping
The current iteration is 
 6
0.005 13.021 17.831


KeyboardInterrupt: 