In [1]:
# Libraries

# Python

from random import shuffle
import pandas as pd
import numpy as np
import sys
import json

# Machine Learning

import tensorflow as tf
from tensorflow import keras
from keras import initializers
from keras.layers import Dense
from keras.models import Sequential
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Data / Plotting

import matplotlib.pyplot as plt
import pymatgen as pymat
import mendeleev as mendel

%matplotlib inline

Using TensorFlow backend.


In [2]:
# BUILD A PER-ELEMENT PROPERTY LOOKUP FROM PYMATGEN AND MENDELEEV
#
# element_property maps an element symbol to a dict holding the ten properties
# read below. Whatever the libraries return is stored unchanged.

elements_list = ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg',
            'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr',
            'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br',
            'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag',
            'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'Hf', 'Ta', 'W',
            'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'La', 'Ce', 'Pr',
            'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu',
            'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu']


element_property = {}

for item in elements_list:
    # Look the element up once per library and reuse the objects, instead of
    # repeating the lookup for every single property.
    pymat_elem = pymat.Element(item)
    mendel_elem = mendel.element(item)

    # Key order matches the original insertion order.
    element_property[item] = {
        "Young": pymat_elem.youngs_modulus,
        "Lattice": mendel_elem.lattice_constant,
        "MeltingPoint": mendel_elem.melting_point,
        "SpecificHeat": mendel_elem.specific_heat,
        "AtomicMass": pymat_elem.atomic_mass,
        "CTE": pymat_elem.coefficient_of_linear_thermal_expansion,
        "ElectRes": pymat_elem.electrical_resistivity,
        "AtomRad": pymat_elem.atomic_radius,
        "Density": pymat_elem.density_of_solid,
        "Poissons": pymat_elem.poissons_ratio,
    }

#print(element_property)
# There are 127 'None' values in this dictionary

In [3]:
# Load the dataset from CSV as an array of strings, then rebuild it as a DataFrame.
# The `with` block guarantees the file handle is closed once parsing finishes.
with open("basic_df.csv", "r") as csv_file:
    raw_df = np.loadtxt(csv_file, delimiter = ',', dtype = str, skiprows = 0)

# Row 0 is the header row; column 0 is skipped here and below
# (presumably a saved row index -- confirm against the CSV)
df_labels = raw_df[0,1:]

# Make the dataframe from the remaining rows, again without column 0
df = pd.DataFrame(raw_df[1:,1:], columns = df_labels)

# Convert everything after the first remaining column to numeric values;
# that first column is left as text
df.iloc[:,1:] = df.iloc[:,1:].apply(pd.to_numeric)
df.head()

Unnamed: 0,internal_id,avg_hardness,avg_yield,Al,As,B,Bi,C,Ca,Ce,...,youngs_modulus,lattice_constant,melting_point,specific_heat,atomic_mass,CTE,electrical_resistivity,atomic_radius,density_of_solid,poissons_ratio
0,MINFM3654700,95,168.0,95.8,0,0,0,0,0,0,...,70.1326,3.97837,930.851,0.879556,26.9799,2.24361e-05,4.0299e-06,1.23386,2716.99,0.341349
1,MINFM3654300,30,55.0,95.8,0,0,0,0,0,0,...,70.1326,3.97837,930.851,0.879556,26.9799,2.24361e-05,4.0299e-06,1.23386,2716.99,0.341349
2,MINFM3654600,95,168.0,95.8,0,0,0,0,0,0,...,70.1326,3.97837,930.851,0.879556,26.9799,2.24361e-05,4.0299e-06,1.23386,2716.99,0.341349
3,MINFM3654500,65,127.5,95.8,0,0,0,0,0,0,...,70.1326,3.97837,930.851,0.879556,26.9799,2.24361e-05,4.0299e-06,1.23386,2716.99,0.341349
4,MINFM3654400,65,122.5,95.8,0,0,0,0,0,0,...,70.1326,3.97837,930.851,0.879556,26.9799,2.24361e-05,4.0299e-06,1.23386,2716.99,0.341349


In [4]:
# Keep only the element-composition columns (everything between the three
# leading columns and the ten trailing ones) and force them to numeric dtypes
elem_df = df.iloc[:, 3:-10].copy().apply(pd.to_numeric)
final_data = elem_df.copy()

#display(elem_df)

# Scale every element column by that element's atomic mass / 100, then add the
# scaled columns up row by row (skipna = False: a missing entry makes the sum NaN)
mod_temp = pd.DataFrame(
    {col: elem_df[col] * (element_property[col]["AtomicMass"]/100) for col in elem_df.columns},
    index = elem_df.index,
)
final_data["AVRG AtomicMass"] = mod_temp.sum(axis = 1, skipna = False)

# Presence mask: zeros stay 0, every other entry becomes 1
binary_temp = elem_df.where(elem_df == 0, 1)

#display(binary_temp)

# Replace each 1 in the mask with that element's atomic radius
mod_binary_temp = pd.DataFrame(
    {col: binary_temp[col] * element_property[col]["AtomRad"] for col in binary_temp.columns},
    index = binary_temp.index,
)

# Largest radius minus smallest radius per row; entries at or below .01 (the
# zeros of absent elements) are masked out before taking the minimum
largest_radius = mod_binary_temp.max(axis = 1)
smallest_radius = mod_binary_temp.where(mod_binary_temp > .01).min(axis = 1)
final_data["MXMN AtomRad"] = largest_radius - smallest_radius

#display(mod_binary_temp)

display(final_data)

Unnamed: 0,Al,As,B,Bi,C,Ca,Ce,Co,Cr,Cu,...,Si,Sn,Ta,Ti,V,W,Zn,Zr,AVRG AtomicMass,MXMN AtomRad
0,95.8,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.04,0.15,...,0.40,0.0,0.0,0.15,0.00,0.0,0.25,0.00,26.979858,0.40
1,95.8,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.04,0.15,...,0.40,0.0,0.0,0.15,0.00,0.0,0.25,0.00,26.979858,0.40
2,95.8,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.04,0.15,...,0.40,0.0,0.0,0.15,0.00,0.0,0.25,0.00,26.979858,0.40
3,95.8,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.04,0.15,...,0.40,0.0,0.0,0.15,0.00,0.0,0.25,0.00,26.979858,0.40
4,95.8,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.04,0.15,...,0.40,0.0,0.0,0.15,0.00,0.0,0.25,0.00,26.979858,0.40
5,0.0,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.00,0.00,...,0.00,0.0,0.0,0.00,0.00,0.0,0.00,0.00,57.326168,0.05
6,0.5,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,19.00,0.00,...,0.00,0.0,0.0,0.90,0.00,0.0,0.00,0.00,59.128091,0.20
7,91.3,0.0,0.0000,0.2,0.00,0.0,0.0,0.0,0.00,5.00,...,0.40,0.0,0.0,0.00,0.00,0.0,0.30,0.00,29.343290,0.70
8,90.4,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.10,3.90,...,0.50,0.0,0.0,0.15,0.00,0.0,0.25,0.20,28.139077,0.45
9,99.6,0.0,0.0000,0.0,0.00,0.0,0.0,0.0,0.00,0.05,...,0.25,0.0,0.0,0.03,0.05,0.0,0.05,0.00,27.267365,0.40


In [5]:
# EXAMPLE TO VALIDATE THE SHUFFLING
#
# Each cell of the 10x10 toy table is the string "<row><column>", so after the
# shuffle it is easy to check that every label still sits next to its own row.
# NOTE: this rebinds `df`, replacing the frame loaded from basic_df.csv above.

trash_data = [["{}{}".format(y, x) for x in range(10)] for y in range(10)]
df = pd.DataFrame(trash_data)
display(df)

import random

# Column 0 plays the role of the labels, the remaining columns are the values
all_labels = df.iloc[:, 0].values
all_values = df.iloc[:, 1:].values

# Shuffle the row positions in place. No seed is set in this cell, so the
# order can differ from run to run.
order = np.arange(len(df))
random.shuffle(order)

# Index values and labels with the same permutation so they stay aligned
some_values = all_values[order]
some_labels = all_labels[order]
some_materials = all_labels[order]

print(some_values)
print(some_labels)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,1,2,3,4,5,6,7,8,9
1,10,11,12,13,14,15,16,17,18,19
2,20,21,22,23,24,25,26,27,28,29
3,30,31,32,33,34,35,36,37,38,39
4,40,41,42,43,44,45,46,47,48,49
5,50,51,52,53,54,55,56,57,58,59
6,60,61,62,63,64,65,66,67,68,69
7,70,71,72,73,74,75,76,77,78,79
8,80,81,82,83,84,85,86,87,88,89
9,90,91,92,93,94,95,96,97,98,99


[['71' '72' '73' '74' '75' '76' '77' '78' '79']
 ['51' '52' '53' '54' '55' '56' '57' '58' '59']
 ['41' '42' '43' '44' '45' '46' '47' '48' '49']
 ['11' '12' '13' '14' '15' '16' '17' '18' '19']
 ['01' '02' '03' '04' '05' '06' '07' '08' '09']
 ['21' '22' '23' '24' '25' '26' '27' '28' '29']
 ['91' '92' '93' '94' '95' '96' '97' '98' '99']
 ['31' '32' '33' '34' '35' '36' '37' '38' '39']
 ['61' '62' '63' '64' '65' '66' '67' '68' '69']
 ['81' '82' '83' '84' '85' '86' '87' '88' '89']]
['70' '50' '40' '10' '00' '20' '90' '30' '60' '80']
