In [16]:
import re
import pandas as pd
import numpy as np
import math
import pickle

In [17]:
# Accepts a chemical composition and returns the predicted Tc value.
# The model has already been trained (see model_development.ipynb).

In [18]:
# Load the trained predictive model.
# NOTE: pickle.load can execute arbitrary code while deserializing, so only
# load 'GB_model' and 'Columns' from a trusted source.
# The `with` blocks close each file even when unpickling raises.
with open('GB_model', 'rb') as infile:
    model = pickle.load(infile)

# Load the names of the model features (used as DataFrame column labels).
with open('Columns', 'rb') as infile:
    Columns = pickle.load(infile)

In [20]:
# Load the per-element property table, using the 'Element' column as the
# row index so that a property can be looked up by element label.
# NOTE(review): pandas' default missing-value parsing may turn a label spelled
# 'NA' into NaN; check how sodium is written in Properties.csv — TODO confirm.
properties = pd.read_csv('Properties.csv', index_col='Element')

In [25]:
def get_composition(composition=None):
    """Parse a chemical formula into element symbols and normalized proportions.

    Parameters
    ----------
    composition : str, optional
        Formula such as 'H2O' or 'H0.5O0.5'. When omitted, the user is
        prompted for it with input().

    Returns
    -------
    elements : list of str
        Upper-cased runs of letters in order of appearance (H2O -> ['H', 'O']).
    amounts : list of float
        Proportion of each element, normalized to sum to 1 (H2O -> [2/3, 1/3]).

    Raises
    ------
    ValueError
        If the formula is empty, the numbers of symbols and amounts differ,
        or the amounts do not add up to a positive value.

    Notes
    -----
    The formula is upper-cased and only a missing trailing amount is
    defaulted to 1, so every run of letters between two numbers is treated
    as a single symbol (e.g. 'HO2' yields ['HO']). Give each element an
    explicit amount to keep symbols apart.
    """
    if composition is None:
        composition = input('Enter the material composition \n')
    composition = composition.strip().upper()
    if not composition:
        raise ValueError('The material composition is empty')

    # If the last element does not have a proportion, add 1 (e.g. H2O -> H2O1)
    if composition[-1].isalpha():
        composition = composition + "1"

    # Symbols are runs of letters only, so the decimal point of an amount
    # (e.g. the '.' in 'H0.5') is never mistaken for an element.
    elements = re.findall(r"[A-Z]+", composition)
    amounts = [float(number) for number in re.findall(r"\d*\.\d+|\d+", composition)]

    if not elements or len(elements) != len(amounts):
        raise ValueError(
            'Cannot pair every element with an amount in %r' % composition
        )

    total = sum(amounts)
    if total <= 0:
        raise ValueError('The amounts in %r must add up to a positive value' % composition)

    # Normalize so that the proportions sum to 1.
    amounts = [amount / total for amount in amounts]

    return elements, amounts
        

In [26]:
def get_feature(elements, amounts, property_name):
    """Compute the 10 summary statistics of one elemental property.

    Parameters
    ----------
    elements : list of str
        Element labels; each must be a row label of the module-level
        ``properties`` table.
    amounts : list of float
        Proportion of each element, aligned with ``elements``.
    property_name : str
        Column of ``properties`` to summarize (e.g. 'AtomicMass').

    Returns
    -------
    tuple
        (mean, weighted mean, geometric mean, weighted geometric mean,
        entropy, weighted entropy, range, weighted range,
        standard deviation, weighted standard deviation).
    """
    values = [properties.loc[element, property_name] for element in elements]
    count = len(values)
    total = sum(values)

    mean = total / count

    weighted_mean = 0
    for amount, value in zip(amounts, values):
        weighted_mean += amount * value

    # Take the n-th root of the product so this is a geometric mean for any
    # number of elements; a plain square root is only right for two.
    product = 1
    for value in values:
        product = product * value
    geometric_mean = math.pow(product, 1.0 / count)

    weighted_g_mean = 1
    for amount, value in zip(amounts, values):
        weighted_g_mean = weighted_g_mean * value ** amount

    # Entropy of the property values rescaled to sum to 1.
    entropy = 0
    for value in values:
        share = value / total
        entropy = entropy - share * math.log(share)

    # Same entropy, with each share first scaled by the element's amount and
    # the scaled shares renormalized to sum to 1.
    scaled_shares = [amount * value / total for amount, value in zip(amounts, values)]
    scaled_total = sum(scaled_shares)
    weighted_entropy = 0
    for scaled_share in scaled_shares:
        fraction = scaled_share / scaled_total
        weighted_entropy = weighted_entropy - fraction * math.log(fraction)

    prop_max = max(values)
    prop_min = min(values)
    value_range = prop_max - prop_min

    # The weighted range uses the amounts of the first elements that hold the
    # largest and the smallest property value.
    amount_max = amounts[values.index(prop_max)]
    amount_min = amounts[values.index(prop_min)]
    weighted_range = prop_max * amount_max - prop_min * amount_min

    # Population standard deviation: divide by the number of elements instead
    # of a fixed 2, which was only right for two-element compositions.
    squared_deviation = 0
    for value in values:
        squared_deviation = squared_deviation + (value - mean) ** 2
    st_dev = math.sqrt(squared_deviation / count)

    weighted_st_dev = 0
    for amount, value in zip(amounts, values):
        weighted_st_dev += amount * (value - weighted_mean) ** 2
    weighted_st_dev = math.sqrt(weighted_st_dev)

    res = (mean,
           weighted_mean,
           geometric_mean,
           weighted_g_mean,
           entropy,
           weighted_entropy,
           value_range,
           weighted_range,
           st_dev,
           weighted_st_dev
    )
    return res
    

In [30]:
def get_features(elements, amounts):
    """Build the complete feature tuple for one material composition.

    The tuple starts with the number of elements and is followed by the
    values get_feature returns for each property, in the order listed in
    ``property_names`` below.
    """
    property_names = (
        'AtomicMass',
        'IonizationEnergy',
        'AtomicRadius',
        'Density',
        'ElecAffinity',
        'HeatFusion',
        'ThermConduc',
        'Valence',
    )

    # Leading entry: how many elements the composition has.
    features = (len(elements),)

    # Compute every property's statistics first, then append them in order.
    per_property = [get_feature(elements, amounts, name) for name in property_names]
    for stats in per_property:
        features = features + stats

    return features

In [31]:
# Assemble the model input for one material: the feature tuple from
# get_features laid out as a single row of 81 values, labelled with Columns.
# Note: no model-specific preprocessing happens here. Per the original notes,
# an xgb model would need X converted to a Matrix and an NN would need X
# normalized.
def get_X(elements, amounts, Columns):
    feature_row = np.array(get_features(elements, amounts)).reshape(1, 81)
    return pd.DataFrame(feature_row, columns=Columns)


In [32]:
# Ask for a composition, build its feature row and predict Tc.
elements, amounts = get_composition()
X = get_X(elements, amounts, Columns)
y = model.predict(X)
# model.predict presumably returns an array-like with one value per input row
# (np.ravel also copes with a bare scalar). Formatting the extracted float
# avoids relying on NumPy's deprecated implicit array-to-scalar conversion.
print('The predicted is %.2f K' % float(np.ravel(y)[0]))

Enter the material composition 
H2O
The predicted is 38.80 K
