This notebook takes a chemical composition as input and returns the predicted Tc value (in kelvin).
For the development of the predictive model, see the companion Jupyter notebook.

In [1]:
import re
import pandas as pd
import numpy as np
import math
import pickle

In [2]:
# NOTE(security): pickle.load can execute arbitrary code while deserialising,
# so only load 'Columns' and 'GB_model' files that come from a trusted source.

# unpickle the Columns (the column labels used later to build the model input frame)
# `with` guarantees the file is closed even if unpickling raises.
with open('Columns', 'rb') as infile:
    Columns=pickle.load(infile)

# unpickle the model
with open('GB_model', 'rb') as infile:
    model=pickle.load(infile)

In [3]:
# Read the formula; surrounding whitespace is dropped so it cannot end up inside an element name.
raw_composition=input('Enter the material composition \n').strip()
if not raw_composition:
    raise ValueError('The material composition must not be empty')

composition=raw_composition
if any(ch.islower() for ch in composition):
    # Properly capitalised input (e.g. MgB2) still shows where each symbol ends.
    # Give every symbol that has no explicit amount an explicit "1" *before*
    # upper-casing; otherwise neighbouring symbols merge (MgB2 -> "MGB" + 2).
    # The lookahead rejects symbols already followed by a digit/decimal point and
    # stops the one-letter match from splitting a two-letter symbol.
    composition=re.sub(r'([A-Z][a-z]?)(?![a-z0-9.])', r'\g<1>1', composition)
composition=composition.upper()# element symbols are looked up in upper case
if composition[-1].isalpha():# each element has to have a proportion. If number is not mentioned, it is 1. For example H2O is H2O1
    composition=composition+"1"# number one is added because sometimes it is not mentioned

Enter the material composition 
H2O


The first step is to convert this composition (which is a string) into two parallel lists:
    Elements: the list of elements
    Amounts: the proportion of each element in the Elements list, normalized so that the proportions sum to 1.

In [4]:
# extract the elements
# we get two parallel lists, Elements and Amounts
amounts=re.findall(r"[-+]?\d*\.\d+|\d+", composition)# this gets the numbers (integers or decimals)
elements=re.findall(r'(\D+)',composition)# this gets anything that is not digit. (raw string: '\D' is an invalid escape in a normal string literal)
Elements=[i for i in elements if i!='.']# the element list still has decimal points. this line removes '.'
fAmounts=[float(i) for i in amounts]

# Every element needs exactly one amount; otherwise the features computed later
# would silently pair elements with the wrong proportions.
if not Elements or len(Elements)!=len(fAmounts):
    raise ValueError('Could not match every element with an amount in composition %r' % composition)

total=sum(fAmounts)# computed once instead of once per element
if total<=0:
    raise ValueError('The amounts in composition %r must add up to a positive number' % composition)
Amounts=[i/total for i in fAmounts]# normalize the proportions so they add up to 1

Now, let's extract the features

In [5]:
# Properties.csv has the properties of all elements.
# Its 'Element' column becomes the row index, so a row can be selected by element symbol.
properties = pd.read_csv(
    'Properties.csv',
    index_col='Element',
)
# Show the first rows of the table as the cell output.
properties.head()

Unnamed: 0_level_0,Atomicnumber,AtomicMass,IonizationEnergy,AtomicRadius,Density,ElecAffinity,HeatFusion,ThermConduc,Valence
Element,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
H,1,1.00797,1312.0,53,0.09,72.769,0.558,0.1805,1
HE,2,4.0026,2372.3,31,0.18,-48.0,0.02,0.1513,0
LI,3,6.941,520.2,167,530.0,59.632,3.0,85.0,1
BE,4,9.01218,899.5,112,1850.0,-48.0,7.95,190.0,2
B,5,10.81,800.6,87,2340.0,26.989,50.0,27.0,3


In [6]:
# this function returns 10 features for each property (e.g. melting temp, conductivity, etc)
def get_feature(Elements,Amounts,Property):# the Property corresponds to a column in the properties dataframe
    """Compute ten summary statistics of one elemental property for a composition.

    Parameters
    ----------
    Elements : sequence of str
        Row labels of the module-level ``properties`` dataframe, one per element.
    Amounts : sequence of float
        Proportion of each element, in the same order as ``Elements``.
    Property : str
        Column of ``properties`` to summarise.

    Returns
    -------
    tuple
        ``(Mean, Wtd_mean, Gmean, WtdGmean, Entropy, WtdEntropy, Range,
        WtdRange, stDev, WtdStDev)``.

    Raises
    ------
    ValueError
        If ``Elements`` is empty or its length differs from ``Amounts``; also
        raised by ``math.log`` / ``math.pow`` / ``math.sqrt`` when a value is
        outside their domain (e.g. a non-positive ratio in the entropy terms).
    KeyError
        If an element or the property is missing from ``properties``.
    """
    if len(Elements)==0 or len(Elements)!=len(Amounts):
        raise ValueError('Elements and Amounts must be non-empty and have the same length')

    # Look up the property value of every element once.
    values=[properties.loc[element,Property] for element in Elements]
    n=len(values)
    total=sum(values)# hoisted: used by the mean and by both entropy terms

    Mean=total/n
    Wtd_mean=sum(w*v for w,v in zip(Amounts,values))

    # Geometric mean: n-th root of the product. The previous square root was only
    # correct for exactly two elements. The product is accumulated as a float so
    # integer-valued columns cannot overflow.
    # NOTE(review): confirm the training notebook builds Gmean/stDev with the same
    # general (n-element) formulas.
    product=1.0
    for v in values:
        product=product*v
    Gmean=math.pow(product,1.0/n)

    WtdGmean=values[0]**Amounts[0]
    for w,v in zip(Amounts[1:],values[1:]):
        WtdGmean=WtdGmean*v**w

    # Entropy of the property shares v/total.
    Entropy=0
    for v in values:
        share=v/total
        Entropy=Entropy-share*math.log(share)

    # Weighted entropy: shares are scaled by the amounts and renormalised by denomin.
    WtdEntropy=0
    denomin=0
    for w,v in zip(Amounts,values):
        denomin=denomin+w*v/total
    for w,v in zip(Amounts,values):
        A=w*v/total/denomin
        WtdEntropy=WtdEntropy-A*math.log(A)

    propMax=max(values)
    propMin=min(values)
    Range=propMax-propMin

    # Weighted range uses the amounts of the (first) elements holding the max and min value.
    AmountMax=Amounts[values.index(propMax)]
    AmountMin=Amounts[values.index(propMin)]
    WtdRange=propMax*AmountMax-propMin*AmountMin

    # Standard deviation: divide by n. The previous fixed factor 0.5 was only
    # correct for exactly two elements (and gives the same result there).
    stDev=0
    for v in values:
        stDev=stDev+(v-Mean)**2
    stDev=math.sqrt(stDev/n)

    WtdStDev=0
    for w,v in zip(Amounts,values):
        WtdStDev=WtdStDev+w*(v-Wtd_mean)**2
    WtdStDev=math.sqrt(WtdStDev)

    return Mean, Wtd_mean, Gmean, WtdGmean,Entropy,WtdEntropy,Range,WtdRange,stDev,WtdStDev
    

In [8]:
# now we want a function that takes the composition and returns the full feature vector.
def get_features(Elements,Amounts):# Amounts is a list of proportions, and Elements is a list of elements
    """Build the feature tuple for one composition.

    The result starts with the number of elements and is followed by the
    output of ``get_feature`` for each of the 8 properties listed below, in
    that order (81 values when ``get_feature`` yields 10 per property).
    """
    property_names = (
        'AtomicMass',
        'IonizationEnergy',
        'AtomicRadius',
        'Density',
        'ElecAffinity',
        'HeatFusion',
        'ThermConduc',
        'Valence',
    )
    # first entry: how many elements the composition has
    features = (len(Elements),)
    # then append the per-property statistics one property at a time
    for name in property_names:
        features = features + get_feature(Elements, Amounts, name)
    return features

In [9]:
# formulate X, the input features, as a pandas DataFrame

In [10]:
# Arrange the feature tuple as a single row. reshape(1,-1) avoids mutating the
# array's shape in place and avoids hard-coding the feature count; the DataFrame
# constructor below still fails if the count does not match Columns.
X_array=np.array(get_features(Elements,Amounts)).reshape(1,-1)
X=pd.DataFrame(X_array, columns=Columns)
# if the model is xgb, convert X to Matrix.
# if the model is NN, normalize X.
y=model.predict(X)
# y is array-like with one prediction; take the scalar explicitly, because
# formatting an array with %.2f relies on implicit array-to-float conversion,
# which NumPy has deprecated for arrays with ndim > 0.
print('The predicted Tc for %s is %.2f K' %(composition,np.ravel(y)[0]))

The predicted Tc for H2O1 is 38.80 K
