In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
import pickle

from mendeleev import element
import pymatgen as pmg
from ast import literal_eval

from scipy.stats import gmean, entropy

from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

import xgboost as xgb

%matplotlib inline

## Importing notebook containing relevant feature vector

create_features(chem_compostion)  <br/>
create_elementMap()  <br/>
and some arithmetic functions for calcualting the feature vectors

In [2]:
%run Function_List.ipynb

## Load Models
### For critical temperature prediction: critTemp_pred
### For identifying superconductors: id_SC

In [3]:
regr_model = 'Hamidieh_Model.bin'
clas_model = 'SC_Classifier_prop.bin'

with open(regr_model, 'rb') as f:
    critTemp_pred = pickle.load(f)

with open(clas_model, 'rb') as obj:
    id_SC = pickle.load(obj)

In [4]:
obj = open('Hamidieh_Model_Errors.txt', 'r')

obj.readline()
error_lin = obj.readline()
metr_Values = error_lin.split('\t')

obj.close()

score_mae_train = float(metr_Values[0])
score_rmse_train = float(metr_Values[1])
score_mae_val = float(metr_Values[2])
score_rmse_val = float(metr_Values[3])

In [5]:
print('%.2f Kelvin' %(score_rmse_val))

9.27 Kelvin


### Check: whether the model has already seen the compound

In [6]:
SC_Data = pd.read_csv('unique_m.csv')
sc_list = list(SC_Data['material'])

def is_new(compound):   
    iden = 0
    for item in sc_list:
        if item == compound:
            iden = 1 
            break
    return iden

## Result Prediction

In [7]:
def pred_material(compound):
    
    compositon = pmg.Composition(compound)

    x_vec = create_features(compositon)

    class_value = id_SC.predict(np.array(x_vec).reshape(1,-1))
    tc = critTemp_pred.predict(np.array(x_vec).reshape(1,-1))
    
    print('--------------------------------------------------')
    print('Material: ' + compound)
    print('--------------------------------------------------')
    
    if class_value:
        print('Prediction: (Superconductor)')
        print('\nPredicted Tc for ' + compound + ' is: %.2f K\n' %(tc))
        if is_new(compound):
            print('(Status = Existing) Already present in Supercon Data.\n')
        else:
            print('(Status = Discovery) New superconductor!\n')
    else:
        print('Prediction:  (Non-Superconductor)\n')
    
    
    return class_value, tc, is_new(compound)

## RMSE error of Critical Temperature Prediciton is:
### 9.27 Kelvin

In [8]:
compound = 'CaF2'

class_value, tc, nu = pred_material(compound)

--------------------------------------------------
Material: CaF2
--------------------------------------------------
Prediction: (Superconductor)

Predicted Tc for CaF2 is: 25.65 K

(Status = Discovery) New superconductor!



In [9]:
compound = 'LaH10'

class_value, tc, nu = pred_material(compound)

--------------------------------------------------
Material: LaH10
--------------------------------------------------
Prediction: (Superconductor)

Predicted Tc for LaH10 is: 12.34 K

(Status = Discovery) New superconductor!



### Testing for some superconductors on wiki

link: https://en.wikipedia.org/wiki/List_of_superconductors

#### Importing formatted data

In [10]:
wiki_SC = pd.read_csv('new_SC_copy.csv', sep='\t')
wiki_SC.drop(['Unnamed: 0'], axis=1, inplace=True)

wiki_SC.columns = ['Material', 'Tc']

In [11]:
list_Tc = list(wiki_SC['Tc'])
list_material = list(wiki_SC['Material'])

In [12]:
wiki_SC = wiki_SC.applymap(str)

### Critical Temperature values for the wiki materials

In [13]:
Y_tc = [str(item) for item in list_Tc]
Y_tc = [literal_eval(item) for item in Y_tc]

### Classification values for the wiki materials

In [14]:
Y_class = [1]*len(list_material)

### Classifying new materials imported from wiki

In [15]:
pred_class = []
pred_tc = []

non_sc_index = []

old_count = 0

In [16]:
for index, item in enumerate(list_material):
    class_value, tc, nu = pred_material(item)
    
    pred_class.append(class_value)
    pred_tc.append(tc)
    
    if nu:
        old_count = old_count + 1
        non_sc_index.append(index)

--------------------------------------------------
Material: Al
--------------------------------------------------
Prediction: (Superconductor)

Predicted Tc for Al is: 1.28 K

(Status = Discovery) New superconductor!

--------------------------------------------------
Material: Bi
--------------------------------------------------
Prediction: (Superconductor)

Predicted Tc for Bi is: 5.86 K

(Status = Discovery) New superconductor!

--------------------------------------------------
Material: Cd
--------------------------------------------------
Prediction: (Superconductor)

Predicted Tc for Cd is: 0.59 K

(Status = Discovery) New superconductor!

--------------------------------------------------
Material: Ga
--------------------------------------------------
Prediction: (Superconductor)

Predicted Tc for Ga is: 5.96 K

(Status = Discovery) New superconductor!

--------------------------------------------------
Material: Hf
--------------------------------------------------
Predictio

  log_a = np.log(a)


--------------------------------------------------
Material: Nb3Al
--------------------------------------------------
Prediction: (Superconductor)

Predicted Tc for Nb3Al is: 15.11 K

(Status = Discovery) New superconductor!

--------------------------------------------------
Material: Nb3Ge
--------------------------------------------------
Prediction: (Superconductor)

Predicted Tc for Nb3Ge is: 9.10 K

(Status = Discovery) New superconductor!

--------------------------------------------------
Material: NbO
--------------------------------------------------
Prediction: (Superconductor)

Predicted Tc for NbO is: 1.83 K

(Status = Discovery) New superconductor!

--------------------------------------------------
Material: NbN
--------------------------------------------------
Prediction: (Superconductor)

Predicted Tc for NbN is: 13.01 K

(Status = Discovery) New superconductor!

--------------------------------------------------
Material: Nb3Sn
---------------------------------------

In [17]:
print('No. of previously seen superconductors in the wiki dataset:   ', old_count)

No. of previously seen superconductors in the wiki dataset:    3


### Accuracy measures of the Classification model

In [18]:
print('Accuracy Score\t\t: ' + str(accuracy_score(Y_class,pred_class)))
print('Precision Score\t\t: ' + str(precision_score(Y_class,pred_class)))
print('Recall Score\t\t: ' + str(recall_score(Y_class,pred_class)))
print('F1 Score\t\t: ' + str(f1_score(Y_class,pred_class)))

Accuracy Score		: 0.927536231884058
Precision Score		: 1.0
Recall Score		: 0.927536231884058
F1 Score		: 0.9624060150375939


### RMSE of the Tc prediction

In [19]:
# Removing tc for non-superconductors

for item in non_sc_index:
    del Y_tc[item]
    del pred_tc[item]

In [20]:
rmse_val = sqrt(mean_squared_error(Y_tc, pred_tc))

print('RMSE of Tc Prediction:\t %.2f' %(rmse_val))

RMSE of Tc Prediction:	 16.77
