In [1]:
import os
import joblib
import torch.nn as nn
import numpy as np
import pandas as pd
import pickle
import torch
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
import random
from modules.encoder import Encoder, Identity
from modules.function import pymatgen_comp, data_generator, check_cuda, stratify_data

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
random.seed(0)
torch.manual_seed(0)

<torch._C.Generator at 0x7fd96b43eab0>

In [3]:
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

In [4]:
saved_model_path = 'saved_models/hardness_models'
if not os.path.exists(saved_model_path):
  os.makedirs(saved_model_path)

In [5]:
gfa_dataset_file = 'gfa_dataset.txt'
z_row_column_file = 'Z_row_column.txt'
element_property_file = 'element_property.txt'
common_path = "Files_from_GTDL_paper/{}" 
gfa_dataset = pickle.load(open(common_path.format(gfa_dataset_file), 'rb'))  
RC = pickle.load(open(common_path.format(z_row_column_file), 'rb')) 
new_index=[int(i[4]) for i in RC]#new order 
Z_row_column = pickle.load(open(common_path.format(z_row_column_file), 'rb'))
[property_name_list,property_list,element_name,_]=pickle.load(open(common_path.format(element_property_file), 'rb'))

In [6]:
saved_models_path = 'saved_models'
type = 'PTR'
filename = 'PTR_Encoder.pt'
if os.path.exists(f'{saved_models_path}/{type}/{filename}'):
    PTR_encoder =  joblib.load(f'{saved_models_path}/{type}/{filename}')
else:
    print('No file found!')

PTR_encoder.mapf = Identity()

#Iteration 1 - Train with initial dataset, predict on all experimental values

In [7]:
initial_df_path = 'dataset/hardness_combined.csv'
df = pd.read_csv(initial_df_path,index_col=0)
data = df[['Composition','HardGPa']][df['As-cast/other']==1].values
comps = data[:,0]
hardGPa = data[:,1].reshape(-1,1)
pmg_comps = pymatgen_comp(comps)
comps_dset = data_generator(pmg_comps,property_list,element_name,RC)

In [8]:

enc_input = torch.from_numpy(comps_dset.real_data.astype('float32'))
cuda = check_cuda()
if cuda:
    enc_input = enc_input.cuda()
with torch.no_grad():
    PTR_features = PTR_encoder(enc_input).to('cpu').detach().numpy()

In [9]:
pca = PCA(n_components=10)
scaler_y = StandardScaler()
X = pca.fit_transform(PTR_features)
y = scaler_y.fit_transform(hardGPa)

In [58]:
param_grid = {'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,1)],
          'activation': ['relu','tanh','logistic'],
          'alpha': [0.0001, 0.05],
          'learning_rate': ['constant','adaptive'],
          'solver': ['adam']
          }
gridCV = GridSearchCV(MLPRegressor(batch_size=128), param_grid=param_grid,cv=10)
pipe = Pipeline([('Scaler', StandardScaler()),('grid',gridCV)])
pipe.fit(X,y.ravel())
joblib.dump(pipe,f'{saved_model_path}/MLP_hardness_iter0.pt')

['saved_models/hardness_models/MLP_hardness_iter0.pt']

In [24]:
pipe = joblib.load(f'{saved_model_path}/MLP_hardness_iter0.pt')

In [59]:
experimental_df_path = 'dataset/experimental_hardness.csv'
experimental_df = pd.read_csv(experimental_df_path)

In [60]:
experimental_comps = experimental_df['Composition'].values
experimental_hardGPa = experimental_df['HardGPa'].values.reshape(-1,1)
experimental_pmg_comps = pymatgen_comp(experimental_comps)
experimental_comps_dset = data_generator(experimental_pmg_comps,property_list,element_name,RC)

In [61]:
exp_enc_input = torch.from_numpy(experimental_comps_dset.real_data.astype('float32'))
cuda = check_cuda()
if cuda:
    exp_enc_input = exp_enc_input.cuda()
exp_PTR_features = PTR_encoder(exp_enc_input).to('cpu').detach().numpy()
X_exp = pca.transform(exp_PTR_features)
y_exp_predict = np.round(scaler_y.inverse_transform(pipe.predict(X_exp).reshape(-1,1)),2)
experimental_df['Iteration 1 predictions'] = y_exp_predict

In [62]:
experimental_df

Unnamed: 0,Composition,HardGPa,Iteration 1 predictions
0,Nb0.37Mo0.52W0.11,5.2,8.16
1,Nb0.49Mo0.07W0.44,5.8,7.11
2,Nb0.57Mo0.33W0.10,5.4,8.26
3,Nb0.32Mo0.44W0.24,5.2,7.4
4,Nb0.425Mo0.167W0.407,6.0,6.85
5,Nb0.33Mo0.327W0.344,5.6,6.47
6,Nb0.17W0.32Zr0.18Ti0.32,4.9,5.32
7,Nb0.06Mo0.03W0.27Zr0.3Ti0.3V0.03,4.4,5.33
8,Nb0.47Mo0.13W0.07Zr0.17Ti0.03V0.13,5.0,5.97


In [15]:
experimental_df.to_csv('dataset/iterative_prediction.csv')

#Iteration 2 - Include first three experimental values in training model 

In [31]:
PTR_features_new = np.concatenate([PTR_features,exp_PTR_features[:3]],axis=0)
hardGPa_new = np.concatenate([hardGPa,experimental_hardGPa[:3]],axis=0)
X_new = pca.transform(PTR_features_new)
y_new = scaler_y.transform(hardGPa_new)

In [63]:
param_grid = {'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,1)],
          'activation': ['relu','tanh','logistic'],
          'alpha': [0.0001, 0.05],
          'learning_rate': ['constant','adaptive'],
          'solver': ['adam']}
gridCV = GridSearchCV(MLPRegressor(batch_size=128), param_grid=param_grid,cv=10)
pipe = Pipeline([('Scaler', StandardScaler()),('grid',gridCV)])
pipe.fit(X_new,y_new.ravel())
joblib.dump(pipe,f'{saved_model_path}/MLP_hardness_iter1.pt')

['saved_models/hardness_models/MLP_hardness_iter1.pt']

In [18]:
pipe = joblib.load('saved_models/hardness_models/MLP_hardness_iter1.pt')

In [64]:
X_exp_new = pca.transform(exp_PTR_features)
y_exp_predict = scaler_y.inverse_transform(pipe.predict(X_exp_new).reshape(-1,1))

In [66]:
experimental_df['Iteration 2 predictions'] = y_exp_predict

In [67]:
experimental_df

Unnamed: 0,Composition,HardGPa,Iteration 1 predictions,Iteration 2 predictions
0,Nb0.37Mo0.52W0.11,5.2,8.16,5.543756
1,Nb0.49Mo0.07W0.44,5.8,7.11,6.112259
2,Nb0.57Mo0.33W0.10,5.4,8.26,5.432451
3,Nb0.32Mo0.44W0.24,5.2,7.4,5.518746
4,Nb0.425Mo0.167W0.407,6.0,6.85,5.920314
5,Nb0.33Mo0.327W0.344,5.6,6.47,5.224339
6,Nb0.17W0.32Zr0.18Ti0.32,4.9,5.32,4.907175
7,Nb0.06Mo0.03W0.27Zr0.3Ti0.3V0.03,4.4,5.33,5.108167
8,Nb0.47Mo0.13W0.07Zr0.17Ti0.03V0.13,5.0,5.97,5.538674
