In [1]:
import os
import joblib
import torch.nn as nn
import numpy as np
import pandas as pd
import pickle
import torch
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
import random
from modules.encoder import Encoder, Identity
from modules.function import pymatgen_comp, data_generator, check_cuda, stratify_data

In [2]:
# Seed every global RNG this notebook draws from so a fresh "Restart & Run All"
# reproduces the same fitted models.
random.seed(0)
# scikit-learn estimators built without an explicit random_state fall back to
# NumPy's global RNG, so it has to be seeded as well.
np.random.seed(0)
# Kept as the last expression so the cell still echoes the torch Generator.
torch.manual_seed(0)

<torch._C.Generator at 0x7f40e11b58d0>

In [9]:
# Folder that receives the fitted hardness models written by joblib.dump below.
saved_model_path = 'saved_models/hardness_models'
# exist_ok=True makes the cell idempotent and removes the check-then-create race
# of the former os.path.exists() guard.
os.makedirs(saved_model_path, exist_ok=True)

In [3]:
gfa_dataset_file = 'gfa_dataset.txt'
z_row_column_file = 'Z_row_column.txt'
element_property_file = 'element_property.txt'
common_path = "Files_from_GTDL_paper/{}"


def _load_gtdl_pickle(file_name):
    """Unpickle `file_name` from the `common_path` folder and close the handle.

    NOTE(review): pickle.load can execute arbitrary code embedded in the file;
    only point this at files from a trusted source.
    """
    with open(common_path.format(file_name), 'rb') as handle:
        return pickle.load(handle)


gfa_dataset = _load_gtdl_pickle(gfa_dataset_file)
RC = _load_gtdl_pickle(z_row_column_file)
# Integer value of the fifth field of every RC entry (the "new order").
new_index = [int(entry[4]) for entry in RC]
# Read from disk a second time so Z_row_column and RC remain independent objects.
Z_row_column = _load_gtdl_pickle(z_row_column_file)
# The pickle holds a 4-item sequence; the last item is not used by this notebook.
[property_name_list, property_list, element_name, _] = _load_gtdl_pickle(element_property_file)

In [4]:
# Locate and load the pre-trained PTR encoder.
saved_models_path = 'saved_models'
# Named encoder_type (not `type`) so the builtin type() is not shadowed for the
# rest of the kernel session.
encoder_type = 'PTR'
filename = 'PTR_Encoder.pt'
encoder_file = f'{saved_models_path}/{encoder_type}/{filename}'

# Fail immediately with the real cause; merely printing here would let the
# attribute assignment below die with a NameError (or silently reuse a stale
# PTR_encoder left in the kernel).
if not os.path.exists(encoder_file):
    raise FileNotFoundError(f'No file found! Expected the saved encoder at {encoder_file}')
PTR_encoder = joblib.load(encoder_file)

# Swap the encoder's `mapf` attribute for Identity().
# NOTE(review): presumably this bypasses the final mapping so the encoder returns
# its intermediate features - confirm against modules.encoder.
PTR_encoder.mapf = Identity()

In [5]:
# Read the combined hardness table and keep only rows flagged As-cast/other == 1.
initial_df_path = 'dataset/hardness_combined.csv'
df = pd.read_csv(initial_df_path, index_col=0)

# Single .loc selection: rows by flag, then the two columns used downstream.
data = df.loc[df['As-cast/other'] == 1, ['Composition', 'HardGPa']].values

# Column 0 holds the composition entries, column 1 the hardness values
# (reshaped into a single-column 2-D array).
comps = data[:, 0]
hardGPa = data[:, 1].reshape(-1, 1)

# Project helpers: convert the compositions, then build the encoder input set.
pmg_comps = pymatgen_comp(comps)
comps_dset = data_generator(pmg_comps, property_list, element_name, RC)

In [6]:
# Build a float32 tensor from the generated dataset and push it through the encoder.
enc_input = torch.from_numpy(comps_dset.real_data.astype('float32'))

# Move the input to the GPU only when the project helper reports CUDA is usable.
cuda = check_cuda()
if cuda:
    enc_input = enc_input.cuda()

# Drop the autograd graph and bring the result back to host memory as a NumPy array.
encoded = PTR_encoder(enc_input)
PTR_features = encoded.detach().cpu().numpy()

In [7]:
# Reduce the encoder features to 10 principal components and standardise the target.
# random_state pins the result whenever PCA auto-selects its randomized SVD
# solver; it has no effect on the exact solver.
pca = PCA(n_components=10, random_state=0)
scaler_y = StandardScaler()
X = pca.fit_transform(PTR_features)
# scaler_y is kept so predictions can be mapped back with inverse_transform.
y = scaler_y.fit_transform(hardGPa)

In [8]:
# Hyper-parameter grid for the MLP hardness regressor.
# NOTE(review): (100, 1) means a 100-unit layer followed by a 1-unit layer; if a
# single 100-unit hidden layer was intended it should be written (100,).
param_grid = {'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,1)],
          'activation': ['relu','tanh','logistic'],
          'alpha': [0.0001, 0.05],
          'learning_rate': ['constant','adaptive'],
          'solver': ['adam']}
# random_state fixes weight initialisation and mini-batch shuffling so the grid
# search picks, and the dump stores, the same model on every run.
gridCV = GridSearchCV(MLPRegressor(batch_size=16, max_iter=1000, random_state=0),
                      param_grid=param_grid, cv=10)
# Inputs are standardised before they reach the 10-fold grid search.
pipe = Pipeline([('Scaler', StandardScaler()),('grid',gridCV)])
pipe.fit(X,y.ravel())
# Last expression: joblib.dump returns the list of written files, shown as cell output.
joblib.dump(pipe,f'{saved_model_path}/MLP_hardness_iter0.pt')

['saved_models/hardness_models/MLP_hardness_iter0.pt']