In [1]:
import os
import joblib
import torch.nn as nn
import numpy as np
import pandas as pd
import pickle
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
import random
from modules.function import pymatgen_comp, data_generator_vec

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

In [3]:
saved_model_path = 'saved_models/hardness_models'
if not os.path.exists(saved_model_path):
  os.makedirs(saved_model_path)

In [4]:
initial_df_path = 'dataset/hardness_combined.csv'
df = pd.read_csv(initial_df_path,index_col=0)
data = df[['Composition','HardGPa']][df['As-cast/other']==1].values
comps = data[:,0]
y = data[:,1].reshape(-1,1)
pmg_comps = pymatgen_comp(comps)
comp_dset = data_generator_vec(pmg_comps)
X, el_list = comp_dset.real_data, comp_dset.elements

In [5]:
np.random.seed(0)
param_grid = {'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,1)],
          'activation': ['relu'],
          'alpha': [0.0001, 0.005, 0.001, 0.05, 0.01],
          'learning_rate': ['constant','adaptive'],
          'max_iter':[200,500,1000],
          'batch_size':[32,64,128,256]
          }
gridCV = GridSearchCV(MLPRegressor(), param_grid=param_grid,cv=10, scoring = 'neg_root_mean_squared_error')
pipe = Pipeline([('Scaler', StandardScaler()),('grid',gridCV)])
pipe.fit(X,y.ravel())

In [6]:
experimental_df_path = 'dataset/experimental_hardness.csv'
experimental_df = pd.read_csv(experimental_df_path)

In [7]:
experimental_comps = experimental_df['Composition'].values
experimental_comps_pmg = pymatgen_comp(experimental_comps)
experimental_dset = data_generator_vec(experimental_comps_pmg,el_list)
experimental_X = experimental_dset.real_data
experimental_y = experimental_df['HardGPa'].values.reshape(-1,1)

In [8]:
pipe.predict(experimental_X), experimental_y

(array([10.554276 , 10.616626 , 12.083549 ,  9.363297 ,  9.80988  ,
         8.731265 ,  4.4888544,  5.8354344, 10.442975 ], dtype=float32),
 array([[5.2],
        [5.8],
        [5.4],
        [5.2],
        [6. ],
        [5.6],
        [4.9],
        [4.4],
        [5. ]]))

In [9]:
pipe['grid'].cv_results_['split9_test_score'].mean()

-2.138186934030892

In [10]:
X_new = np.concatenate((X,experimental_X[:3]))
y_new = np.concatenate((y,experimental_y[:3]))
X_new.shape,y_new.shape

((920, 15), (920, 1))

In [14]:
np.random.seed(0)
param_grid = {'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,1)],
          'activation': ['relu'],
          'alpha': [0.0001, 0.005, 0.001, 0.05, 0.01],
          'learning_rate': ['constant','adaptive'],
          'max_iter':[200,500,1000],
          'batch_size':[32,64,128,256]
          }
gridCV = GridSearchCV(MLPRegressor(), param_grid=param_grid,cv=10)
pipe1 = Pipeline([('Scaler', StandardScaler()),('grid',gridCV)])
pipe1.fit(X_new,y_new.ravel())

In [13]:
pipe1.predict(experimental_X), experimental_y

(array([5.3520865, 5.837278 , 5.57777  , 4.798512 , 5.182418 , 4.211207 ,
        5.835443 , 6.6768126, 3.2837305], dtype=float32),
 array([[5.2],
        [5.8],
        [5.4],
        [5.2],
        [6. ],
        [5.6],
        [4.9],
        [4.4],
        [5. ]]))