In [1]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
import time

In [2]:
# Load the training arrays from the first HDF5 file.
# The file is opened explicitly read-only ('r') so it can never be created or
# modified by accident, whatever default mode the installed h5py uses.
# Each dataset is copied into memory with [:], so the resulting arrays stay
# usable after the `with` block has closed the file.
with h5py.File('./MAIN_3D_data_2023-10-31_23-25-51.h5', 'r') as F:
  potential_train = F['potential'][:]
  energy_train = F['energy'][:]
  Z_train = F['Z'][:]
  kinetic_train = F['kinetic'][:]

In [3]:
# Load the held-out test arrays from the second HDF5 file.
# The file is opened explicitly read-only ('r') so it can never be created or
# modified by accident, whatever default mode the installed h5py uses.
# Each dataset is copied into memory with [:], so the resulting arrays stay
# usable after the `with` block has closed the file.
with h5py.File('./MAIN_3D_data_2023-11-01_01-04-15.h5', 'r') as F:
  potential_test = F['potential'][:]
  energy_test = F['energy'][:]
  Z_test = F['Z'][:]
  kinetic_test = F['kinetic'][:]

In [4]:
# Build the feature matrices and target vectors for the regressors.
# Train and test sets are taken from the two separately loaded arrays; the
# random split below is kept for reference only and is not executed.
# X_train, X_test, y_train, y_test = train_test_split(potential[:, :, 0], energy, test_size=0.2, random_state=42)

# Features: index 0 along the third axis of each potential array.
# NOTE(review): what that third axis represents is not visible here - confirm.
X_train, X_test = potential_train[:, :, 0], potential_test[:, :, 0]

# Targets: the kinetic arrays flattened to 1-D.
y_train, y_test = kinetic_train.ravel(), kinetic_test.ravel()

In [5]:
# Hyperparameters shared by the two tree-ensemble regressors (RF and XGB).
tree_ensemble_params = dict(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1)

# Instantiate (but do not fit) the regressors that are compared in this notebook.
model_KNN = KNeighborsRegressor(n_neighbors=5)
model_RF = RandomForestRegressor(**tree_ensemble_params)
# The SVR model is constructed here, but its fit call is commented out in the
# training cell, so it is never trained.
model_SVR = SVR(kernel='rbf', C=1e3, gamma=0.1)
model_XGB = XGBRegressor(**tree_ensemble_params)

In [6]:
# Fit each regressor on the training set and print how long the fit took, in
# minutes, followed by a blank line.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock intended for measuring elapsed intervals, so the
# reported duration cannot be distorted by system clock adjustments.
models_to_fit = [
  ('KNN: ', model_KNN),
  ('RF: ', model_RF),
  # ('SVR: ', model_SVR),  # SVR training is disabled; uncomment to include it
  ('XGB: ', model_XGB),
]

for label, model in models_to_fit:
  then = time.perf_counter()
  model.fit(X_train, y_train)
  now = time.perf_counter()
  print(label, (now - then)/60, 'min')
  print()


KNN:  0.002076562245686849 min

RF:  25.87881801525752 min

XGB:  11.287289758523306 min



In [7]:
def _timed_predict(label, model, features):
  """Run model.predict(features), print the elapsed time, and return the predictions.

  The elapsed time is printed as `label <minutes> min`, followed by a blank line.
  time.perf_counter() is used because it is a monotonic, high-resolution clock
  intended for measuring elapsed intervals, unlike the wall clock time.time().
  """
  then = time.perf_counter()
  predictions = model.predict(features)
  now = time.perf_counter()
  print(label, (now - then)/60, 'min')
  print()
  return predictions


# Predict on the test features with every fitted model, timing each call.
y_pred_KNN = _timed_predict('KNN: ', model_KNN, X_test)
y_pred_RF = _timed_predict('RF: ', model_RF, X_test)
y_pred_XGB = _timed_predict('XGB: ', model_XGB, X_test)

KNN:  0.17166006565093994 min

RF:  0.0022661407788594564 min

XGB:  0.0026274681091308593 min



In [8]:
# Test-set mean absolute error of each model, printed twice:
#   1. multiplied by 1000 and rounded to 2 decimal places,
#   2. unscaled, at full precision.
# Rounding is done AFTER scaling, so the printed value carries no
# floating-point artefacts from multiplying an already-rounded number.
# The built-in round() is used so this works whether mean_absolute_error
# returns a NumPy scalar or a plain Python float (which has no .round method).
# Each MAE is computed once and reused for both printouts.
mae_report = [
  ('MAE KNN: ', mean_absolute_error(y_test, y_pred_KNN)),
  ('MAE RF: ', mean_absolute_error(y_test, y_pred_RF)),
  ('MAE XGB: ', mean_absolute_error(y_test, y_pred_XGB)),
]

for label, mae in mae_report:
  print(label, round(float(1000*mae), 2))

for label, mae in mae_report:
  print(label, mae)

MAE KNN:  8.0
MAE RF:  14.69
MAE XGB:  9.5
MAE KNN:  0.007996487486624468
MAE RF:  0.014693888139332
MAE XGB:  0.009497533157685116


In [9]:
# Positions (into the flattened Z_test) of the test samples whose Z value is
# 1, 2, 3 or 4; the arrays are named after the corresponding elements.
# These index arrays are used to select per-element subsets of the test set.
Z_test_hydrogen = np.flatnonzero(Z_test == 1)
Z_test_helium = np.flatnonzero(Z_test == 2)
Z_test_lithium = np.flatnonzero(Z_test == 3)
Z_test_beryllium = np.flatnonzero(Z_test == 4)

In [10]:
def _scaled_mae(y_true, y_pred):
  """Return the mean absolute error of y_pred against y_true, times 1000, rounded to 2 decimals.

  Rounding happens after scaling, so the result carries no floating-point
  artefacts from multiplying an already-rounded number. The built-in round()
  is used so this works whether mean_absolute_error returns a NumPy scalar or
  a plain Python float (which has no .round method).
  """
  return round(float(1000*mean_absolute_error(y_true, y_pred)), 2)


# True targets of the test samples belonging to each element.
y_test_hydrogen = y_test[Z_test_hydrogen]
y_test_helium = y_test[Z_test_helium]
y_test_lithium = y_test[Z_test_lithium]
y_test_beryllium = y_test[Z_test_beryllium]

# Per-element slices of each model's test predictions.
y_pred_KNN_hydrogen = y_pred_KNN[Z_test_hydrogen]
y_pred_KNN_helium = y_pred_KNN[Z_test_helium]
y_pred_KNN_lithium = y_pred_KNN[Z_test_lithium]
y_pred_KNN_beryllium = y_pred_KNN[Z_test_beryllium]

y_pred_RF_hydrogen = y_pred_RF[Z_test_hydrogen]
y_pred_RF_helium = y_pred_RF[Z_test_helium]
y_pred_RF_lithium = y_pred_RF[Z_test_lithium]
y_pred_RF_beryllium = y_pred_RF[Z_test_beryllium]

y_pred_XGB_hydrogen = y_pred_XGB[Z_test_hydrogen]
y_pred_XGB_helium = y_pred_XGB[Z_test_helium]
y_pred_XGB_lithium = y_pred_XGB[Z_test_lithium]
y_pred_XGB_beryllium = y_pred_XGB[Z_test_beryllium]

# Per-element MAE of each model, scaled by 1000 and rounded to 2 decimals.
error_KNN_hydrogen = _scaled_mae(y_test_hydrogen, y_pred_KNN_hydrogen)
error_KNN_helium = _scaled_mae(y_test_helium, y_pred_KNN_helium)
error_KNN_lithium = _scaled_mae(y_test_lithium, y_pred_KNN_lithium)
error_KNN_beryllium = _scaled_mae(y_test_beryllium, y_pred_KNN_beryllium)

error_RF_hydrogen = _scaled_mae(y_test_hydrogen, y_pred_RF_hydrogen)
error_RF_helium = _scaled_mae(y_test_helium, y_pred_RF_helium)
error_RF_lithium = _scaled_mae(y_test_lithium, y_pred_RF_lithium)
error_RF_beryllium = _scaled_mae(y_test_beryllium, y_pred_RF_beryllium)

error_XGB_hydrogen = _scaled_mae(y_test_hydrogen, y_pred_XGB_hydrogen)
error_XGB_helium = _scaled_mae(y_test_helium, y_pred_XGB_helium)
error_XGB_lithium = _scaled_mae(y_test_lithium, y_pred_XGB_lithium)
error_XGB_beryllium = _scaled_mae(y_test_beryllium, y_pred_XGB_beryllium)


In [11]:
# Print the per-element errors, one line per (model, element) pair, grouped by
# model in the order KNN, RF, XGB.
per_element_report = [
  ('MAE KNN hydrogen: ', error_KNN_hydrogen),
  ('MAE KNN helium: ', error_KNN_helium),
  ('MAE KNN lithium: ', error_KNN_lithium),
  ('MAE KNN beryllium: ', error_KNN_beryllium),
  ('MAE RF hydrogen: ', error_RF_hydrogen),
  ('MAE RF helium: ', error_RF_helium),
  ('MAE RF lithium: ', error_RF_lithium),
  ('MAE RF beryllium: ', error_RF_beryllium),
  ('MAE XGB hydrogen: ', error_XGB_hydrogen),
  ('MAE XGB helium: ', error_XGB_helium),
  ('MAE XGB lithium: ', error_XGB_lithium),
  ('MAE XGB beryllium: ', error_XGB_beryllium),
]

for label, value in per_element_report:
  print(label, value)


MAE KNN hydrogen:  1.05
MAE KNN helium:  4.1000000000000005
MAE KNN lithium:  9.889999999999999
MAE KNN beryllium:  16.95
MAE RF hydrogen:  1.8699999999999999
MAE RF helium:  8.319999999999999
MAE RF lithium:  16.47
MAE RF beryllium:  32.11
MAE XGB hydrogen:  1.1900000000000002
MAE XGB helium:  3.93
MAE XGB lithium:  16.75
MAE XGB beryllium:  16.119999999999997


In [12]:
# # save all models
# import pickle
# pickle.dump(model_KNN, open('model_KNN.sav', 'wb'))
# pickle.dump(model_RF, open('model_RF.sav', 'wb'))
# pickle.dump(model_XGB, open('model_XGB.sav', 'wb'))