In [None]:
# Colab-only helper: exposes the user's Google Drive inside the VM filesystem.
from google.colab import drive

# Mount the Drive under /content/gdrive; later cells read their CSV inputs
# from beneath this directory.
drive.mount(mountpoint='/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
!pip install pyyaml h5py



In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

from sklearn import metrics

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.utils import normalize
from keras.layers.advanced_activations import LeakyReLU

In [None]:
# Load the input descriptors and the target properties from the mounted Drive.
# Absolute paths (rooted at the '/content/gdrive' mount point) are used so the
# reads do not depend on the kernel's current working directory.
# NOTE(review): later cells pair rows of `features` with rows of `properties`
# by position -- this assumes both CSVs list the same materials in the same
# order; confirm against how the files were generated.
features = pd.read_csv("/content/gdrive/My Drive/cofs/combined_features.csv")
properties = pd.read_csv("/content/gdrive/My Drive/cofs/properties.csv")

In [None]:
# Composition and structure features -> CO2Qst (random-forest baseline).

# Use every column of the feature table as model input.
x = features
y = properties["CO2Qst_kJ_mol_"]

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

# Fit the scaler on the training rows only and reuse it for the test rows,
# so no test-set statistics leak into training.
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Seed the forest too: without `random_state` the bootstrap/feature sampling
# differs on every run and the reported R^2 cannot be reproduced.
rf = RandomForestRegressor(random_state=42)
rf.fit(x_train_scaled, y_train)

# Score on the held-out rows.
y_pred_rf = rf.predict(x_test_scaled)
print('R^2:', metrics.r2_score(y_test, y_pred_rf))

R^2: 0.8281150631948342


In [None]:
# Composition-only features -> CO2Qst (random-forest baseline).

# Remove the six structural descriptor columns so that only the remaining
# (composition) columns are used as model input.
x = features.drop(columns=[
    'density_kg_m_3_',
    'poreVolume_cm_3_g_',
    'largestIncludedSphereDiameter_A_',
    'largestFreeSphereDiameter_A_',
    'largestIncludedSphereAlongFreeSpherePathDiameter_A_',
    'voidFraction_widom_',
])
y = properties["CO2Qst_kJ_mol_"]

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

# Fit the scaler on the training rows only and reuse it for the test rows,
# so no test-set statistics leak into training.
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Seed the forest too: without `random_state` the bootstrap/feature sampling
# differs on every run and the reported R^2 cannot be reproduced.
rf = RandomForestRegressor(random_state=42)
rf.fit(x_train_scaled, y_train)

# Score on the held-out rows.
y_pred_rf = rf.predict(x_test_scaled)
print('R^2:', metrics.r2_score(y_test, y_pred_rf))

R^2: 0.2994732664229056


In [None]:
# Composition-only features -> a structural feature (here: density),
# random-forest baseline.

# Remove the six structural descriptor columns (including the density target
# itself) so that only the remaining (composition) columns are model input.
x = features.drop(columns=[
    'density_kg_m_3_',
    'poreVolume_cm_3_g_',
    'largestIncludedSphereDiameter_A_',
    'largestFreeSphereDiameter_A_',
    'largestIncludedSphereAlongFreeSpherePathDiameter_A_',
    'voidFraction_widom_',
])
y_dens = features["density_kg_m_3_"]

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y_dens, test_size=0.25, random_state=42)

# Fit the scaler on the training rows only and reuse it for the test rows,
# so no test-set statistics leak into training.
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Seed the forest too: without `random_state` the bootstrap/feature sampling
# differs on every run and the reported R^2 cannot be reproduced.
rf = RandomForestRegressor(random_state=42)
rf.fit(x_train_scaled, y_train)

# Score on the held-out rows.
y_pred_rf = rf.predict(x_test_scaled)
print('R^2:', metrics.r2_score(y_test, y_pred_rf))

R^2: 0.5406536825447104


In [None]:
# Composition-only features -> density: grid search over the number of trees.

# Remove the six structural descriptor columns (including the density target
# itself) so that only the remaining (composition) columns are model input.
x = features.drop(columns=[
    'density_kg_m_3_',
    'poreVolume_cm_3_g_',
    'largestIncludedSphereDiameter_A_',
    'largestFreeSphereDiameter_A_',
    'largestIncludedSphereAlongFreeSpherePathDiameter_A_',
    'voidFraction_widom_',
])
y_dens = features["density_kg_m_3_"]

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y_dens, test_size=0.25, random_state=42)

# Fit the scaler on the training rows only and reuse it for the test rows,
# so no test-set statistics leak into training.
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Seed the forest so that score differences between candidates reflect
# `n_estimators` rather than run-to-run sampling noise.
rf = RandomForestRegressor(random_state=42)

# Candidate forest sizes: 100, 200, ..., 1000 trees.
param_grid = {
    "n_estimators": [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
}

# No `cv`/`scoring` is passed, so scikit-learn's defaults apply.
# `n_jobs=-1` spreads the candidate/fold fits over all available cores; this
# search is expensive (up to 1000 trees per fit) and is otherwise very slow.
CV_rf = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    n_jobs=-1,
)

# The search only ever sees the training rows; the test split stays untouched.
CV_rf.fit(x_train_scaled, y_train)

# Report the winning setting from the fitted search object. (The original
# referenced an undefined name `g_search`, which raises NameError.)
print(CV_rf.best_params_)

KeyboardInterrupt: ignored

In [None]:
# Composition-only features -> density, using a forest of 800 trees.

# Remove the six structural descriptor columns (including the density target
# itself) so that only the remaining (composition) columns are model input.
x = features.drop(columns=[
    'density_kg_m_3_',
    'poreVolume_cm_3_g_',
    'largestIncludedSphereDiameter_A_',
    'largestFreeSphereDiameter_A_',
    'largestIncludedSphereAlongFreeSpherePathDiameter_A_',
    'voidFraction_widom_',
])
y_dens = features["density_kg_m_3_"]

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y_dens, test_size=0.25, random_state=42)

# Fit the scaler on the training rows only and reuse it for the test rows,
# so no test-set statistics leak into training.
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Seed the forest too: without `random_state` the bootstrap/feature sampling
# differs on every run and the reported R^2 cannot be reproduced.
rf = RandomForestRegressor(n_estimators=800, random_state=42)
rf.fit(x_train_scaled, y_train)

# Score on the held-out rows.
y_pred_rf = rf.predict(x_test_scaled)
print('R^2:', metrics.r2_score(y_test, y_pred_rf))

R^2: 0.542246406862904
