In [110]:
import pandas as pd
import numpy as np

from datetime import datetime 
import time

import warnings
warnings.filterwarnings('ignore',category=FutureWarning)

from sklearn.metrics import max_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.layers import Dense
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model

In [111]:
# Show which GPUs TensorFlow has detected on this machine.
tf.config.list_physical_devices(device_type='GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]

In [112]:
# Show every physical device TensorFlow has detected (no device_type filter).
# Uses the stable `tf.config` endpoint, the same one the GPU-only query in
# this notebook calls, rather than the deprecated `tf.config.experimental`
# alias.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:1', device_type='XLA_GPU')]

In [113]:
# Read the training and test tables from the local data directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('data/train.csv', 'data/test.csv')
)

In [114]:
# Collect every column name of the training table as a plain Python list.
full_columns = train.columns.to_list()

In [115]:
# Drop the target so only predictor columns remain. The list is rebuilt
# rather than mutated with list.remove() so this cell is safe to re-run:
# a second execution no longer raises ValueError because the name is
# already gone.
full_columns = [col for col in full_columns if col != 'permeability']

In [116]:
# Feature matrix: the training table restricted to the predictor columns.
X = train.reindex(columns = full_columns)

# Target vector the network is fitted against.
y = train.loc[:, 'permeability']

In [117]:
# Hold out a quarter of the rows for evaluation. The fixed random_state
# makes the split repeatable across runs.
X_train, X_true, y_train, y_true = train_test_split(X, y, test_size = 0.25, random_state = 8669)

In [269]:
# ---- Training configuration ----

# Model: loss identifier handed to model.compile
loss = 'mean_squared_error'

# Fitting: maximum number of epochs intended for each fit, and the
# number of samples per gradient update
epochs = 3
batch_size = 64

# Experiment: how many fit/evaluate rounds the training loop performs
test_iterations = 4

In [270]:
# Seed TensorFlow's global random generator so weight initialisation is
# repeatable under Restart & Run All. The value matches the random_state
# already used for the train/hold-out split in this notebook.
# NOTE(review): some GPU kernels can still be non-deterministic - confirm
# whether bit-exact repeatability is required.
tf.random.set_seed(8669)

with tf.device('/gpu:0'):
    # Fully connected regressor: two 128-unit ReLU hidden layers feeding a
    # single linear output unit. No input shape is declared here.
    # Disabled alternative kept from the original cell (note that `ks` is
    # not imported in this notebook's import cell):
    #model.add(Dense(128,activation=ks.layers.LeakyReLU(alpha = 0.01)))
    model = Sequential([
        Dense(128, activation = 'relu'),
        Dense(128, activation = 'relu'),
        Dense(1, activation = 'linear'),
    ])

    # Adam optimiser with the configured loss; MSE is also tracked as a metric.
    model.compile(loss = loss, optimizer = 'adam', metrics = ['mse'])

In [271]:
# Disabled experiment: early stopping / best-weights checkpointing, plus a
# further split of the training data.
# NOTE(review): EarlyStopping and ModelCheckpoint are not imported in this
# notebook's import cell, so they must be imported before these lines are
# re-enabled.
#es = EarlyStopping(monitor = 'loss', patience = 25, restore_best_weights = True)
#mc = ModelCheckpoint(filepath = 'test_model.h5', monitor = 'loss', save_best_only=True)
#X_es_train, X_es_test, y_es_train, y_es_test = train_test_split(X_train, y_train, test_size = 0.25, random_state = 8669)

In [309]:
# Empty metrics table; the training loop appends one row of timing and
# error metrics per run.
model_record = pd.DataFrame(
    columns = ['model_num', 'loss_type', 'time', 'r2', 'mae', 'mse', 'rmse', 'max_error']
)

# Hold-out targets as a one-column frame with a fresh 0..n-1 index; each
# run's predictions and residuals are added beside them as new columns.
predict_record = y_true.to_frame().reset_index(drop = True)

# Run counter stored as a one-element integer array; the training loop
# increments it and uses its value to name the per-run columns.
model_counter = np.zeros(1, dtype = int)

In [310]:
# Fit/evaluate loop: for each round, fit the model on the training split,
# predict the hold-out split, append one metrics row to `model_record`, and
# add the round's predictions ('m<k>') and absolute residuals ('res<k>') to
# `predict_record`.
with tf.device('/gpu:0'):
    # NOTE(review): `model` is re-compiled each round but never re-created.
    # Re-compiling does not reset the layer weights, so each round appears to
    # continue training the same network rather than starting a fresh one -
    # confirm this is intended.
    for i in range(test_iterations):
        model.compile(loss = loss, optimizer = 'adam', metrics = ['mse'])

        model_counter = model_counter + 1
        run_label = str(model_counter[0])
        start_time = datetime.now()

        # Use the configured epoch count (previously hard-coded to 3, which
        # silently ignored the `epochs` setting).
        model.fit(x = X_train, y = y_train.values,
                  batch_size = batch_size, epochs = epochs)

        y_pred = model.predict(X_true)

        # Elapsed wall-clock time covers fitting plus prediction.
        elapsed = format(datetime.now() - start_time)

        # Compute MSE once and reuse it for the RMSE entry.
        mse = mean_squared_error(y_true, y_pred)

        record_list = [
            len(model_record) + 1,
            loss,
            elapsed,
            r2_score(y_true, y_pred),
            mean_absolute_error(y_true, y_pred),
            mse,
            np.sqrt(mse),
            max_error(y_true, y_pred),
        ]
        model_record.loc[len(model_record)] = record_list

        # Store this round's predictions under an explicit column name
        # instead of concatenating an unnamed frame and renaming column 0.
        # ravel() flattens the prediction array so it fits a single column.
        pred_col = 'm' + run_label
        predict_record[pred_col] = y_pred.ravel()

        # Absolute residual against the first column of predict_record,
        # which holds the hold-out target values.
        predict_record['res' + run_label] = (
            predict_record[pred_col] - predict_record.iloc[:, 0]
        ).abs()

Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [311]:
# Display the metrics table: one row per round of the training loop.
model_record

Unnamed: 0,model_num,loss_type,time,r2,mae,mse,rmse,max_error
0,1,mean_squared_error,0:00:01.672796,0.983808,0.464626,1.231934,1.109925,20.805952
1,2,mean_squared_error,0:00:01.677296,0.977724,0.618598,1.694902,1.301884,22.308333
2,3,mean_squared_error,0:00:01.645291,0.981833,0.497529,1.382226,1.175681,20.568579
3,4,mean_squared_error,0:00:01.824822,0.981363,0.529671,1.41796,1.190781,22.35948


In [313]:
# Display the hold-out targets next to each round's predictions (m<k>) and
# absolute residuals (res<k>).
predict_record

Unnamed: 0,permeability,m1,res1,m2,res2,m3,res3,m4,res4
0,5.181770,6.064264,0.882494,5.534821,0.353051,6.282464,1.100694,5.681453,0.499683
1,4.555260,5.123527,0.568267,4.699389,0.144129,5.307390,0.752130,4.890561,0.335301
2,21.224200,21.595940,0.371740,19.411787,1.812413,22.177645,0.953445,20.225426,0.998774
3,0.787535,0.541605,0.245930,0.560904,0.226631,0.631259,0.156276,0.462413,0.325122
4,7.719570,8.196838,0.477268,7.477455,0.242115,8.505194,0.785624,7.739031,0.019461
...,...,...,...,...,...,...,...,...,...
7495,1.291720,1.554233,0.262513,1.434521,0.142801,1.621400,0.329680,1.446961,0.155241
7496,0.189488,0.105246,0.084242,0.127285,0.062203,0.133374,0.056114,-0.007192,0.196680
7497,0.774052,0.804872,0.030820,0.767088,0.006964,0.852874,0.078822,0.720104,0.053948
7498,11.322600,11.464479,0.141879,10.274263,1.048337,11.816126,0.493526,10.793269,0.529331


In [207]:
# Disabled export of the metrics and prediction tables to CSV.
# NOTE(review): the file names say "rnn", but the model built in this
# notebook uses only Dense layers - confirm the intended file names before
# re-enabling.
#model_record.to_csv('data/rnn_results_1.csv')
#predict_record.to_csv('data/rnn_predictions_1.csv')