In [110]:
import pandas as pd
import numpy as np

from datetime import datetime 
import time

import warnings
warnings.filterwarnings('ignore',category=FutureWarning)

from sklearn.metrics import max_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras

from tensorflow.keras import layers
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential, load_model

In [111]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]

In [112]:
tf.config.experimental.list_physical_devices(device_type = None)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:1', device_type='XLA_GPU')]

In [113]:
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

In [114]:
full_columns = train.columns.tolist()

In [115]:
full_columns.remove('permeability')

In [116]:
X = pd.DataFrame(train, columns = full_columns)

y = train['permeability']

In [117]:
X_train, X_true, y_train, y_true = train_test_split(
    X, y, random_state = 8669, test_size = 0.25
)

In [333]:
#Declare the number of model iterations we will run 
test_iterations = 2

#Declare the number of hidden layers to add
layers = 4

#Declare the density of the hidden layers
density = 128

#Declare the type of activation for the hidden layers
activation = 'relu'

#Assign the loss function the model will use to train
loss = 'mean_squared_error'

#Declare the batch size for use in the model
batch_size = 64

#Declare the maximum number of epochs for our model
epochs = 3

In [334]:
#Assigns the computation to be performed via GPU Device:0
with tf.device('/gpu:0'):
    model = Sequential()

    #Creates a for loop that will add the number of layers based on the variable declared in the previous cell    
    for i in range(layers):
        #Adds a model layer with density and activation based on the variables declared in the previous cell
        model.add(Dense(density, activation = activation))
    
    model.add(Dense(1, activation='linear'))
    
    model.compile(loss = loss, optimizer = 'adam', metrics = ['mse'])

In [335]:
#es = EarlyStopping(monitor = 'loss', patience = 25, restore_best_weights = True)
#mc = ModelCheckpoint(filepath = 'test_model.h5', monitor = 'loss', save_best_only=True)
#X_es_train, X_es_test, y_es_train, y_es_test = train_test_split(X_train, y_train, test_size = 0.25, random_state = 8669)

In [336]:
#Creates a dataframe by which we will eventually put in our list created above
model_record = pd.DataFrame(columns = ['model_num', 'loss_type', 'time', 'r2', 'mae', 'mse', 'rmse', 'max_error'])

#Creates a dataframe by which our model's predicted values and true values will be stored
predict_record = pd.DataFrame(y_true).reset_index(drop = True)

#Creates a numpy array by which the for loop will use to count model runs and is then used to name df columns
model_counter = np.array([0])

In [337]:
#Assigns the computation to be performed via GPU Device:0
with tf.device('/gpu:0'):
    
#Performs model training repeatedly based on the variable declared previously    
    for i in range(test_iterations):
        model.compile(loss = loss, optimizer = 'adam', metrics = ['mse'])

        #Creates an empty list for storing model metrics and other information 
        record_list = list() 
        #Starts a counter that adds 1 everytime a fitting is performed
        model_counter = model_counter + 1
        #Starts a timer to end after the round of fitting is complete
        start_time = datetime.now()
        
        #Fits our model/batch_size and epochs are declared previously
        model.fit(x = X_train, y = y_train.values, 
                  batch_size = batch_size, epochs = epochs)
        
        #Saves our model predictions
        y_pred = model.predict(X_true)
        
        #Saves our model run #, time to run, and model metrics to our temporary list
        record_list.extend([len(model_record)+1,
                            loss, 
                            format(datetime.now() - start_time),
                            r2_score(y_true, y_pred),
                            mean_absolute_error(y_true, y_pred), 
                            mean_squared_error(y_true, y_pred), 
                            np.sqrt(mean_squared_error(y_true, y_pred)), 
                            max_error(y_true, y_pred)
                           ])        
        
        #Adds the temporary list of model metrics (etc) to the end of a dataframe
        model_record.loc[len(model_record)] = record_list
        
        #Converts our predictions to a dataframe so it will play nice
        y_pred_df = pd.DataFrame(y_pred)
        
        #Adds predictions as a column to the end of a dataframe and names is accordingly
        predict_record = pd.concat([predict_record, y_pred_df], axis = 1)
        predict_record = predict_record.rename(columns = {0 : 'm' + str(model_counter[0])})
        
        #Calculates the residual values for each prediction and stores it as a dataframe 
        residuals_df = pd.DataFrame(abs(predict_record.iloc[:,len(predict_record.columns)-1] - predict_record.iloc[:,0]))
        
        #Adds y residuals as a column to the end of a dataframe and names is accordingly
        predict_record = pd.concat([predict_record, residuals_df], axis = 1)
        predict_record = predict_record.rename(columns = {0 : 'res' + str(model_counter[0])})

Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [338]:
model_record

Unnamed: 0,model_num,loss_type,time,r2,mae,mse,rmse,max_error
0,1,mean_squared_error,0:00:10.143274,0.935792,0.935586,4.885278,2.210266,22.329377
1,2,mean_squared_error,0:00:10.100265,0.982783,0.504617,1.309935,1.144524,20.857359


In [339]:
predict_record

Unnamed: 0,permeability,m1,res1,m2,res2
0,5.181770,5.883117,0.701347,5.839814,0.658044
1,4.555260,5.317620,0.762360,5.215045,0.659785
2,21.224200,24.989389,3.765189,20.695509,0.528691
3,0.787535,0.411944,0.375591,0.421174,0.366361
4,7.719570,8.250360,0.530790,8.039955,0.320385
...,...,...,...,...,...
7495,1.291720,1.412444,0.120724,1.402342,0.110622
7496,0.189488,0.257418,0.067930,0.313389,0.123901
7497,0.774052,0.660407,0.113645,0.674692,0.099360
7498,11.322600,12.417261,1.094661,11.124081,0.198519


In [340]:
#Nifty call to confirm our variables were properly inputted into our model
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_32 (Dense)             (None, 128)               37376     
_________________________________________________________________
dense_33 (Dense)             (None, 128)               16512     
_________________________________________________________________
dense_34 (Dense)             (None, 128)               16512     
_________________________________________________________________
dense_35 (Dense)             (None, 128)               16512     
_________________________________________________________________
dense_36 (Dense)             (None, 1)                 129       
Total params: 87,041
Trainable params: 87,041
Non-trainable params: 0
_________________________________________________________________


In [207]:
#model_record.to_csv('data/rnn_results_1.csv')
#predict_record.to_csv('data/rnn_predictions_1.csv')