In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime 
import time

import warnings
warnings.filterwarnings('ignore',category=FutureWarning)

from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PowerTransformer, MinMaxScaler, PolynomialFeatures
from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
from sklearn.pipeline import Pipeline

import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.layers import Dense
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
#from tensorflow.keras.utils import multi_gpu_model

In [2]:
# Let wide DataFrames render up to 500 columns instead of being truncated.
pd.options.display.max_columns = 500

In [3]:
# Show which GPUs TensorFlow can see on this machine.
tf.config.list_physical_devices(device_type = 'GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]

In [4]:
# Show every physical device TensorFlow can see (no device-type filter).
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:1', device_type='XLA_GPU')]

In [5]:
#TF_XLA_FLAGS=--tf_xla_auto_jit=2 path/to/your/tf/program

In [6]:
#CUDA_VISIBLE_DEVICES = 0

In [7]:
# Read the training and test CSV files from the local data/ directory.
train, test = pd.read_csv('data/train.csv'), pd.read_csv('data/test.csv')

In [8]:
# Start from every column name in the training frame (the target is removed later).
full_columns = train.columns.to_list()

In [9]:
# Drop the target so `full_columns` holds feature names only.
# The membership check keeps this cell safe to re-run: a bare `.remove()`
# raises ValueError once 'permeability' has already been taken out.
if 'permeability' in full_columns:
    full_columns.remove('permeability')

In [10]:
# Feature matrix: the training frame restricted to the columns in `full_columns`.
X = train.reindex(columns = full_columns)

# Regression target.
y = train.loc[:, 'permeability']

In [11]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 8669)

In [12]:
# Power-transform every feature column (step labelled 'yeo-johnson');
# any column not listed would be passed through unchanged.
ct = ColumnTransformer(
    [('yeo-johnson', PowerTransformer(), full_columns)],
    remainder = 'passthrough',
)

# Preprocessing pipeline: power transform first, then min-max scaling.
pipe = Pipeline([
    ('transform', ct),
    ('scale', MinMaxScaler()),
])

In [13]:
# Fit the preprocessing on the training split only, then apply the fitted
# pipeline to the held-out split.
# NOTE: both names are rebound to the transformed output, so re-running this
# cell would push already-transformed data back through the pipeline.
X_train, X_test = pipe.fit_transform(X_train), pipe.transform(X_test)

In [15]:
#strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"], 
                                          #cross_device_ops=tf.distribute.HierarchicalCopyAllReduce())
                                          #cross_device_ops=tf.distribute.ReductionToOneDevice())
   



In [31]:
# Cross-device reduction implementation handed to the strategy below.
cross_tower_ops = tf.distribute.HierarchicalCopyAllReduce(num_packs = 1)

# Mirrored strategy restricted to the first GPU.
strategy = tf.distribute.MirroredStrategy(
    cross_device_ops = cross_tower_ops,
    devices = ['/gpu:0'],
)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


In [32]:
# Seed TensorFlow's global random generator before any layer is created so
# that weight initialisation and batch shuffling are repeatable between runs.
# The value matches the seed used for the train/test split.
tf.random.set_seed(8669)

# The model is created and compiled inside the distribution-strategy scope.
# (Alternative tried earlier: `with tf.device('/gpu:0'):`.)
with strategy.scope():
    model = Sequential()

    # Alternative tried earlier: a 64-unit first layer with LeakyReLU(alpha = 0.01).
    model.add(Dense(32, activation = 'relu'))
    model.add(Dense(32, activation = 'relu'))
    # Single linear unit: one real-valued regression output.
    model.add(Dense(1, activation='linear'))

    # MSE is both the optimised loss and the reported metric.
    model.compile(loss = 'mean_squared_error', optimizer = 'adam', metrics = ['mse'])

In [86]:
#es = EarlyStopping(monitor = 'loss', patience = 25, restore_best_weights = True)
#mc = ModelCheckpoint(filepath = 'test_model.h5', monitor = 'loss', save_best_only=True)
#X_es_train, X_es_test, y_es_train, y_es_test = train_test_split(X_train, y_train, test_size = 0.25, random_state = 8669)

In [87]:
# Results table: one row per training run (run number, elapsed time, MAE, MSE).
# Later cells read and append to `model_record`, so it has to exist; on a fresh
# kernel those cells fail with NameError if this definition stays commented out.
# It is created only when missing, so re-running this cell keeps rows that
# earlier runs have already recorded.
if 'model_record' not in globals():
    model_record = pd.DataFrame(columns = ['model_num','time','mae','mse'])

In [108]:
# Temporary list that will hold the run number, elapsed time and metrics for this run
record_list = []

# Record the moment timing begins
start_time = datetime.now()

In [33]:
# Train for 5 epochs in mini-batches of 64; `history` keeps the per-epoch values.
# (Alternative tried earlier: wrapping the call in `with tf.device('/gpu:0'):`.)
history = model.fit(X_train, y_train.values, epochs = 5, batch_size = 64)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [111]:
# Predict on the held-out split once and reuse the result for both metrics,
# instead of running inference twice on the same data.
test_predictions = model.predict(X_test)

# Build the record from scratch rather than extending the existing list, so
# re-running this cell cannot leave extra values behind that would no longer
# line up with the columns of `model_record`.
record_list = [
    len(model_record) + 1,                           # run number (1-based)
    format(datetime.now() - start_time),             # elapsed time since start_time, as text
    mean_absolute_error(y_test, test_predictions),   # MAE on the held-out split
    mean_squared_error(y_test, test_predictions),    # MSE on the held-out split
]

In [112]:
# Add this run's values as a new row, labelled with the current row count.
model_record.loc[model_record.shape[0]] = record_list

In [113]:
# Display the values recorded for the latest run.
record_list

[8, '0:00:46.734354', 0.3844177726690221, 0.8535248534165079]

In [114]:
# Display the table of all runs recorded so far.
model_record

Unnamed: 0,model_num,time,mae,mse
0,1,0:06:31.783906,0.354856,0.878008
1,2,0:00:36.668682,0.336185,0.804366
2,3,0:00:45.524226,0.43042,1.027962
3,4,0:00:42.042400,0.464801,0.956097
4,5,0:00:46.762098,0.45002,1.096582
5,6,0:00:39.952118,0.409797,0.914438
6,7,0:00:16.768155,0.383027,0.897352
7,8,0:00:46.734354,0.384418,0.853525


In [25]:
# Mean absolute error of the model's predictions on the held-out split.
mean_absolute_error(y_true = y_test, y_pred = model.predict(X_test))

0.47637357176152073

In [26]:
# Mean squared error of the model's predictions on the held-out split.
mean_squared_error(y_true = y_test, y_pred = model.predict(X_test))

1.0440733366503838

In [27]:
# Root mean squared logarithmic error on the held-out split.
# np.log1p(x) computes log(1 + x) directly and stays accurate for values near
# zero, where forming `x + 1` first loses precision.
# NOTE(review): the output layer is linear, so a prediction <= -1 would make
# the logarithm undefined -- confirm predictions stay above -1 for this data.
np.sqrt(mean_squared_error(np.log1p(y_test), np.log1p(model.predict(X_test))))

0.08347928865435927

# Plot the per-epoch training curves stored in `history`.
plt.figure(figsize = (10,6))

# Label for each curve Keras may have recorded. The loss is mean squared error
# (see the compile call), so 'loss' is an MSE curve, not an MAE curve.
curve_labels = {
    'loss': 'Training loss (MSE)',
    'val_loss': 'Validation loss (MSE)',
    'mse': 'Training MSE',
    'val_mse': 'Validation MSE',
    'mae': 'Training MAE',
    'val_mae': 'Validation MAE',
}

# Only draw the curves that exist: the 'val_*' entries are present only when
# fit() is given validation data, and 'mae' only when it is a compiled metric.
# Indexing a missing key directly raises KeyError.
for key, label in curve_labels.items():
    if key in history.history:
        plt.plot(history.history[key], label = label)

plt.xlabel('Epoch')
plt.ylim(0, 1)
plt.legend();

In [28]:
#X = pipe.fit_transform(X)

In [29]:
#test = pipe.transform(test)

In [30]:
#results = model.predict(test)

In [31]:
#results.shape

In [32]:
#test_results = pd.DataFrame()

In [33]:
#results = pd.DataFrame(results)

In [34]:
#test_results['permeability'] = results[0]

In [35]:
#test_results.to_csv('data/nn/nn_results_reproduce2.csv', index_label = 'id')