In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime 
import time

import warnings
warnings.filterwarnings('ignore',category=FutureWarning)

from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PowerTransformer, MinMaxScaler, PolynomialFeatures
from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
from sklearn.pipeline import Pipeline

import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.layers import Dense
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.metrics import RootMeanSquaredError
#from tensorflow.keras.utils import multi_gpu_model

In [2]:
pd.set_option('display.max_columns', 500)

In [3]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]

In [4]:
tf.config.experimental.list_physical_devices(device_type = None)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:XLA_CPU:0', device_type='XLA_CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),
 PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:0', device_type='XLA_GPU'),
 PhysicalDevice(name='/physical_device:XLA_GPU:1', device_type='XLA_GPU')]

In [5]:
#TF_XLA_FLAGS=--tf_xla_auto_jit=2 path/to/your/tf/program

In [6]:
#CUDA_VISIBLE_DEVICES = 0

In [7]:
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

In [8]:
full_columns = train.columns.tolist()

In [9]:
full_columns.remove('permeability')

In [10]:
X = pd.DataFrame(train, columns = full_columns)

y = train['permeability']

In [11]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state = 8669, test_size = 0.30
)

In [12]:
ct = ColumnTransformer(transformers = [
    ('yeo-johnson', PowerTransformer(), full_columns),
], remainder = 'passthrough')

pipe = Pipeline(steps = [
    ('transform', ct),
    ('scale', MinMaxScaler())
])

In [13]:
X_train = pipe.fit_transform(X_train)
X_test = pipe.transform(X_test)

In [14]:
#strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"], 
                                          #cross_device_ops=tf.distribute.HierarchicalCopyAllReduce())
                                          #cross_device_ops=tf.distribute.ReductionToOneDevice())
   



In [15]:
cross_tower_ops = tf.distribute.HierarchicalCopyAllReduce()

strategy = tf.distribute.MirroredStrategy(devices = ['/gpu:0'], cross_device_ops = cross_tower_ops)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)


In [16]:
loss = 'mean_squared_error'

In [17]:
#with strategy.scope():
with tf.device('/gpu:0'):
    model = Sequential()

    #model.add(Dense(64,activation=ks.layers.LeakyReLU(alpha = 0.01)))
    model.add(Dense(32, activation = 'relu'))
    model.add(Dense(32, activation = 'relu'))
    model.add(Dense(1, activation='linear'))
    
    model.compile(loss = loss, optimizer = 'adam', metrics = ['mse'])

In [18]:
#es = EarlyStopping(monitor = 'loss', patience = 25, restore_best_weights = True)
#mc = ModelCheckpoint(filepath = 'test_model.h5', monitor = 'loss', save_best_only=True)
#X_es_train, X_es_test, y_es_train, y_es_test = train_test_split(X_train, y_train, test_size = 0.25, random_state = 8669)

In [19]:
#Creates a dataframe by which we will eventually put in our list created above
model_record = pd.DataFrame(columns = ['model_num', 'loss_type','time','mae','mse', 'rmse'])

In [179]:
#Declares a start time to began keeping time
start_time = datetime.now()

#Temporary list to hold values of the recorded time and other model values
record_list = list()
predict_record = pd.DataFrame(columns = [])

In [109]:
with tf.device('/gpu:0'):
    history = model.fit(x = X_train,y = y_train.values,
                          batch_size = 64, epochs = 5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [110]:
record_list.extend([len(model_record)+1,
                    loss, 
                    format(datetime.now() - start_time), 
                    mean_absolute_error(y_test, model.predict(X_test)), 
                    mean_squared_error(y_test, model.predict(X_test)), 
                    np.sqrt(mean_squared_error(y_test, model.predict(X_test)))
                   ])

In [111]:
model_record.loc[len(model_record)] = record_list

In [112]:
y_pred = model.predict(X_test)

In [191]:
y_test_df = pd.DataFrame(y_test)
y_pred_df = pd.DataFrame(model.predict(X_test))

In [193]:
y_test_df.reset_index(inplace = True)

In [182]:
y_pred_df

Unnamed: 0,0
0,5.681249
1,4.446689
2,22.785587
3,0.830394
4,7.406282
...,...
7495,1.378931
7496,0.034193
7497,0.677342
7498,10.584839


In [183]:
predict_record = pd.concat([predict_record, y_test_df], axis = 1, ignore_index = True)

In [185]:
predict_record = pd.concat([predict_record, y_pred_df], axis = 1, ignore_index = True)

Loss Type
Accuracy
Mean Absolute Error
Mean Square Error
Root Mean Squared Error

Accuracy? 



In [186]:
predict_record

Unnamed: 0,0,1
0,,5.681249
1,,4.446689
2,5.892080,22.785587
3,,0.830394
4,,7.406282
...,...,...
24969,4.642320,
24970,0.318776,
24974,6.047100,
24983,0.847674,


In [29]:
mean_absolute_error(y_test, model.predict(X_test))

0.6830927143413164

In [30]:
mean_squared_error(y_test, model.predict(X_test))

1.766487190124079

In [31]:
np.sqrt(mean_squared_error(y_test, model.predict(X_test)))

1.3290926190917167

In [32]:
#def rmse(predictions, targets): 
#    return np.sqrt(mean_squared_error(predictions, targets))

#rmse(y_test, y_pred)

In [33]:
#X = pipe.fit_transform(X)

In [34]:
#test = pipe.transform(test)

In [35]:
#results = model.predict(test)

In [36]:
#results.shape

In [37]:
#test_results = pd.DataFrame()

In [38]:
#results = pd.DataFrame(results)

In [39]:
#test_results['permeability'] = results[0]

In [40]:
#test_results.to_csv('data/nn/nn_results_reproduce2.csv', index_label = 'id')