# Test to Ensure the GPU Is Functional and Available

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
from  IPython import display
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from sklearn import datasets
import pandas as pd
import numpy as np
import datetime
import os
from matplotlib import pyplot as plt
import pathlib
import shutil
import tempfile
import time
# Report the installed TensorFlow version.
print(tf.__version__)

import warnings
# Ignore FutureWarning messages raised after this point.
warnings.simplefilter(action='ignore', category=FutureWarning)

__DD:__ Check to see if CUDA GPU is available:

In [None]:
# Ask TensorFlow whether a CUDA GPU is usable, without requiring any
# minimum compute capability.
tf.test.is_gpu_available(cuda_only=True, min_cuda_compute_capability=None)

__DD:__ Try the newer function suggested by the warnings above:

In [None]:
# List the physical devices visible to TensorFlow (no device-type filter).
tf.config.list_physical_devices()

In [None]:
# Count the GPUs visible to TensorFlow. Uses the stable
# tf.config.list_physical_devices API (already used elsewhere in this
# notebook) rather than its tf.config.experimental alias.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

__DD:__ I was wondering how to make sure the GPU does the processing rather than the CPU. I found the following excerpt in the TensorFlow documentation:  

"If a TensorFlow operation has both CPU and GPU implementations, by default the GPU devices will be given priority when the operation is assigned to a device. For example, tf.matmul has both CPU and GPU kernels. On a system with devices CPU:0 and GPU:0, the GPU:0 device will be selected to run tf.matmul unless you explicitly request running it on another device."  

## Case Study 12

In [None]:
#os.getcwd()
#os.chdir('C:\\Users\\danie\\Documents\\GitHub\\Quantifying-The-World\\Case Study 6\\data')

__DD:__ To get the TensorFlow docs package working, I had to clone its GitHub repository to my local file system and run:  
_pip install -q C:\Users\danie\Documents\GitHub\docs_

In [None]:
# Fetch the gzipped HIGGS CSV and open it as a CSV dataset that reads the
# compressed file directly.
FEATURES = 28
gz = tf.keras.utils.get_file(
    fname='HIGGS.csv.gz',
    origin='https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz',
)
# One float column default per CSV field: FEATURES feature columns plus the
# leading label column.
ds = tf.data.experimental.CsvDataset(
    gz,
    record_defaults=[float()] * (FEATURES + 1),
    compression_type="GZIP",
)

def pack_row(*row):
  """Repack CSV columns into a ``(features, label)`` pair.

  Args:
    *row: The parsed CSV columns; the first is the label and the rest are
      feature columns. The notebook maps this over batched data, so each
      column is presumably a 1-D tensor of batch values.

  Returns:
    A tuple ``(features, label)`` where ``features`` is the feature columns
    stacked along axis 1 and ``label`` is the first column unchanged.
  """
  label, *feature_columns = row
  features = tf.stack(feature_columns, 1)
  return features, label

# Repack in batches of 10000 rows so pack_row runs once per batch instead of
# once per row, then flatten back to individual (features, label) examples.
packed_ds = (
    ds.batch(10000)
    .map(pack_row)
    .unbatch()
)

In [None]:
# Sanity check on the first 1000 examples: print the first feature vector and
# plot a 101-bin histogram of every feature value in that batch.
for features,label in packed_ds.batch(1000).take(1):
  print(features[0])
  plt.hist(features.numpy().flatten(), bins = 101)

__DD:__ This is where we will tweak the settings to mimic the article

In [None]:
# Split sizes as per the article: 2.5 million training examples and
# 100,000 validation examples.
N_TRAIN = 2500000
N_VALIDATION = 100000

# Shuffle buffer size; equal to the size of the training split.
BUFFER_SIZE = N_TRAIN

# Batch size = 1000, as per the article's GitHub repository.
BATCH_SIZE = 1000
STEPS_PER_EPOCH = N_TRAIN // BATCH_SIZE

In [None]:
# Validation split: the first N_VALIDATION examples, cached and then batched.
validate_ds = (
    packed_ds
    .take(N_VALIDATION)
    .cache()
    .batch(BATCH_SIZE)
)

# Training split: the next N_TRAIN examples, cached, then shuffled, repeated
# indefinitely, and batched. Because of repeat(), the number of batches per
# epoch has to be given to fit() via steps_per_epoch.
train_ds = (
    packed_ds
    .skip(N_VALIDATION)
    .take(N_TRAIN)
    .cache()
    .shuffle(BUFFER_SIZE)
    .repeat()
    .batch(BATCH_SIZE)
)

In [None]:
# Build the network layer by layer: two 300-unit tanh layers (the first with
# L2 weight regularisation), 50% dropout, and a single-unit output layer with
# no activation, so the model emits raw logits.
reproduced_model = tf.keras.Sequential()
reproduced_model.add(
    layers.Dense(
        300,
        activation='tanh',
        kernel_regularizer=regularizers.l2(1e-5),
        input_shape=(FEATURES,),
    )
)
reproduced_model.add(layers.Dense(300, activation='tanh'))
reproduced_model.add(layers.Dropout(0.5))
# Two further hidden layers, currently disabled:
# reproduced_model.add(layers.Dense(300, activation='tanh'))
# reproduced_model.add(layers.Dense(300, activation='tanh'))
reproduced_model.add(layers.Dense(1))
reproduced_model.summary()

In [None]:
# Configure training: Adam with learning rate 0.05 and binary cross-entropy
# computed on the model's raw logits. The same cross-entropy is also tracked
# as a metric named 'binary_crossentropy' (without the regularisation term
# that the loss includes), alongside accuracy.
# NOTE(review): 'accuracy' may apply a 0.5 threshold to the logits rather than
# to probabilities - confirm the reported accuracy is what is intended.
reproduced_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.05),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=[
        tf.keras.losses.BinaryCrossentropy(from_logits=True, name='binary_crossentropy'),
        'accuracy',
    ],
    run_eagerly=False,
)

In [None]:
# Display the current local time; this cell sits just before the training
# cell, so it serves as a start timestamp.
# NOTE(review): '%H' is the 24-hour clock but is combined with '%p' (AM/PM);
# '%I' may have been intended.
time.strftime('%H:%M%p %Z on %b %d, %Y')

In [None]:
# Early stopping: end training once the 'binary_crossentropy' training metric
# has not improved for 10 consecutive epochs. `callback` must be defined here
# because fit() below receives it via `callbacks`.
# NOTE(review): this monitors the training metric; 'val_binary_crossentropy'
# would track the validation split instead - confirm which is intended.
callback = tf.keras.callbacks.EarlyStopping(monitor='binary_crossentropy', patience=10)

# epochs=10000 is only an upper bound; the early-stopping callback is expected
# to end training well before it. train_ds repeats indefinitely, so
# steps_per_epoch defines the length of an epoch.
reproduced_model.fit(train_ds,
    steps_per_epoch = STEPS_PER_EPOCH,
    epochs=10000,
    callbacks=[callback],
    validation_data=validate_ds,
    verbose=0
)

In [None]:
# Display the current local time; this cell sits just after the training
# cell, so it serves as an end timestamp.
# NOTE(review): '%H' is the 24-hour clock but is combined with '%p' (AM/PM);
# '%I' may have been intended.
time.strftime('%H:%M%p %Z on %b %d, %Y')

In [None]:
# Compute the loss and the compiled metrics on the validation split.
reproduced_model.evaluate(validate_ds)