# Imports

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle
from tabulate import tabulate
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.utils import shuffle
from keras.callbacks import CSVLogger

# Data Preperation

### Load Data into Training, Cross Validation, and Testing Data Frames

In [2]:
cities = ['boston', 'london', 'merthyr', 'nottingham', 'scarhill', 'southampton', 'stevenage']
 
wavelengths = ['0.91595','0.449425','1.8025','2.695','3.6025','5.85']

train_X = pd.DataFrame()
cv_X = pd.DataFrame()
test_X = pd.DataFrame()

for city in cities:
    for wavelength in wavelengths:
        if (city == 'merthyr'):
            df = pd.read_csv(f"../Data/trainingDataset/{city}{wavelength}.csv")
            cv_X = pd.concat([cv_X, df])
    
        elif (city == 'stevenage'):
            df = pd.read_csv(f"../Data/trainingDataset/{city}{wavelength}.csv")
            test_X = pd.concat([test_X, df])

        else:
            df = pd.read_csv(f"../Data/trainingDataset/{city}{wavelength}.csv")
            train_X = pd.concat([train_X, df])

### Drop N/A's

In [3]:
print(train_X.isna().sum())
train_X = train_X.dropna()
cv_X = cv_X.dropna()
test_X = test_X.dropna()
print(train_X.isna().sum())

Frequency                  0
Power Loss               631
Distance                 631
Height Difference        631
Peak Avg. Height Diff    631
Peak Avg. Dist.          631
Max Peak                 631
Peak Count               631
dtype: int64
Frequency                0
Power Loss               0
Distance                 0
Height Difference        0
Peak Avg. Height Diff    0
Peak Avg. Dist.          0
Max Peak                 0
Peak Count               0
dtype: int64


### Shuffle datasets for future plotting purposes

In [4]:
train_X = shuffle(train_X)
cv_X = shuffle(cv_X)
test_X = shuffle(test_X)

### Set Targets

In [5]:
train_Y = train_X.pop("Power Loss")
cv_Y = cv_X.pop("Power Loss")
test_Y = test_X.pop("Power Loss")

### Remove non-needed features

In [6]:
train_X.pop('Peak Avg. Height Diff')
train_X.pop('Peak Avg. Dist.')
train_X.pop('Max Peak')
train_X.pop('Peak Count')

cv_X.pop('Peak Avg. Height Diff')
cv_X.pop('Peak Avg. Dist.')
cv_X.pop('Max Peak')
cv_X.pop('Peak Count')

test_X.pop('Peak Avg. Height Diff')
test_X.pop('Peak Avg. Dist.')
test_X.pop('Max Peak')
test_X.pop('Peak Count')

158668    28.0
104905    18.0
64288     29.0
34134     54.0
18206      1.0
          ... 
120368    17.0
26247     18.0
215990    16.0
12429     33.0
10554     15.0
Name: Peak Count, Length: 708053, dtype: float64

### Define Universal CallBack

In [7]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

### Create Normalization Layer

In [9]:
normalizerXL = tf.keras.layers.Normalization(axis=-1)

normalizerXL.adapt(np.array(train_X))

print(normalizerXL.mean.numpy())

Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-10-29 18:38:36.297251: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-10-29 18:38:36.297406: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2022-10-29 18:38:36.378355: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-10-29 18:38:36.460172: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-10-29 18:38:36.480758: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


[[  3.5693133  12.714654  -73.43517  ]]


### Create Dictionary for Saving Results

In [10]:
test_results = {}

cv_results = {}

# Function Definitions

### Batch Size Grid Search

In [11]:
def batch_grid_search(batch_size):
    model = keras.Sequential([
        normalizerXL,
        layers.Dense(64, activation = 'relu'),
        layers.Dense(64, activation = 'relu'),
        layers.Dense(1)
    ])

    model.compile(
        loss = 'mean_absolute_error',
        optimizer = keras.optimizers.Adam(learning_rate = 0.001)
    )

    history_dnn = model.fit(
        train_X,
        train_Y,
        epochs = 500,
        batch_size = batch_size,
        validation_data = (cv_X, cv_Y),
        verbose = 1,
        callbacks=[callback])

    test_results[f"Batch Size: {batch_size}"] = model.evaluate(
        test_X,
        test_Y,
        verbose = 1)

    cv_results[f"Batch Size: {batch_size}"] = model.evaluate(
        cv_X,
        cv_Y,
        verbose = 1)

### Number of Units Grid Search

In [12]:
def unit_grid_search(num_of_units):
    model = keras.Sequential([
        normalizerXL,
        layers.Dense(num_of_units, activation = 'relu'),
        layers.Dense(num_of_units, activation = 'relu'),
        layers.Dense(1)
    ])

    model.compile(
        loss = 'mean_absolute_error',
        optimizer = keras.optimizers.Adam(learning_rate = 0.001)
    )

    history_dnn = model.fit(
        train_X,
        train_Y,
        epochs = 500,
        batch_size = 100000,
        validation_data = (cv_X, cv_Y),
        verbose = 1,
        callbacks=[callback])

    test_results[f"Units: {num_of_units}"] = model.evaluate(
        test_X,
        test_Y,
        verbose = 1)

    cv_results[f"Units: {num_of_units}"] = model.evaluate(
        cv_X,
        cv_Y,
        verbose = 1)

# Grid Search

### Batch Size Search

In [18]:
# batch_sizes = [4, 16, 32, 128, 512, 2048, 8192, 32768, 131072]
# for batches in batch_sizes:
#    batch_grid_search(batches)

### Number of Units Search

In [19]:
num_of_units = [2, 4, 8, 16, 32, 64, 128, 256, 512]
for units in num_of_units:
     unit_grid_search(units)

# Results

In [None]:
pd.DataFrame(cv_results, index=['MAE [dB]']).T