# Power plant energy output prediction

In [None]:
%%bash
# Abort on the first failing command so a broken download or archive is not
# silently carried into the following cells.
set -euo pipefail

# Start from a clean slate so re-running the cell gives the same result.
rm -rf CCPP/

# Retrieve the data from the UCI repository.
wget -q https://archive.ics.uci.edu/static/public/294/combined+cycle+power+plant.zip -O ccpp.zip

# -o overwrites without prompting (the cell is non-interactive), -q keeps the log short.
unzip -q -o ccpp.zip && rm ccpp.zip

# Keep only the Excel workbook(s). `find` copes with file names containing
# spaces and does not fail when there is nothing to delete, unlike
# `rm $(ls | grep -v ...)`.
find CCPP -type f ! -name '*.xlsx' -delete

### Import libraries

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

# Seed the global random number generators used by the notebook so that
# "Restart Kernel -> Run All" reproduces the same results.
SEED_NUMBER = 22
np.random.seed(SEED_NUMBER)      # NumPy global RNG (used by pandas' DataFrame.sample when no random_state is given)
tf.random.set_seed(SEED_NUMBER)  # TensorFlow global seed

# Record the TensorFlow version next to the results for provenance.
print(tf.__version__)

### Data preprocessing

In [None]:
# Load the workbook extracted by the download cell (read_excel returns the
# first sheet by default).
dataset = pd.read_excel('CCPP/Folds5x2_pp.xlsx')

# Preview three rows; the fixed random_state makes the preview identical on
# every run instead of depending on the current RNG state.
dataset.sample(3, random_state=SEED_NUMBER)

#### Features and target

In [None]:
# Every column except the last is a model input; the last column holds the
# value to predict. Both are converted to plain NumPy arrays.
x = dataset.iloc[:, :-1].to_numpy()
y = dataset.iloc[:, -1].to_numpy()

#### Split into training & testing sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20 % of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

### Neural network

#### Init NN

In [None]:
# Fully connected network: two hidden ReLU layers of six units each, followed
# by a single output unit with no activation specified. The input size is not
# declared here, so it is left to Keras to infer from the training data.
nn = tf.keras.models.Sequential([
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=6, activation='relu'),
    tf.keras.layers.Dense(units=1),
])

#### Training NN

##### Compile

In [None]:
# 'accuracy' is a classification metric and carries little meaning for this
# regression target; it is retained only because the plotting cell reads
# history.history['accuracy']. Mean absolute error is tracked as well so the
# training log reports the error in the same units as the target.
nn.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['accuracy', 'mean_absolute_error'],
)

##### Fit on the training set

In [None]:
# Train for 100 epochs in mini-batches of 32 samples; the returned History
# object is kept because the metrics plot reads its per-epoch values.
history = nn.fit(
    x_train,
    y_train,
    epochs=100,
    batch_size=32,
)

##### Training metrics plot

In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

# Per-epoch training curves recorded by nn.fit().
acc = np.array(history.history['accuracy'])
loss = np.array(history.history['loss'])
epochs = range(1, len(acc)+1)

# Lower y-limit: 8 is kept as the default, but it is relaxed whenever the loss
# drops below it so the loss curve is never clipped and the axis can never end
# up inverted. The bound must stay strictly positive because the y axis is
# logarithmic.
Y_MIN_DEFAULT = 8.0
lowest_loss = float(np.min(loss))
y_min = min(Y_MIN_DEFAULT, 0.9 * lowest_loss) if lowest_loss > 0 else Y_MIN_DEFAULT

fig, ax = plt.subplots()
ax.grid(ls=":")
ax.set_yscale('log')
ax.set_title("Training metrics per epoch")

# x axis: major tick every 10 epochs, minor tick every 5, small margin on the right
ax.set_xticks(np.arange(0, np.max(epochs)+1, 10))
ax.xaxis.set_minor_locator(MultipleLocator(5))
ax.set_xlim([0, np.max(epochs)+0.5])

# y axis: from the bound computed above to just past the largest loss
ax.set_ylim([y_min, np.max(loss)+0.5])

# Accuracy is multiplied by 1000 before plotting (as the legend states),
# presumably to bring it closer to the numeric range of the loss.
ax.plot(epochs, acc*1000, c="black", lw=2, label="accuracy (x 1000)")
ax.plot(epochs, loss, c="blue", lw=2, label="loss")

ax.set_xlabel("Epochs")
ax.set_ylabel("Metrics")

ax.legend(frameon=True, fancybox=True, facecolor="lightgray",
           edgecolor="dimgray", framealpha=1.0,)
plt.show()

#### Predictions

In [None]:
# Run the trained network on the held-out test features.
y_pred = nn.predict(x_test)

##### Comparison between test and predictions

In [None]:
# Residuals (prediction minus true value). Both operands are forced into
# column vectors first so the subtraction is element-wise rather than
# broadcasting a column against a flat array.
diff = y_pred.reshape(-1, 1) - y_test.reshape(-1, 1)

In [None]:
# 1-based position of each element in the test sample, used as x coordinate
sample = range(1, len(y_pred)+1)

fig, ax = plt.subplots()
ax.grid(ls=":")

# one point per test element: prediction minus true value
ax.scatter(sample, diff, s=10)

# x axis starts at 0 and leaves a small margin after the last element
ax.set_xlim(0, max(sample) + 0.5)

# axis labels
ax.set(
    xlabel="Sample element",
    ylabel="Difference (y$_{\\rm pred}$ - y$_{\\rm test}$)",
)

plt.show()