In [2]:
import pandas as pd
path='/kaggle/input/important12/important12.csv'
data = pd.read_csv(path)
data=data.set_index('HE')
data.head()
data.shape

(4344, 1)

In [3]:
x_train = data.loc[:'20-05-2022 23:00']
x_train.head()
x_test = data.loc['21-06-2022 00:00':]
x_test.head()

Unnamed: 0_level_0,MWh
HE,Unnamed: 1_level_1
21-06-2022 00:00,10051
21-06-2022 01:00,9793
21-06-2022 02:00,9685
21-06-2022 03:00,9659
21-06-2022 04:00,9778


In [4]:
#scaling the data
from sklearn.preprocessing import MinMaxScaler
min_max_scaler = MinMaxScaler(feature_range=(0, 1))
x_train_scaled = min_max_scaler.fit_transform(x_train.copy())
x_test_scaled = min_max_scaler.transform(x_test.copy())

In [5]:
print(x_train_scaled)


[[0.22387024]
 [0.20218579]
 [0.16844479]
 ...
 [0.40645329]
 [0.32804233]
 [0.24347298]]


In [7]:
# create a model by subclassing Model class in tensorflow
from tensorflow.keras import Model, Sequential
class AutoEncoder(Model):
  """
  Parameters
  ----------
  output_units: int
    Number of output units
  
  code_size: int
    Number of units in bottle neck
  """

  def __init__(self, output_units, code_size=8):
    super().__init__()
    self.encoder = Sequential([
      Dense(64, activation='relu'),
      Dropout(0.1),
      Dense(32, activation='relu'),
      Dropout(0.1),
      Dense(16, activation='relu'),
      Dropout(0.1),
      Dense(code_size, activation='relu')
    ])
    self.decoder = Sequential([
      Dense(16, activation='relu'),
      Dropout(0.1),
      Dense(32, activation='relu'),
      Dropout(0.1),
      Dense(64, activation='relu'),
      Dropout(0.1),
      Dense(output_units, activation='sigmoid')
    ])
  
  def call(self, inputs):
    encoded = self.encoder(inputs)
    decoded = self.decoder(encoded)
    return decoded

In [8]:
from tensorflow.keras.layers import Dense, Dropout
model = AutoEncoder(output_units=x_train_scaled.shape[1])
# configurations of model
model.compile(loss='msle', metrics=['mse'], optimizer='adam')

history = model.fit(
    x_train_scaled,
    x_train_scaled,
    epochs=20,
    batch_size=512,
    validation_data=(x_test_scaled, x_test_scaled)
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [10]:
def find_threshold(model, x_train_scaled):
  reconstructions = model.predict(x_train_scaled)
  # provides losses of individual instances
  reconstruction_errors = tf.keras.losses.msle(reconstructions, x_train_scaled)

  # threshold for anomaly scores
  threshold = np.mean(reconstruction_errors.numpy()) \
      + np.std(reconstruction_errors.numpy())
  return threshold

def find_threshold_method_two(model, x_train_scaled):
  # another method to find threshold
  reconstructions = model.predict(x_train_scaled)
  # provides losses of individual instances
  reconstruction_errors = tf.keras.losses.msle(reconstructions, x_train_scaled)

  threshold_2 = np.percentile(reconstruction_errors, 95)
  return threshold_2

def get_predictions(model, x_test_scaled, threshold):
  predictions = model.predict(x_test_scaled)
  print(predictions)
  #print('Precision: %.3f' % precision_score(x_test_scaled, predictions))
  #print('Accuracy: %.3f' % accuracy_score(x_test_scaled,predictions))
  #print('Recall: %.3f' % recall_score(x_test_scaled, predictions))

  # provides losses of individual instances
  errors = tf.keras.losses.msle(predictions, x_test_scaled)
  # 0 = anomaly, 1 = normal
  anomaly_mask = pd.Series(errors) > threshold
  preds = anomaly_mask.map(lambda x: 1.0 if x == True else 0.0)
  return preds,predictions 

In [11]:
import tensorflow as tf
import numpy as np
threshold = find_threshold(model, x_train_scaled)
print(f"Threshold method one: {threshold}")

threshold_2 = find_threshold_method_two(model, x_train_scaled)
print(f"Threshold method two: {threshold_2}")

Threshold method one: 0.0023338880832650595
Threshold method two: 0.0024900425017977163


In [12]:
## 1 = anomaly, 0 = normal
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
preds,predictions = get_predictions(model, x_test_scaled, threshold)
print(preds)


[[0.15060392]
 [0.13242666]
 [0.12516217]
 [0.12358706]
 [0.13139348]
 [0.15639919]
 [0.25190768]
 [0.33589175]
 [0.32711607]
 [0.30990425]
 [0.33132672]
 [0.33732766]
 [0.35361347]
 [0.36597   ]
 [0.3832466 ]
 [0.4181846 ]
 [0.43947875]
 [0.51164186]
 [0.5353614 ]
 [0.50479496]
 [0.5100137 ]
 [0.44282258]
 [0.32176998]
 [0.18119523]
 [0.18725255]
 [0.19883361]
 [0.17366914]
 [0.16295747]
 [0.2076231 ]
 [0.2595552 ]
 [0.34245464]
 [0.384772  ]
 [0.38893327]
 [0.38577026]
 [0.39579123]
 [0.39011037]
 [0.38377878]
 [0.4019162 ]
 [0.39217332]
 [0.40793243]
 [0.42843565]
 [0.4585994 ]
 [0.5193447 ]
 [0.49058503]
 [0.50479496]
 [0.4421791 ]
 [0.35465226]
 [0.20749699]
 [0.23851632]
 [0.19498718]
 [0.18852504]
 [0.1830586 ]
 [0.19564255]
 [0.2589378 ]
 [0.34632036]
 [0.3866029 ]
 [0.38743615]
 [0.37677747]
 [0.39328003]
 [0.3826083 ]
 [0.38944808]
 [0.40890068]
 [0.4264489 ]
 [0.44417447]
 [0.49694523]
 [0.5570536 ]
 [0.61119646]
 [0.57390803]
 [0.5408406 ]
 [0.4876585 ]
 [0.37367004]
 [0.30

In [13]:
!pip install -U keras-tuner

[0m

In [16]:
import kerastuner as kt

class AutoEncoderTuner(Model):

  def __init__(self, hp, output_units, code_size=8):
    super().__init__()
    dense_1_units = hp.Int('dense_1_units', min_value=16, max_value=72, step=4)
    dense_2_units = hp.Int('dense_2_units', min_value=16, max_value=72, step=4)
    dense_3_units = hp.Int('dense_3_units', min_value=16, max_value=72, step=4)
    dense_4_units = hp.Int('dense_4_units', min_value=16, max_value=72, step=4)
    dense_5_units = hp.Int('dense_5_units', min_value=16, max_value=72, step=4)
    dense_6_units = hp.Int('dense_6_units', min_value=16, max_value=72, step=4)
    
    self.encoder = Sequential([
      Dense(dense_1_units, activation='relu'),
      Dropout(0.1),
      Dense(dense_2_units, activation='relu'),
      Dropout(0.1),
      Dense(dense_3_units, activation='relu'),
      Dropout(0.1),
      Dense(code_size, activation='relu')
    ])
    self.decoder = Sequential([
      Dense(dense_4_units, activation='relu'),
      Dropout(0.1),
      Dense(dense_5_units, activation='relu'),
      Dropout(0.1),
      Dense(dense_6_units, activation='relu'),
      Dropout(0.1),
      Dense(output_units, activation='sigmoid')
    ])
  
  def call(self, inputs):
    encoded = self.encoder(inputs)
    decoded = self.decoder(encoded)
    return decoded


def build_model(hp):
  model = AutoEncoderTuner(hp, 140)
  hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
  model.compile(
      loss='msle',
      optimizer=Adam(learning_rate=hp_learning_rate),
  )
  return model

In [15]:
pip install kerastuner

[31mERROR: Could not find a version that satisfies the requirement kerastuner (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for kerastuner[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [17]:
from tensorflow.keras.optimizers import Adam
tuner = kt.Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=20,
    factor=3,
    directory='autoencoder',
    project_name='tuning_autoencoder6'
)

tuner.search(
    x_train_scaled, 
    x_train_scaled, 
    epochs=20, 
    batch_size=512,
    validation_data=(x_test_scaled, x_test_scaled)
)

Trial 30 Complete [00h 00m 06s]
val_loss: 0.01828206516802311

Best val_loss So Far: 3.984203431173228e-05
Total elapsed time: 00h 01m 56s


In [18]:
hparams = [f'dense_{i}_units' for i in range(1,7)] + ['learning_rate']
best_hyperparams = tuner.get_best_hyperparameters()
for hps in hparams:
  print(f"{hps}: {best_hyperparams[0][hps]}")

dense_1_units: 68
dense_2_units: 60
dense_3_units: 16
dense_4_units: 72
dense_5_units: 24
dense_6_units: 32
learning_rate: 0.01


In [19]:
best_model = tuner.get_best_models()[0]
best_model.compile(loss='msle', optimizer=Adam(0.001))

best_model.fit(
    x_train,
    x_train,
    epochs=20,
    batch_size=512,
    validation_data=(x_test_scaled, x_test_scaled)
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7eaf38711d20>

In [20]:
threshold_ = find_threshold(best_model, x_train_scaled)
print(f"Threshold: {threshold}")
preds_ = get_predictions(best_model, x_test_scaled, threshold_)
print(preds)


Threshold: 0.0023338880832650595
[[0.06447978 0.06465891 0.06559097 ... 0.06569984 0.06319913 0.06902071]
 [0.06447978 0.06465891 0.06559097 ... 0.06569984 0.06319913 0.06902071]
 [0.06447978 0.06465891 0.06559097 ... 0.06569984 0.06319913 0.06902071]
 ...
 [0.06447978 0.06465891 0.06559097 ... 0.06569984 0.06319913 0.06902071]
 [0.06447978 0.06465891 0.06559097 ... 0.06569984 0.06319913 0.06902071]
 [0.06447978 0.06465891 0.06559097 ... 0.06569984 0.06319913 0.06902071]]
0      0.0
1      0.0
2      0.0
3      0.0
4      0.0
      ... 
235    1.0
236    1.0
237    0.0
238    0.0
239    0.0
Length: 240, dtype: float64


In [21]:
from sklearn.metrics import precision_score, accuracy_score
import numpy as np
x_test_scaled_array=np.array(x_test_scaled)
x_test_scaled_array = pd.DataFrame(x_test_scaled,index = x_test.index,columns=['test scale'])
x_test_scaled_array.head()
predictions_array=np.array( predictions)
predictions_array = pd.DataFrame( predictions,index = x_test.index,columns=['predicted scale'])
predictions_array.head()

Unnamed: 0_level_0,predicted scale
HE,Unnamed: 1_level_1
21-06-2022 00:00,0.150604
21-06-2022 01:00,0.132427
21-06-2022 02:00,0.125162
21-06-2022 03:00,0.123587
21-06-2022 04:00,0.131393


In [23]:
predictions_array.to_csv('AUTOENCODER_forecast.csv')