In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_files = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
# Load the power-consumption CSV into `df` and preview the first rows.
DATA_PATH = '/kaggle/input/energy-demand-forecast/featured_power_consumption.csv'
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
import pandas as pd
# Summarise the target column: mean / min / median / max in one Series,
# plus the mean on its own for a labelled print.
power = df['Global_active_power']
statistics = power.agg(['mean', 'min', 'median', 'max'])
average = power.mean()

print(statistics)
print('avg', average)

In [None]:
# Show the smallest and largest values of the Datetime column.
# NOTE(review): the column is loaded without date parsing, so min/max are
# presumably string comparisons -- correct only for ISO-style timestamps.
datetimes = df['Datetime']
print("Start Datetime:", datetimes.min())
print("End Datetime:", datetimes.max())

In [None]:
# Count missing values per column (isna is the canonical spelling of isnull).
df.isna().sum()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Turn the target column into a single-feature column vector, as the scaler expects 2-D input.
dataset = df['Global_active_power'].to_numpy().reshape(-1, 1)

# Rescale the target to the [0, 1] range.
# NOTE(review): the scaler is fitted on the whole series, i.e. before the
# train/test split, so test-set min/max leak into the scaling. Changing this
# would alter the inputs expected by any model already trained on this scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit(dataset).transform(dataset)

In [None]:
# Split the scaled series chronologically (first 80% train, last 20% test) and
# turn each part into supervised (window -> next value) samples.

def create_sequences(series, look_back):
    """Build sliding-window samples from a 1-D series.

    Each sample pairs `look_back` consecutive values with the value that
    immediately follows them.

    Parameters
    ----------
    series : array-like
        Values in time order; flattened to 1-D before windowing.
    look_back : int
        Number of past steps in every input window.

    Returns
    -------
    X : np.ndarray, shape (n_samples, look_back)
    y : np.ndarray, shape (n_samples,)
        where n_samples = max(len(series) - look_back, 0).
    """
    series = np.asarray(series).reshape(-1)
    n_samples = len(series) - look_back
    if n_samples <= 0:
        # Series too short for even one window: return correctly shaped empties.
        return (np.empty((0, look_back), dtype=series.dtype),
                np.empty((0,), dtype=series.dtype))

    # sliding_window_view yields len - look_back + 1 windows; the last one has
    # no following value to predict, so only the first n_samples are kept.
    # (The previous loop stopped one step earlier and lost the final valid sample.)
    windows = np.lib.stride_tricks.sliding_window_view(series, look_back)[:n_samples]

    # Copy out of the read-only strided view so downstream code gets ordinary arrays.
    return np.array(windows), series[look_back:].copy()


train_size = int(len(scaled_data) * 0.8)
train, test = scaled_data[:train_size], scaled_data[train_size:]

# Number of past time steps fed to the model for each prediction.
look_back = 168

# Prepare the training dataset
X_train, y_train = create_sequences(train[:, 0], look_back)

# Prepare the testing dataset
X_test, y_test = create_sequences(test[:, 0], look_back)


In [None]:
# Dimensions of the training windows before the feature axis is added.
np.shape(X_train)

In [None]:
# Dimensions of the training targets (one value per window).
np.shape(y_train)

In [None]:
# Give every window an explicit trailing feature axis of size 1:
# (samples, look_back) -> (samples, look_back, 1).
X_train = X_train.reshape((X_train.shape[0], look_back, 1))
X_test = X_test.reshape((X_test.shape[0], look_back, 1))

In [None]:
# Dimensions of the training windows after reshaping to three axes.
np.shape(X_train)

In [None]:
# Report the final input shapes; both should be (samples, look_back, 1).
for label, windows in (("X_train", X_train), ("X_test", X_test)):
    print(f"Shape of {label}:", windows.shape)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, BatchNormalization, Flatten, AdditiveAttention, Multiply

# Hyper-parameters of the network defined below.
LSTM_UNITS = 50
DROPOUT_RATE = 0.3

# Input: one window per sample, shaped (time steps, features) as taken from X_train.
input_layer = Input(shape=X_train.shape[1:])

# Two stacked LSTM layers, each followed by batch normalisation. Both keep the
# full sequence (return_sequences=True) so the attention layer sees every step.
lstm_1 = LSTM(LSTM_UNITS, return_sequences=True)(input_layer)
batch_norm_1 = BatchNormalization()(lstm_1)
lstm_2 = LSTM(LSTM_UNITS, return_sequences=True)(batch_norm_1)
batch_norm_2 = BatchNormalization()(lstm_2)

# Additive attention with the normalised LSTM output used as both query and
# value, i.e. the sequence attends to itself.
attention = AdditiveAttention(name='attention_weight')
attention_output = attention([batch_norm_2, batch_norm_2])

# Element-wise product of the sequence with its attention output.
multiply_layer = Multiply()([batch_norm_2, attention_output])

# Collapse (time steps, units) into one vector, regularise, and regress a single value.
flatten_layer = Flatten()(multiply_layer)
dropout_layer = Dropout(DROPOUT_RATE)(flatten_layer)
output_layer = Dense(1)(dropout_layer)

# Assemble the functional model from input to output.
model = Model(inputs=input_layer, outputs=output_layer)

In [None]:
# Training configuration: Adam optimiser, MSE as the loss, MAE tracked as an
# additional metric. Then print the layer-by-layer summary.
compile_options = dict(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mae'],
)
model.compile(**compile_options)
model.summary()

In [None]:
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import joblib

# NOTE: `scaler` is deliberately NOT re-created here. It must remain the scaler
# fitted on the raw `Global_active_power` values, because that is the object
# needed to scale new inputs and to map predictions back to original units.
# (Re-fitting a fresh MinMaxScaler on the already-scaled data would persist a
# near-identity transform instead.)

# Define the callback to save the best model based on validation loss
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss', mode='min', verbose=1)

# Early stopping callback to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)

# Reduce learning rate when validation loss plateaus
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6, verbose=1)

# Combine the callbacks into a list
callbacks_list = [early_stopping, model_checkpoint, reduce_lr]

# Train the model with the defined callbacks
# (training is currently disabled; a saved model is loaded in a later cell)
#history = model.fit(X_train, y_train, epochs=20, batch_size=128, validation_split=0.1, callbacks=callbacks_list, verbose=1)

# Persist the fitted scaler next to the model so inference can reuse the same scaling.
joblib.dump(scaler, 'scaler.pkl')

In [None]:
# Training / validation loss curves.
# Disabled: needs `history`, which only exists when the model.fit(...) call is
# enabled. Kept as real comments rather than a triple-quoted string, which is an
# expression and would be echoed as the cell's output.
#plt.plot(history.history['loss'], label='Training Loss')
#plt.plot(history.history['val_loss'], label='Validation Loss')
#plt.xlabel('Epochs')
#plt.ylabel('Loss')
#plt.legend()
#plt.show()

In [None]:
#test_loss = model.evaluate(X_test, y_test)
#print("Test Loss: ", test_loss)

In [None]:
from tensorflow.keras.models import load_model

# Replace the freshly built network with the saved model from the Kaggle input dataset.
MODEL_PATH = '/kaggle/input/model/keras/default/1/best_model (1).keras'
model = load_model(MODEL_PATH)

In [None]:
# Run the model over every test window; X_test holds MinMax-scaled values.
y_pred = model.predict(x=X_test)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Score the predictions against the test targets.
# NOTE(review): y_test is in MinMax-scaled units, so the error metrics are
# presumably on the [0, 1] scale rather than in original power units -- confirm.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Print every metric with four decimals.
for label, value in (
    ("Mean Absolute Error (MAE)", mae),
    ("Mean Squared Error (MSE)", mse),
    ("Root Mean Squared Error (RMSE)", rmse),
    ("R² Score", r2),
):
    print(f"{label}: {value:.4f}")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Compare actual (`y_test`) and predicted (`y_pred`) values over the first
# test steps.

# Plot at most 4500 steps, but never more than are actually available; a fixed
# range(4500) would fail with an x/y length mismatch on a shorter test set.
n_points = min(4500, len(y_test), len(y_pred))
steps = range(n_points)

# Create a figure object with a desired figure size
plt.figure(figsize=(20,6))

# Plot the actual values
plt.plot(steps, y_test[:n_points], marker='.', label="Actual", color='purple')
# Plot the predicted values
plt.plot(steps, y_pred[:n_points], '-', label="Prediction", color='red')

# Remove the top spines for a cleaner look
sns.despine(top=True)

# Adjusting the subplot location
plt.subplots_adjust(left=0.07)

# Label the y-axis
plt.ylabel('Global_active_power', size=14)

# Label the x-axis (index of the sample within the test set)
plt.xlabel('Time step', size=14)

# Adding a legend with a font size of 16
plt.legend(fontsize=16)

# Display the plot
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Flatten the predictions to a 1-D array
predicted_values = y_pred.flatten()

# Restrict the analysis to the first steps (at most 2000, fewer if the test
# set is shorter, so x and y always have matching lengths when plotted)
small_range = min(2000, len(predicted_values))
predicted_values_small = predicted_values[:small_range]

# Peak demand: predictions at or above the 95th percentile of the selected range
peak_threshold = np.percentile(predicted_values_small, 95)
peak_demand_indices = np.where(predicted_values_small >= peak_threshold)[0]

# Low demand: predictions at or below the 5th percentile of the selected range
low_threshold = np.percentile(predicted_values_small, 5)
low_demand_indices = np.where(predicted_values_small <= low_threshold)[0]

# Plot the results
plt.figure(figsize=(12,6))

# Plot predicted values for the selected range
plt.plot(np.arange(small_range), predicted_values_small, label="Predicted Demand", color='blue')

# Highlight peak demand periods
plt.scatter(peak_demand_indices, predicted_values_small[peak_demand_indices], color='red', label='Peak Demand', zorder=5)

# Highlight low demand periods
plt.scatter(low_demand_indices, predicted_values_small[low_demand_indices], color='green', label='Low Demand', zorder=5)

# Add labels and title (the step count is derived from small_range so the
# text always matches the data actually shown)
plt.xlabel('Time Steps', fontsize=14)
plt.ylabel('Predicted Demand', fontsize=14)
plt.title(f'Energy Demand Prediction (First {small_range} Steps)', fontsize=16)
plt.legend(fontsize=12)

# Display the plot
plt.show()

# Print the indices of peak and low demand within the selected range
print(f"Peak Demand Indices (First {small_range} Steps):", peak_demand_indices)
print(f"Low Demand Indices (First {small_range} Steps):", low_demand_indices)

In [None]:
def plot_power_consumption(start_date, end_date):
    """Plot `Global_active_power` from the global `df` for a date range.

    Parameters
    ----------
    start_date, end_date : str or datetime-like
        Inclusive bounds of the period to plot; anything accepted by
        `pd.to_datetime`.

    Returns
    -------
    None
        Shows a matplotlib figure, or prints a message when no rows fall
        inside the requested range.
    """
    # `df` is read without date parsing, so convert here: this makes the range
    # filter chronological rather than lexicographic, and gives matplotlib a
    # real time axis instead of one categorical tick per row.
    timestamps = pd.to_datetime(df['Datetime'])
    in_range = timestamps.between(pd.to_datetime(start_date), pd.to_datetime(end_date))

    if not in_range.any():
        print(f'No data available from {start_date} to {end_date}')
        return

    # Plot the power consumption
    plt.figure(figsize=(12, 6))
    plt.plot(timestamps[in_range], df.loc[in_range, 'Global_active_power'], label='Global Active Power', color='blue')
    plt.xlabel('Datetime')
    plt.ylabel('Power Consumption (kW)')
    plt.title(f'Power Consumption from {start_date} to {end_date}')
    plt.legend()
    plt.grid()
    # Slant the date labels so neighbouring ticks do not overlap.
    plt.gcf().autofmt_xdate()
    plt.tight_layout()
    plt.show()

# Example usage: Provide the date range
plot_power_consumption('2007-01-01 00:23:00', '2007-01-05 00:01:00')
