## Building Linear model

In [18]:
import scipy.stats as spy
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [19]:
# Load your data into a pandas DataFrame
df = pd.read_csv("../data/powerconsumption.csv");
df.drop(["DiffuseFlows"], axis=1, inplace=True)
df.Datetime = pd.to_datetime(df.Datetime);
reference_datetime = pd.to_datetime("01/01/2017 00:00", format='%d/%m/%Y %H:%M')
df.Datetime = ((df['Datetime'] - reference_datetime).dt.total_seconds() // 60) % 60
df

Unnamed: 0,Datetime,Temperature,Humidity,WindSpeed,GeneralDiffuseFlows,PowerConsumption_Zone1,PowerConsumption_Zone2,PowerConsumption_Zone3
0,0.0,6.559,73.8,0.083,0.051,34055.69620,16128.87538,20240.96386
1,10.0,6.414,74.5,0.083,0.070,29814.68354,19375.07599,20131.08434
2,20.0,6.313,74.5,0.080,0.062,29128.10127,19006.68693,19668.43373
3,30.0,6.121,75.0,0.083,0.091,28228.86076,18361.09422,18899.27711
4,40.0,5.921,75.7,0.081,0.048,27335.69620,17872.34043,18442.40964
...,...,...,...,...,...,...,...,...
52411,10.0,7.010,72.4,0.080,0.040,31160.45627,26857.31820,14780.31212
52412,20.0,6.947,72.6,0.082,0.051,30430.41825,26124.57809,14428.81152
52413,30.0,6.900,72.8,0.086,0.084,29590.87452,25277.69254,13806.48259
52414,40.0,6.758,73.0,0.080,0.066,28958.17490,24692.23688,13512.60504


In [20]:
# Define the number of time steps (days)
time_steps = 6*24*7

In [21]:
# Split the data into features (X) and target (y)
features = ["Datetime", "Temperature", "Humidity", "WindSpeed", "GeneralDiffuseFlows"]
target = ['PowerConsumption_Zone1']

X = df[features]
y = df[target]

In [22]:
# Normalize the features and target using Min-Max scaling
scaler_X = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)

scaler_y = MinMaxScaler()
y_scaled = scaler_y.fit_transform(y)

In [23]:
# Create sequences for LSTM input
X_sequences = []
y_sequences = []

In [24]:
for i in range(len(X_scaled) - time_steps + 1):
    X_sequences.append(X_scaled[i:i+time_steps, :])
    # Use the avg of the last consumption as target of the last 7 days
    y_sequences.append(np.mean(y_scaled[i:i+time_steps, 0]))
    # y_sequences.append(y_scaled[i+time_steps-1, 0])  # Use the last day's power consumption as target

X_sequences = np.array(X_sequences)
y_sequences = np.array(y_sequences)

In [25]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, test_size=0.2, random_state=42)

In [26]:
# Build the LSTM model
model = tf.keras.Sequential([
    tf.keras.layers.LSTM(16, activation='tanh', input_shape=(time_steps, len(features))),
    tf.keras.layers.Dense(1, activation='linear')
])

In [27]:
# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

In [28]:
# Train the model
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))

2023-08-29 18:03:44.250987: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 829120320 exceeds 10% of free system memory.


Epoch 1/5

2023-08-29 18:06:59.462587: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 207285120 exceeds 10% of free system memory.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f81cf955fc0>

In [29]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 16)                1408      
                                                                 
 dense_1 (Dense)             (None, 1)                 17        
                                                                 
Total params: 1,425
Trainable params: 1,425
Non-trainable params: 0
_________________________________________________________________


In [30]:
# Make predictions using the test dataset
y_pred_scaled = model.predict(X_test)

# Inverse transform the scaled predictions to the original scale
y_pred = scaler_y.inverse_transform(y_pred_scaled)

# Inverse transform the scaled actual values (if needed)
y_actual = scaler_y.inverse_transform(y_test.reshape(-1, 1))


y_actual_plt = y_actual[:len(y_actual)//10]
y_pred_plt = y_pred[:len(y_actual)//10]
fig,ax = plt.subplots()
ax.scatter(range(len(y_actual_plt)), y_actual_plt)
ax.scatter(range(len(y_pred_plt)), y_pred_plt)
plt.show()

# Calculate performance metrics (e.g., Mean Squared Error)
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(y_actual, y_pred)
print(f"Mean Squared Error: {mse:.2f}")


[[31409.783]
 [33163.03 ]
 [32092.438]
 ...
 [33389.2  ]
 [29278.242]
 [33474.016]]
[[30320.2663439 ]
 [33709.57174108]
 [28880.76923089]
 ...
 [30922.3864562 ]
 [29174.44021971]
 [32654.33879792]]
Mean Squared Error: 2572127.83
