In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import os

# --- Define file paths and load the processed data from Google Drive ---
print("Loading the processed DataFrame from Google Drive...")
base_path = '/content/drive/MyDrive/smart_traffic_system'
processed_file_path = os.path.join(base_path, 'traffic_data_processed.parquet')
df = pd.read_parquet(processed_file_path)
print(f"DataFrame loaded with shape: {df.shape}")

# --- Separate target data and features ---
target_columns = [col for col in df.columns if col.startswith('sensor_')]
target_data = df[target_columns].values
print(f"Target data shape: {target_data.shape}")

# --- Split the data into training and validation sets ---
train_split = 0.8
num_samples = df.shape[0]
num_train_samples = int(num_samples * train_split)
num_val_samples = num_samples - num_train_samples

X_train_targets = target_data[:num_train_samples]
X_val_targets = target_data[num_train_samples:]

# --- Normalize the data ---
print("\nNormalizing the target data...")
scaler = MinMaxScaler()
scaler.fit(X_train_targets)

X_train_targets_scaled = scaler.transform(X_train_targets)
X_val_targets_scaled = scaler.transform(X_val_targets)
print("Data successfully normalized.")

# --- Create sequential data format for the Transformer ---
def create_sequences(data, input_steps, output_steps):
    X, y = [], []
    for i in range(len(data) - input_steps - output_steps):
        X.append(data[i:(i + input_steps)])
        y.append(data[(i + input_steps):(i + input_steps + output_steps)])
    return np.array(X), np.array(y)

input_sequence_length = 12 # 1 hour of history (12 * 5-minute intervals)
output_sequence_length = 6 # 30 minutes to predict (6 * 5-minute intervals)

X_train_seq, y_train_seq = create_sequences(X_train_targets_scaled, input_sequence_length, output_sequence_length)
X_val_seq, y_val_seq = create_sequences(X_val_targets_scaled, input_sequence_length, output_sequence_length)

print("\nSequential data creation complete.")
print(f"Training input sequences shape: {X_train_seq.shape}")
print(f"Training output sequences shape: {y_train_seq.shape}")
print(f"Validation input sequences shape: {X_val_seq.shape}")
print(f"Validation output sequences shape: {y_val_seq.shape}")

Loading the processed DataFrame from Google Drive...
DataFrame loaded with shape: (52116, 325)
Target data shape: (52116, 325)

Normalizing the target data...
Data successfully normalized.

Sequential data creation complete.
Training input sequences shape: (41674, 12, 325)
Training output sequences shape: (41674, 6, 325)
Validation input sequences shape: (10406, 12, 325)
Validation output sequences shape: (10406, 6, 325)


In [3]:
# --- Define the Transformer block ---
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)

# --- Build the full Transformer model ---
def build_transformer_model(input_shape, output_seq_len):
    embed_dim = 100  # Embedding size for each token
    num_heads = 4    # Number of attention heads
    ff_dim = 32      # Hidden layer size in feedforward network

    inputs = layers.Input(shape=input_shape)

    x = layers.Dense(embed_dim)(inputs)

    transformer_block = TransformerBlock(embed_dim, num_heads, ff_dim)
    x = transformer_block(x)

    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(0.1)(x)
    x = layers.Dense(20, activation="relu")(x)
    x = layers.Dropout(0.1)(x)

    outputs = layers.Dense(output_seq_len * input_shape[-1])(x)
    outputs = layers.Reshape((output_seq_len, input_shape[-1]))(outputs)

    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])
model = build_transformer_model(input_shape, output_sequence_length)
model.summary()

In [4]:
# --- Compile the model ---
model.compile(optimizer="adam", loss="mean_squared_error")

# --- Train the model ---
history = model.fit(
    X_train_seq,
    y_train_seq,
    batch_size=64,
    epochs=100,
    validation_data=(X_val_seq, y_val_seq)
)

# --- Save the trained model ---
model_save_path = os.path.join(base_path, 'transformer_model.h5')
model.save(model_save_path)
print(f"\nModel saved to: {model_save_path}")

Epoch 1/100
[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 16ms/step - loss: 0.0874 - val_loss: 0.0075
Epoch 2/100
[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 0.0120 - val_loss: 0.0065
Epoch 3/100
[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - loss: 0.0103 - val_loss: 0.0064
Epoch 4/100
[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.0092 - val_loss: 0.0060
Epoch 5/100
[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - loss: 0.0081 - val_loss: 0.0059
Epoch 6/100
[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.0072 - val_loss: 0.0057
Epoch 7/100
[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - loss: 0.0064 - val_loss: 0.0053
Epoch 8/100
[1m652/652[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - loss: 0.0056 - val_loss: 0.0053
Epoch 9/100
[1m652/652[0m [




Model saved to: /content/drive/MyDrive/smart_traffic_system/transformer_model.h5


In [5]:
import numpy as np
import pandas as pd
from tensorflow import keras
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import os

# --- Step 15: Load the trained model and make predictions ---
print("Loading the trained model...")
base_path = '/content/drive/MyDrive/smart_traffic_system'
model_path = os.path.join(base_path, 'transformer_model.h5')

try:
    model = keras.models.load_model(model_path)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    # The model architecture code from Step 15 needs to be in a previous cell
    # in the notebook for this to work.

# Make predictions on the validation set
print("Making predictions on the validation data...")
y_pred_scaled = model.predict(X_val_seq)
print(f"Prediction shape: {y_pred_scaled.shape}")

# The predictions are scaled, so we need to inverse-transform them
# We will reshape the predictions to a 2D array for inverse transformation
num_val_seq = y_val_seq.shape[0]
num_output_steps = y_val_seq.shape[1]
num_sensors = y_val_seq.shape[2]

y_val_seq_flat = y_val_seq.reshape(num_val_seq * num_output_steps, num_sensors)
y_pred_scaled_flat = y_pred_scaled.reshape(num_val_seq * num_output_steps, num_sensors)

# We need the scaler object to inverse-transform. The scaler was fit on X_train_targets.
# We will load the scaler from memory if the notebook is run top-to-bottom.
# We need to make sure we have the original scaler object from Step 14.

# Inverse transform the true values and the predictions
y_val_true = scaler.inverse_transform(y_val_seq_flat)
y_val_pred = scaler.inverse_transform(y_pred_scaled_flat)

# --- Step 16: Evaluate model performance ---
print("\nEvaluating model performance...")

# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(y_val_true, y_val_pred)
print(f"Mean Squared Error (MSE): {mse:.2f}")

# Calculate Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")

# Calculate Mean Absolute Error (MAE) - a more intuitive metric
mae = np.mean(np.abs(y_val_true - y_val_pred))
print(f"Mean Absolute Error (MAE): {mae:.2f}")

# Calculate R-squared (R2) score
r2 = r2_score(y_val_true, y_val_pred)
print(f"R-squared (R2) Score: {r2:.4f}")

Loading the trained model...
Error loading model: Unknown layer: 'TransformerBlock'. Please ensure you are using a `keras.utils.custom_object_scope` and that this object is included in the scope. See https://www.tensorflow.org/guide/keras/save_and_serialize#registering_the_custom_object for details.
Making predictions on the validation data...
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step
Prediction shape: (10406, 6, 325)

Evaluating model performance...
Mean Squared Error (MSE): 23.17
Root Mean Squared Error (RMSE): 4.81
Mean Absolute Error (MAE): 2.56
R-squared (R2) Score: 0.6277


In [6]:
import numpy as np
import pandas as pd
from tensorflow import keras
from tensorflow.keras.utils import custom_object_scope # <-- New import
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import os

# We need to re-define the custom TransformerBlock class so Keras knows what it is.
class TransformerBlock(keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.att = keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [keras.layers.Dense(ff_dim, activation="relu"), keras.layers.Dense(embed_dim),]
        )
        self.layernorm1 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = keras.layers.Dropout(rate)
        self.dropout2 = keras.layers.Dropout(rate)
        # Store parameters for serialization
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

    def get_config(self):
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config

    def call(self, inputs):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)


# --- Step 18: Load the trained model with custom objects ---
print("Loading the trained model with custom objects...")
base_path = '/content/drive/MyDrive/smart_traffic_system'
model_path = os.path.join(base_path, 'transformer_model.h5')

with custom_object_scope({'TransformerBlock': TransformerBlock}):
    model = keras.models.load_model(model_path)

print("Model loaded successfully.")

# We will now use the scaler and X_val_seq from the previous cells, so make sure they are in memory

# Make predictions on the validation set
y_pred_scaled = model.predict(X_val_seq)

# --- Inverse transform the true values and the predictions ---
num_val_seq = y_val_seq.shape[0]
num_output_steps = y_val_seq.shape[1]
num_sensors = y_val_seq.shape[2]

y_val_seq_flat = y_val_seq.reshape(num_val_seq * num_output_steps, num_sensors)
y_pred_scaled_flat = y_pred_scaled.reshape(num_val_seq * num_output_steps, num_sensors)

y_val_true = scaler.inverse_transform(y_val_seq_flat)
y_val_pred = scaler.inverse_transform(y_pred_scaled_flat)

# --- Evaluate model performance ---
print("\nEvaluating model performance...")
mse = mean_squared_error(y_val_true, y_val_pred)
rmse = np.sqrt(mse)
mae = np.mean(np.abs(y_val_true - y_val_pred))
r2 = r2_score(y_val_true, y_val_pred)

print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"R-squared (R2) Score: {r2:.4f}")

Loading the trained model with custom objects...




Model loaded successfully.
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step

Evaluating model performance...
Mean Squared Error (MSE): 23.17
Root Mean Squared Error (RMSE): 4.81
Mean Absolute Error (MAE): 2.56
R-squared (R2) Score: 0.6277


In [7]:
import numpy as np
import pandas as pd

# We need the original, un-sequenced data to map anomalies back to dates.
# We will assume we have 'df' from a previous cell.
# The 'df' DataFrame was created in Step 13 (Corrected).

# --- Step 19: Calculate Prediction Errors ---
print("Calculating prediction errors...")
# The errors are the absolute difference between the true and predicted values
prediction_errors = np.abs(y_val_true - y_val_pred)

# We have a 2D array of errors. Let's find the average error for each prediction window
# The error for each 30-minute prediction window is the mean of all 6 steps and 325 sensors
average_prediction_error_per_window = np.mean(prediction_errors, axis=(1, 0)) # Average error for each sensor over the prediction window

# --- Step 20: Identify Anomalies based on a threshold ---
print("\nIdentifying anomalies...")

# We will use a simple statistical threshold.
# Anomaly threshold: 3 standard deviations above the mean error
error_mean = np.mean(average_prediction_error_per_window)
error_std = np.std(average_prediction_error_per_window)
anomaly_threshold = error_mean + (3 * error_std)

print(f"Average prediction error: {error_mean:.2f}")
print(f"Anomaly threshold (Mean + 3*Std): {anomaly_threshold:.2f}")

# Find the indices where the error exceeds the threshold
anomaly_indices = np.where(average_prediction_error_per_window > anomaly_threshold)[0]
print(f"Found {len(anomaly_indices)} potential anomalies.")

# --- Step 21: Map anomalies back to dates and sensors ---
# The anomaly indices correspond to our validation sequences.
# We need to map them back to the original timestamps from our DataFrame.
val_dates = df.index[num_train_samples:]

print("\nExamples of detected anomalies:")
for i in anomaly_indices[:5]: # Print the first 5 anomalies
    # The anomaly index 'i' corresponds to the sequence number.
    # We need to find the start date of that sequence.
    anomaly_start_date = val_dates[i + input_sequence_length]

    # We can also find the sensor with the highest error in that anomaly window
    max_error_sensor_index = np.argmax(np.mean(prediction_errors[i], axis=0))
    max_error_sensor_id = f'sensor_{max_error_sensor_index}'

    print(f"Anomaly detected starting on: {anomaly_start_date.strftime('%Y-%m-%d %H:%M')}")
    print(f"  - Max error was on {max_error_sensor_id}")

Calculating prediction errors...

Identifying anomalies...
Average prediction error: 2.56
Anomaly threshold (Mean + 3*Std): 2.56
Found 0 potential anomalies.

Examples of detected anomalies:


  anomaly_indices = np.where(average_prediction_error_per_window > anomaly_threshold)[0]


In [8]:
import numpy as np
import pandas as pd

# We need the original, un-sequenced data to map anomalies back to dates.
# We will assume we have 'df' from a previous cell.
# The 'df' DataFrame was created in Step 13 (Corrected).

# --- Step 19 (Corrected): Calculate Prediction Errors ---
print("Calculating prediction errors...")
# The errors are the absolute difference between the true and predicted values
# These are already flattened to 2D, so we can't average by window this way.
prediction_errors = np.abs(y_val_true - y_val_pred)

# --- Step 20 (Corrected): Identify Anomalies based on a threshold ---
print("\nIdentifying anomalies...")

# Let's calculate the mean prediction error per sensor over all validation data.
average_prediction_error_per_sensor = np.mean(prediction_errors, axis=0)

# We will define an anomaly as any single prediction error that is a certain
# number of standard deviations above the mean error for that specific sensor.
# This is a more robust approach than a single threshold for all predictions.

# Calculate the mean and standard deviation for each sensor's error
error_mean_per_sensor = np.mean(prediction_errors, axis=0)
error_std_per_sensor = np.std(prediction_errors, axis=0)

# Set the anomaly threshold per sensor
anomaly_threshold_per_sensor = error_mean_per_sensor + (3 * error_std_per_sensor)

# Find the indices of all individual prediction errors that are anomalies
anomaly_mask = prediction_errors > anomaly_threshold_per_sensor

# Get the row (prediction) and column (sensor) indices of anomalies
anomaly_indices = np.where(anomaly_mask)

print(f"Total number of anomalous predictions found: {len(anomaly_indices[0])}")

# --- Step 21 (Corrected): Map anomalies back to dates and sensors ---
# The anomaly indices correspond to our validation sequences.
# We need to map them back to the original timestamps from our DataFrame.
# Let's get the original dates for the validation set.
val_dates_flat = df.index[num_train_samples + input_sequence_length: num_train_samples + input_sequence_length + prediction_errors.shape[0]]

print("\nExamples of detected anomalies:")
if len(anomaly_indices[0]) > 0:
    for i in range(min(5, len(anomaly_indices[0]))): # Print the first 5 anomalies
        time_step_index = anomaly_indices[0][i]
        sensor_index = anomaly_indices[1][i]

        anomaly_date = val_dates_flat[time_step_index]
        anomaly_sensor = f'sensor_{sensor_index}'

        true_value = y_val_true[time_step_index, sensor_index]
        predicted_value = y_val_pred[time_step_index, sensor_index]
        error_value = prediction_errors[time_step_index, sensor_index]

        print(f"Anomaly detected on {anomaly_date.strftime('%Y-%m-%d %H:%M')}")
        print(f"  - Sensor: {anomaly_sensor}")
        print(f"  - True Speed: {true_value:.2f}, Predicted Speed: {predicted_value:.2f}")
        print(f"  - Error: {error_value:.2f} (above threshold)")

else:
    print("No significant anomalies detected above the threshold.")

Calculating prediction errors...

Identifying anomalies...
Total number of anomalous predictions found: 445154

Examples of detected anomalies:
Anomaly detected on 2017-05-25 19:20
  - Sensor: sensor_17
  - True Speed: 63.50, Predicted Speed: 46.14
  - Error: 17.36 (above threshold)
Anomaly detected on 2017-05-25 19:20
  - Sensor: sensor_126
  - True Speed: 67.00, Predicted Speed: 55.07
  - Error: 11.93 (above threshold)
Anomaly detected on 2017-05-25 19:20
  - Sensor: sensor_301
  - True Speed: 31.80, Predicted Speed: 58.62
  - Error: 26.82 (above threshold)
Anomaly detected on 2017-05-25 19:20
  - Sensor: sensor_313
  - True Speed: 75.90, Predicted Speed: 61.65
  - Error: 14.25 (above threshold)
Anomaly detected on 2017-05-25 19:25
  - Sensor: sensor_17
  - True Speed: 64.40, Predicted Speed: 46.56
  - Error: 17.84 (above threshold)


In [9]:
import numpy as np
import pandas as pd
import os

# --- Step D (Final): Create the DataFrame for Power BI from scratch ---
print("\nCreating DataFrame for Power BI...")

# We need the flattened versions of our data
y_true_flat = y_val_true.flatten()
y_pred_flat = y_val_pred.flatten()

# Calculate the total number of individual predictions
total_predictions = y_true_flat.shape[0]

# --- Reconstruct the 'Date' and 'Sensor' columns ---
# The number of sensors is 325.
# The number of unique time steps is total_predictions / num_sensors
num_unique_time_steps = y_val_true.shape[0]

# Get the date range for the validation predictions
start_val_date = df.index[num_train_samples + input_sequence_length]
val_dates_for_predictions = pd.date_range(
    start=start_val_date,
    periods=num_unique_time_steps,
    freq='5min'
)

# Create the repeated date array
date_array = val_dates_for_predictions.repeat(y_val_true.shape[1])

# Create the tiled sensor array
sensors_array = np.tile(df[target_columns].columns.values, num_unique_time_steps)

# --- Build the DataFrame ---
powerbi_df = pd.DataFrame({
    'Date': date_array,
    'Sensor': sensors_array,
    'TrueSpeed': y_true_flat,
    'PredictedSpeed': y_pred_flat,
    'PredictionError': (y_true_flat - y_pred_flat),
})

# --- Step E: Add IsAnomaly column ---
# We need to get the error stats per sensor from the un-flattened arrays.
prediction_errors_per_sensor = np.abs(y_val_true - y_val_pred)

error_mean_per_sensor = np.mean(prediction_errors_per_sensor, axis=0)
error_std_per_sensor = np.std(prediction_errors_per_sensor, axis=0)

anomaly_threshold_per_sensor = error_mean_per_sensor + (3 * error_std_per_sensor)

anomaly_mask_flat = (np.abs(y_val_true - y_val_pred) > anomaly_threshold_per_sensor).flatten()

powerbi_df['IsAnomaly'] = anomaly_mask_flat

print("Combined DataFrame created.")
print(powerbi_df.head())
print(f"DataFrame shape: {powerbi_df.shape}")

# --- Step F: Save the DataFrame to a CSV file ---
output_file_path = os.path.join(base_path, 'dashboard_data.csv')

try:
    powerbi_df.to_csv(output_file_path, index=False)
    print(f"\nDataFrame successfully saved to {output_file_path}")
    print("This file is now ready for import into Power BI.")

except Exception as e:
    print(f"An error occurred while saving the CSV file: {e}")


Creating DataFrame for Power BI...
Combined DataFrame created.
                 Date    Sensor  TrueSpeed  PredictedSpeed  PredictionError  \
0 2017-05-25 19:20:00  sensor_0       71.1       69.919022         1.180978   
1 2017-05-25 19:20:00  sensor_1       61.8       54.053997         7.746003   
2 2017-05-25 19:20:00  sensor_2       63.7       59.482365         4.217635   
3 2017-05-25 19:20:00  sensor_3       68.2       61.040920         7.159080   
4 2017-05-25 19:20:00  sensor_4       68.1       61.700882         6.399118   

   IsAnomaly  
0      False  
1      False  
2      False  
3      False  
4      False  
DataFrame shape: (20291700, 6)

DataFrame successfully saved to /content/drive/MyDrive/smart_traffic_system/dashboard_data.csv
This file is now ready for import into Power BI.


In [14]:
import numpy as np
import pandas as pd
import h5py
import os
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from tensorflow.keras.utils import custom_object_scope

# --- Step A: Define the custom TransformerBlock class ---
# This is necessary for loading the model
class TransformerBlock(keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.att = keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [keras.layers.Dense(ff_dim, activation="relu"), keras.layers.Dense(embed_dim),]
        )
        self.layernorm1 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = keras.layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = keras.layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

    def get_config(self):
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config

    def call(self, inputs):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output)
        return self.layernorm2(out1 + ffn_output)

# --- Step B: Load all the necessary data and the model ---
print("Loading all necessary data and model...")
base_path = '/content/drive/MyDrive/smart_traffic_system'
raw_file_path = os.path.join(base_path, 'pems-bay.h5')
model_path = os.path.join(base_path, 'transformer_model.h5')

with h5py.File(raw_file_path, 'r') as hf:
    traffic_data = hf['speed']['block0_values'][:]

start_date = '2017-01-01 00:00:00'
num_intervals = traffic_data.shape[0]
date_range = pd.date_range(start=start_date, periods=num_intervals, freq='5min')
num_sensors = traffic_data.shape[1]
sensor_columns = [f'sensor_{i}' for i in range(num_sensors)]

df = pd.DataFrame(traffic_data, index=date_range, columns=sensor_columns)

target_columns = [col for col in df.columns if col.startswith('sensor_')]
target_data = df[target_columns].values

train_split = 0.8
num_samples = df.shape[0]
num_train_samples = int(num_samples * train_split)

X_train_targets = target_data[:num_train_samples]
X_val_targets = target_data[num_train_samples:]

scaler = MinMaxScaler()
scaler.fit(X_train_targets)
X_val_targets_scaled = scaler.transform(X_val_targets)

input_sequence_length = 12
output_sequence_length = 6

def create_sequences(data, input_steps, output_steps):
    X, y = [], []
    for i in range(len(data) - input_steps - output_steps):
        X.append(data[i:(i + input_steps)])
        y.append(data[(i + input_steps):(i + input_steps + output_steps)])
    return np.array(X), np.array(y)

X_val_seq, y_val_seq = create_sequences(X_val_targets_scaled, input_sequence_length, output_sequence_length)

with custom_object_scope({'TransformerBlock': TransformerBlock}):
    model = keras.models.load_model(model_path)

# --- Step C: Make predictions and inverse transform ---
print("Making predictions and inverse transforming...")
y_pred_scaled = model.predict(X_val_seq)

y_val_true = scaler.inverse_transform(y_val_seq.reshape(-1, num_sensors))
y_val_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, num_sensors))

# --- Step D: Create the DataFrame for Power BI ---
print("\nCreating DataFrame for Power BI...")

# Reconstruct the Date column
start_val_date_for_predictions = df.index[num_train_samples + input_sequence_length]
num_val_sequences = X_val_seq.shape[0]
total_prediction_steps = num_val_sequences * output_sequence_length

val_dates_for_predictions = pd.date_range(
    start=start_val_date_for_predictions,
    periods=total_prediction_steps,
    freq='5min'
)

# Reconstruct the Sensor column
sensors_array = np.tile(df[target_columns].columns.values, total_prediction_steps)

# Correct the date array length by repeating each date for each sensor
date_array_repeated = val_dates_for_predictions.repeat(num_sensors)

# Create the DataFrame
powerbi_df = pd.DataFrame({
    'Date': date_array_repeated,
    'Sensor': np.tile(df[target_columns].columns.values, total_prediction_steps),
    'TrueSpeed': y_val_true.flatten(),
    'PredictedSpeed': y_val_pred.flatten(),
    'PredictionError': (y_val_true - y_val_pred).flatten(),
})

# --- Step E: Add IsAnomaly column ---
prediction_errors_per_sensor = np.abs(y_val_true - y_val_pred)

error_mean_per_sensor = np.mean(prediction_errors_per_sensor, axis=0)
error_std_per_sensor = np.std(prediction_errors_per_sensor, axis=0)

anomaly_threshold_per_sensor = error_mean_per_sensor + (3 * error_std_per_sensor)

anomaly_mask_flat = (np.abs(y_val_true - y_val_pred) > anomaly_threshold_per_sensor).flatten()

powerbi_df['IsAnomaly'] = anomaly_mask_flat

print("Combined DataFrame created.")
print(powerbi_df.head())
print(f"DataFrame shape: {powerbi_df.shape}")

# --- Step F: Create and save the smaller DataFrame ---
print("\nCreating and saving a smaller DataFrame for Power BI...")

start_date = '2017-05-01'
end_date = '2017-05-31'

smaller_powerbi_df = powerbi_df[
    (powerbi_df['Date'] >= start_date) & (powerbi_df['Date'] <= end_date)
].copy()

output_file_path = os.path.join(base_path, 'dashboard_data_small.csv')

try:
    smaller_powerbi_df.to_csv(output_file_path, index=False)
    print(f"\nSmaller DataFrame successfully saved to {output_file_path}")
    print("This file is now ready for import into Power BI.")

except Exception as e:
    print(f"An error occurred while saving the CSV file: {e}")

Loading all necessary data and model...




Making predictions and inverse transforming...
[1m326/326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step

Creating DataFrame for Power BI...
Combined DataFrame created.
                 Date    Sensor  TrueSpeed  PredictedSpeed  PredictionError  \
0 2017-05-25 19:20:00  sensor_0       71.1       69.919022         1.180978   
1 2017-05-25 19:20:00  sensor_1       61.8       54.053997         7.746003   
2 2017-05-25 19:20:00  sensor_2       63.7       59.482365         4.217635   
3 2017-05-25 19:20:00  sensor_3       68.2       61.040920         7.159080   
4 2017-05-25 19:20:00  sensor_4       68.1       61.700882         6.399118   

   IsAnomaly  
0      False  
1      False  
2      False  
3      False  
4      False  
DataFrame shape: (20291700, 6)

Creating and saving a smaller DataFrame for Power BI...

Smaller DataFrame successfully saved to /content/drive/MyDrive/smart_traffic_system/dashboard_data_small.csv
This file is now ready for import into Power BI.


In [None]:
!ls "/content/drive/MyDrive/smart_traffic_system"

In [None]:
!ls -R "/content/drive/MyDrive/smart_traffic_system"