In [None]:
import pandas as pd
import random
from datetime import datetime, timedelta

def generate_dataset(rows=200000, seed=None):
    """Build a synthetic, minute-resolution energy dataset with theft/waste labels.

    Each row is drawn independently as 'normal', 'theft' or 'waste'
    (sampling weights 0.1 / 0.4 / 0.5) and gets a voltage in [228, 232]:

    * normal: current in [1.4, 3.0]; metered and productive power equal
      voltage * current.
    * theft:  current in [3.2, 6.5]; metered power is 50-70% of the real
      power, and productive power equals the metered power.
    * waste:  productive power in [200, 400]; current in [2.5, 4.7];
      metered power equals voltage * current.

    Args:
        rows: Number of rows to generate. Timestamps start at
            2010-06-09 00:00:00 and advance by one minute per row.
        seed: Optional seed. When given, a private random.Random(seed) is
            used so the same seed always yields the same frame. When None
            (default) the module-level ``random`` generator is used, exactly
            as before, so ``random.seed(...)`` in the caller still applies.

    Returns:
        pandas.DataFrame with columns Timestamp, Voltage, Current, Power,
        Metered_power, Productive_power, Theft, Waste, Label. The columns are
        present even when ``rows`` is 0.
    """
    columns = [
        'Timestamp', 'Voltage', 'Current', 'Power', 'Metered_power',
        'Productive_power', 'Theft', 'Waste', 'Label',
    ]
    # Both the module and a Random instance expose choices()/uniform().
    rng = random if seed is None else random.Random(seed)

    data = []
    timestamp = datetime(2010, 6, 9, 0, 0, 0)

    for _ in range(rows):
        behavior = rng.choices(['normal', 'theft', 'waste'], weights=[0.1, 0.4, 0.5])[0]
        voltage = round(rng.uniform(228, 232), 2)

        if behavior == 'normal':
            current = round(rng.uniform(1.4, 3.0), 2)
            power = round(voltage * current, 2)
            metered_power = power
            productive_power = power
            theft_flag = 0
            waste_flag = 0

        elif behavior == 'theft':
            current = round(rng.uniform(3.2, 6.5), 2)
            power = round(voltage * current, 2)
            metered_power = round(power * rng.uniform(0.5, 0.7), 2)  # Under-reported
            productive_power = metered_power
            theft_flag = 1
            waste_flag = 0

        else:  # 'waste' is the only remaining choice
            # Draw order (productive power first, then current) is kept so that
            # externally seeded runs reproduce the same values as before.
            productive_power = round(rng.uniform(200, 400), 2)
            current = round(rng.uniform(2.5, 4.7), 2)
            power = round(voltage * current, 2)
            metered_power = power
            theft_flag = 0
            waste_flag = 1

        data.append({
            'Timestamp': timestamp.strftime('%Y-%m-%d %H:%M:%S'),
            'Voltage': voltage,
            'Current': current,
            'Power': power,
            'Metered_power': metered_power,
            'Productive_power': productive_power,
            'Theft': theft_flag,
            'Waste': waste_flag,
            'Label': behavior
        })

        timestamp += timedelta(minutes=1)

    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(data, columns=columns)

# Generate 200,000 rows of data (one synthetic reading per simulated minute)
df = generate_dataset(200000)

# Show the first 5 rows
print(df.head())

# Save the dataset as a CSV file in the notebook's current working directory;
# the training cell reads it back under the same file name.
csv_path = "new_energy_dataset.csv"
df.to_csv(csv_path, index=False)
print(f"Dataset saved to {csv_path}")


             Timestamp  Voltage  Current    Power  Metered_power  \
0  2010-06-09 00:00:00   229.32     5.49  1258.97         813.65   
1  2010-06-09 00:01:00   231.22     4.46  1031.24        1031.24   
2  2010-06-09 00:02:00   230.58     4.39  1012.25         533.74   
3  2010-06-09 00:03:00   230.88     4.54  1048.20        1048.20   
4  2010-06-09 00:04:00   230.51     1.99   458.71         458.71   

   Productive_power  Theft  Waste   Label  
0            813.65      1      0   theft  
1            376.80      0      1   waste  
2            533.74      1      0   theft  
3            392.12      0      1   waste  
4            458.71      0      0  normal  
Dataset saved to new_energy_dataset.csv


In [None]:
# Install KerasTuner for hyperparameter optimization
!pip install -q keras-tuner

import pandas as pd
import numpy as np
import os
import joblib
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, mean_squared_error
import keras_tuner as kt
from sklearn.utils.class_weight import compute_class_weight
from google.colab import drive
import shutil

# --- Configuration ---
# File names of the two CSV datasets that are loaded and concatenated below.
ORIGINAL_DATASET_FILENAME = 'sri_lanka_energy_dataset.csv'
NEW_DATASET_FILENAME = 'new_energy_dataset.csv'  # New CSV file
# Local (runtime) output folders, and the Google Drive folder that holds the
# original dataset, the pre-trained models and the saved results.
MODEL_DIR = 'models_optimized'
NEW_MODEL_DIR = 'models_optimized_v2'  # New directory for new models
DRIVE_MODEL_DIR = '/content/drive/MyDrive/Models_Optimized_V2'
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(NEW_MODEL_DIR, exist_ok=True)

# Mount Google Drive
# The Drive folder can only be created after the mount, so the order matters.
drive.mount('/content/drive', force_remount=True)
os.makedirs(DRIVE_MODEL_DIR, exist_ok=True)
print(f"Created Google Drive folder: {DRIVE_MODEL_DIR}")

# --- Step 1: Load and Combine Datasets ---
def load_and_validate_csv(filename, required_columns=None):
    """Load a CSV dataset and check that it has the columns training needs.

    Args:
        filename: Path of the CSV file to read.
        required_columns: Column names that must be present. Defaults to
            ["Voltage", "Current", "Power", "Theft", "Waste"].

    Returns:
        The loaded DataFrame, or None when the file does not exist or lacks
        a required column. In both failure cases an error line is printed;
        for missing columns it names exactly the columns that are absent.
    """
    if required_columns is None:
        required_columns = ["Voltage", "Current", "Power", "Theft", "Waste"]

    if not os.path.exists(filename):
        print(f"Error: Dataset file '{filename}' not found.")
        return None

    df = pd.read_csv(filename)

    # Report only the columns that are actually absent, not the whole list.
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"Error: File '{filename}' is missing required columns: {missing_columns}")
        return None

    print(f"Dataset '{filename}' loaded successfully. Preview:")
    print(df.head())
    return df

# Load original dataset from Google Drive.
# To read it from the notebook's working directory instead, pass
# ORIGINAL_DATASET_FILENAME on its own.
df_original = load_and_validate_csv(os.path.join(DRIVE_MODEL_DIR, ORIGINAL_DATASET_FILENAME))
if df_original is None:
    # Raise instead of calling exit(): in a notebook exit() addresses the
    # kernel rather than cleanly aborting this cell, whereas an exception
    # stops the cell (and "Run all") right here with a readable message.
    raise RuntimeError(
        f"Original dataset '{ORIGINAL_DATASET_FILENAME}' could not be loaded from "
        f"'{DRIVE_MODEL_DIR}'; see the error printed above."
    )

# Load new dataset
df_new = load_and_validate_csv(NEW_DATASET_FILENAME)
if df_new is None:
    print("Continuing with original dataset only.")
    df_combined = df_original
else:
    # Combine datasets
    df_combined = pd.concat([df_original, df_new], ignore_index=True)
    print("\nCombined dataset preview:")
    print(df_combined.head())
    print(f"Total rows in combined dataset: {len(df_combined)}")

# Check for missing values
if df_combined.isnull().values.any():
    # Theft/Waste are the binary training targets. Mean-filling them would
    # create fractional labels, so rows without a label are dropped and only
    # the remaining numeric columns are mean-imputed.
    label_columns = ["Theft", "Waste"]
    rows_before = len(df_combined)
    df_combined = (
        df_combined
        .dropna(subset=label_columns)
        .astype({column: int for column in label_columns})
        .reset_index(drop=True)
    )
    dropped_rows = rows_before - len(df_combined)
    if dropped_rows:
        print(f"Warning: Dropped {dropped_rows} rows with missing Theft/Waste labels.")

    if df_combined.isnull().values.any():
        print("Warning: Missing values detected. Filling with column means.")
        fill_values = df_combined.drop(columns=label_columns).mean(numeric_only=True)
        df_combined = df_combined.fillna(fill_values)

# --- Step 2: Preprocessing and Scaling ---
FEATURE_NAMES = ["Voltage", "Current", "Power"]  # Define feature names for scaler
if "Timestamp" in df_combined.columns:
    df_combined = df_combined.drop(columns=["Timestamp"])

X = df_combined[FEATURE_NAMES]
y_theft = df_combined["Theft"]
y_waste = df_combined["Waste"]
y_power = df_combined["Power"]

# Check class balance
print("\nClass Distribution:")
print("Theft:\n", y_theft.value_counts())
print("Waste:\n", y_waste.value_counts())


def _balanced_class_weights(labels):
    """Return {class_label: weight} using sklearn's 'balanced' heuristic.

    The dict is keyed by the actual label values found in ``labels`` rather
    than by their position in the sorted unique array, so the mapping stays
    correct even when the labels present are not exactly 0..k-1.
    """
    classes = np.unique(labels)
    weights = compute_class_weight('balanced', classes=classes, y=labels)
    return {int(cls): float(weight) for cls, weight in zip(classes, weights)}


# Compute class weights
class_weights_theft = _balanced_class_weights(y_theft)
class_weights_waste = _balanced_class_weights(y_waste)
print("\nClass Weights for Theft:", class_weights_theft)
print("Class Weights for Waste:", class_weights_waste)

# Fit scaler on combined data
# NOTE(review): the scaler is fitted on every row before the train/test split
# below, so test-set statistics influence the scaling. Consider fitting on the
# training rows only if a strict hold-out evaluation is needed.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
joblib.dump(scaler, os.path.join(NEW_MODEL_DIR, "feature_scaler_v2.pkl"))
print("\nScaler has been fitted and saved.")

# Data for classification and regression
X_clf_scaled = X_scaled
# The regression model predicts Power, so it only sees the first two scaled
# features (Voltage, Current); the third column is the target itself.
X_reg_scaled = X_scaled[:, :2]

# Split datasets
X_train_t, X_test_t, y_train_t, y_test_t = train_test_split(X_clf_scaled, y_theft, test_size=0.2, random_state=42, stratify=y_theft)
X_train_w, X_test_w, y_train_w, y_test_w = train_test_split(X_clf_scaled, y_waste, test_size=0.2, random_state=42, stratify=y_waste)
X_train_p, X_test_p, y_train_p, y_test_p = train_test_split(X_reg_scaled, y_power, test_size=0.2, random_state=42)

# --- Callbacks ---
# Both callbacks watch the validation loss of whichever model is being fitted.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

# --- Step 3: Model Training ---
def build_classification_model(hp):
    """Create and compile the tunable binary classifier (theft / waste).

    Architecture: Dense (no bias) -> BatchNorm -> LeakyReLU -> Dropout ->
    Dense -> LeakyReLU -> sigmoid output. The input width is taken from
    the module-level ``X_train_t``.

    Args:
        hp: Keras Tuner hyperparameter container. Registers 'units_1',
            'dropout_1', 'units_2' and 'learning_rate', in that order.

    Returns:
        A compiled Sequential model using Adam, binary cross-entropy loss and
        the accuracy metric.
    """
    n_inputs = X_train_t.shape[1]

    # Sample the search space in the same order the layers consume it.
    first_units = hp.Int('units_1', min_value=32, max_value=128, step=32)
    drop_rate = hp.Float('dropout_1', min_value=0.2, max_value=0.4, step=0.1)
    second_units = hp.Int('units_2', min_value=16, max_value=64, step=16)

    stack = [
        Input(shape=(n_inputs,)),
        # Bias is omitted here because BatchNormalization follows directly.
        Dense(units=first_units, use_bias=False),
        BatchNormalization(),
        LeakyReLU(negative_slope=0.1),
        Dropout(rate=drop_rate),
        Dense(units=second_units),
        LeakyReLU(negative_slope=0.1),
        Dense(1, activation='sigmoid'),
    ]
    classifier = Sequential(stack)

    chosen_lr = hp.Choice('learning_rate', values=[1e-3, 5e-4, 1e-4])
    classifier.compile(
        optimizer=Adam(learning_rate=chosen_lr),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

def build_regression_model(hp):
    """Create and compile the tunable power-regression network.

    Architecture: Dense -> LeakyReLU -> Dropout -> Dense -> LeakyReLU ->
    single linear output. The input width is taken from the module-level
    ``X_train_p``.

    Args:
        hp: Keras Tuner hyperparameter container. Registers 'units_1',
            'dropout_1', 'units_2' and 'learning_rate', in that order.

    Returns:
        A compiled Sequential model using Adam with mean-squared-error as both
        loss and metric.
    """
    n_inputs = X_train_p.shape[1]

    # Sample the search space in the same order the layers consume it.
    first_units = hp.Int('units_1', min_value=64, max_value=256, step=64)
    drop_rate = hp.Float('dropout_1', min_value=0.2, max_value=0.4, step=0.1)
    second_units = hp.Int('units_2', min_value=32, max_value=128, step=32)

    stack = [
        Input(shape=(n_inputs,)),
        Dense(units=first_units),
        LeakyReLU(negative_slope=0.1),
        Dropout(rate=drop_rate),
        Dense(units=second_units),
        LeakyReLU(negative_slope=0.1),
        Dense(1),
    ]
    regressor = Sequential(stack)

    chosen_lr = hp.Choice('learning_rate', values=[1e-3, 5e-4, 1e-4])
    regressor.compile(
        optimizer=Adam(learning_rate=chosen_lr),
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=['mse'],
    )
    return regressor

# Option 1 (active): fine-tune the existing models stored in Google Drive.
# To train from scratch instead, disable this section and enable Option 2.

print("\n--- Loading Existing Models for Fine-Tuning ---")
theft_model = load_model(os.path.join(DRIVE_MODEL_DIR, "theft_detection_model_v2.keras"))
waste_model = load_model(os.path.join(DRIVE_MODEL_DIR, "waste_detection_model_v2.keras"))
power_model = load_model(os.path.join(DRIVE_MODEL_DIR, "power_prediction_model_v2.keras"))

# Print model summaries.
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that emitted a stray "None" after every table).
print("\nTheft Detection Model Summary:")
theft_model.summary()
print("\nWaste Detection Model Summary:")
waste_model.summary()
print("\nPower Prediction Model Summary:")
power_model.summary()

# The loaded optimizers keep the learning rate from the run that saved them.
# In the recorded output the very first epoch already reports
# learning_rate 1e-06 (the min_lr of ReduceLROnPlateau) and the loss stays
# flat, so the rate is reset before fine-tuning. Assigning to
# optimizer.learning_rate keeps each model's compiled loss and metrics.
FINE_TUNE_LEARNING_RATE = 1e-4
for _loaded_model in (theft_model, waste_model, power_model):
    _loaded_model.optimizer.learning_rate = FINE_TUNE_LEARNING_RATE

theft_model.fit(X_train_t, y_train_t, epochs=20, batch_size=32, validation_split=0.2, callbacks=[early_stop, reduce_lr], class_weight=class_weights_theft, verbose=1)
waste_model.fit(X_train_w, y_train_w, epochs=20, batch_size=32, validation_split=0.2, callbacks=[early_stop, reduce_lr], class_weight=class_weights_waste, verbose=1)
power_model.fit(X_train_p, y_train_p, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stop, reduce_lr], verbose=1)

# Evaluate models on the held-out test splits (0.5 decision threshold).
theft_preds = (theft_model.predict(X_test_t, verbose=0) > 0.5).astype(int).flatten()
print("\nTheft Detection Report:")
print(classification_report(y_test_t, theft_preds))

waste_preds = (waste_model.predict(X_test_w, verbose=0) > 0.5).astype(int).flatten()
print("\nWaste Detection Report:")
print(classification_report(y_test_w, waste_preds))

power_preds = power_model.predict(X_test_p, verbose=0).flatten()
mse = mean_squared_error(y_test_p, power_preds)
rmse = np.sqrt(mse)
print(f"\nPower Prediction Mean Squared Error: {mse:.2f}")
print(f"Power Prediction Root Mean Squared Error: {rmse:.2f}")

# Option 2: Train new models from scratch (currently disabled).
# The code below is wrapped in a triple-quoted string, so it is evaluated as a
# no-op string expression and never runs. Remove the surrounding quotes to
# enable the Keras Tuner (Hyperband) search for all three models.
"""
print("\n--- Tuning and Training Theft Detection Model ---")
tuner_theft = kt.Hyperband(build_classification_model, objective='val_accuracy', max_epochs=30, factor=3, directory='keras_tuner', project_name='theft_detection_v2')
tuner_theft.search(X_train_t, y_train_t, epochs=30, validation_split=0.2, callbacks=[early_stop, reduce_lr], class_weight=class_weights_theft)
best_hps_theft = tuner_theft.get_best_hyperparameters(num_trials=1)[0]
theft_model = tuner_theft.hypermodel.build(best_hps_theft)
theft_model.fit(X_train_t, y_train_t, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stop, reduce_lr], class_weight=class_weights_theft, verbose=1)
theft_preds = (theft_model.predict(X_test_t) > 0.5).astype(int).flatten()
print("\nTheft Detection Report:")
print(classification_report(y_test_t, theft_preds))

print("\n--- Tuning and Training Waste Detection Model ---")
tuner_waste = kt.Hyperband(build_classification_model, objective='val_accuracy', max_epochs=30, factor=3, directory='keras_tuner', project_name='waste_detection_v2')
tuner_waste.search(X_train_w, y_train_w, epochs=30, validation_split=0.2, callbacks=[early_stop, reduce_lr], class_weight=class_weights_waste)
best_hps_waste = tuner_waste.get_best_hyperparameters(num_trials=1)[0]
waste_model = tuner_waste.hypermodel.build(best_hps_waste)
waste_model.fit(X_train_w, y_train_w, epochs=50, batch_size=32, validation_split=0.2, callbacks=[early_stop, reduce_lr], class_weight=class_weights_waste, verbose=1)
waste_preds = (waste_model.predict(X_test_w) > 0.5).astype(int).flatten()
print("\nWaste Detection Report:")
print(classification_report(y_test_w, waste_preds))

print("\n--- Tuning and Training Power Prediction Model ---")
tuner_power = kt.Hyperband(build_regression_model, objective='val_loss', max_epochs=50, factor=3, directory='keras_tuner', project_name='power_prediction_v2')
tuner_power.search(X_train_p, y_train_p, epochs=50, validation_split=0.2, callbacks=[early_stop, reduce_lr])
best_hps_power = tuner_power.get_best_hyperparameters(num_trials=1)[0]
power_model = tuner_power.hypermodel.build(best_hps_power)
power_model.fit(X_train_p, y_train_p, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stop, reduce_lr], verbose=1)
power_preds = power_model.predict(X_test_p).flatten()
mse = mean_squared_error(y_test_p, power_preds)
rmse = np.sqrt(mse)
print(f"\nPower Prediction Mean Squared Error: {mse:.2f}")
print(f"Power Prediction Root Mean Squared Error: {rmse:.2f}")
"""

# --- Step 4: Save New Models Locally ---
# File name -> trained model; the same names are used locally and on Drive.
trained_models = {
    "theft_detection_model_v2.keras": theft_model,
    "waste_detection_model_v2.keras": waste_model,
    "power_prediction_model_v2.keras": power_model,
}
for model_filename, trained_model in trained_models.items():
    trained_model.save(os.path.join(NEW_MODEL_DIR, model_filename))
print(f"\nNew models and scaler saved in the '{NEW_MODEL_DIR}' directory.")

# --- Step 5: Save New Models to Google Drive ---
# This overwrites the Drive copies that were loaded for fine-tuning above.
for model_filename, trained_model in trained_models.items():
    trained_model.save(os.path.join(DRIVE_MODEL_DIR, model_filename))
shutil.copy(os.path.join(NEW_MODEL_DIR, "feature_scaler_v2.pkl"), os.path.join(DRIVE_MODEL_DIR, "feature_scaler_v2.pkl"))
print(f"\nNew models and scaler saved in Google Drive at: {DRIVE_MODEL_DIR}")

# --- Step 6: Inference Example ---
print("\n--- Inference Example with New Models ---")
# Artifacts are reloaded from Google Drive. Set INFERENCE_MODEL_DIR to
# NEW_MODEL_DIR to use the local copies written in Step 4 instead.
INFERENCE_MODEL_DIR = DRIVE_MODEL_DIR
loaded_scaler = joblib.load(os.path.join(INFERENCE_MODEL_DIR, "feature_scaler_v2.pkl"))
loaded_theft_model = load_model(os.path.join(INFERENCE_MODEL_DIR, "theft_detection_model_v2.keras"))

# Convert sample to DataFrame to avoid StandardScaler warning
sample = pd.DataFrame([[235, 1.5, 350]], columns=FEATURE_NAMES)
scaled_sample = loaded_scaler.transform(sample)

theft_prediction_prob = loaded_theft_model.predict(scaled_sample)
theft_status = "Theft Detected" if theft_prediction_prob[0][0] > 0.5 else "Normal"
print(f"Sample Input: {sample.values}")
print(f"Theft Probability: {theft_prediction_prob[0][0]:.4f} ({theft_status})")

loaded_power_model = load_model(os.path.join(INFERENCE_MODEL_DIR, "power_prediction_model_v2.keras"))
# The power model was trained on the first two scaled features only
# (Voltage, Current), so the scaled Power column is dropped here.
sample_for_power = scaled_sample[:, :2]
predicted_power = loaded_power_model.predict(sample_for_power)
print(f"Predicted Power based on {sample[['Voltage', 'Current']].values[0]}: {predicted_power[0][0]:.2f} W")

Mounted at /content/drive
Created Google Drive folder: /content/drive/MyDrive/Models_Optimized_V2
Dataset '/content/drive/MyDrive/Models_Optimized_V2/sri_lanka_energy_dataset.csv' loaded successfully. Preview:
             Timestamp  Voltage  Current   Power  Metered_power  \
0  2010-06-09 00:00:00   231.15     4.26  984.70         583.37   
1  2010-06-09 00:01:00   230.36     1.74  400.83         400.83   
2  2010-06-09 00:02:00   228.55     1.51  345.11         345.11   
3  2010-06-09 00:03:00   229.97     3.49  802.60         510.97   
4  2010-06-09 00:04:00   229.10     2.67  611.70         611.70   

   Productive_power  Theft  Waste   Label  
0            583.37      1      0   theft  
1            400.83      0      0  normal  
2            345.11      0      0  normal  
3            510.97      1      0   theft  
4            611.70      0      0  normal  
Dataset 'new_energy_dataset.csv' loaded successfully. Preview:
             Timestamp  Voltage  Current    Power  Metered_p

None

Waste Detection Model Summary:


None

Power Prediction Model Summary:


None
Epoch 1/20
[1m4200/4200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 4ms/step - accuracy: 0.7608 - loss: 0.4286 - val_accuracy: 0.7725 - val_loss: 0.3936 - learning_rate: 1.0000e-06
Epoch 2/20
[1m4200/4200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 4ms/step - accuracy: 0.7601 - loss: 0.4299 - val_accuracy: 0.7688 - val_loss: 0.3947 - learning_rate: 1.0000e-06
Epoch 3/20
[1m4200/4200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - accuracy: 0.7574 - loss: 0.4287 - val_accuracy: 0.7717 - val_loss: 0.3938 - learning_rate: 1.0000e-06
Epoch 4/20
[1m4200/4200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - accuracy: 0.7577 - loss: 0.4273 - val_accuracy: 0.7714 - val_loss: 0.3945 - learning_rate: 1.0000e-06
Epoch 5/20
[1m4200/4200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 4ms/step - accuracy: 0.7569 - loss: 0.4272 - val_accuracy: 0.7661 - val_loss: 0.3970 - learning_rate: 1.0000e-06
Epoch 6/20
[1m4200/4200[0m 

In [None]:
import pandas as pd
import numpy as np
import os
import joblib
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from google.colab import drive

# --- Configuration ---
# Google Drive folder the scaler and the three models are loaded from.
DRIVE_MODEL_DIR = '/content/drive/MyDrive/Models_Optimized_V2'
# Column names (and order) of the single-row DataFrame passed to the scaler.
FEATURE_NAMES = ["Voltage", "Current", "Power"]

# --- Step 1: Mount Google Drive ---
drive.mount('/content/drive', force_remount=True)
print(f"Accessing Google Drive folder: {DRIVE_MODEL_DIR}")

# --- Step 2: Load Scaler and Models ---
def load_scaler_and_models():
    """Load the feature scaler and the three Keras models from DRIVE_MODEL_DIR.

    Returns:
        (scaler, theft_model, waste_model, power_model) on success. If anything
        raises while loading, the error is printed and a tuple of four None
        values is returned instead.
    """
    model_filenames = (
        "theft_detection_model_v2.keras",
        "waste_detection_model_v2.keras",
        "power_prediction_model_v2.keras",
    )
    try:
        fitted_scaler = joblib.load(os.path.join(DRIVE_MODEL_DIR, "feature_scaler_v2.pkl"))
        # Loaded in the same order as the returned tuple: theft, waste, power.
        theft_net, waste_net, power_net = [
            load_model(os.path.join(DRIVE_MODEL_DIR, filename))
            for filename in model_filenames
        ]
        print("Scaler and models loaded successfully.")
        return fitted_scaler, theft_net, waste_net, power_net
    except Exception as e:
        print(f"Error loading scaler or models: {e}")
        return (None,) * 4

scaler, theft_model, waste_model, power_model = load_scaler_and_models()
if scaler is None or any(model is None for model in [theft_model, waste_model, power_model]):
    # Raise instead of calling exit(): in a notebook exit() addresses the
    # kernel rather than cleanly aborting this cell, whereas an exception
    # stops the cell before the prediction loop below can run on None objects.
    raise RuntimeError(
        f"Failed to load scaler or models from '{DRIVE_MODEL_DIR}'; "
        "see the error printed above."
    )

# --- Step 3: Function to Get User Input ---
def get_user_input():
    """Prompt for Voltage, Current and Power and return them as one row.

    Returns:
        A single-row DataFrame with columns FEATURE_NAMES. When all three
        answers are blank the default sample (235, 1.5, 350) is used. Returns
        None, after printing a message, when a ValueError is raised (for
        example a non-numeric or partially blank entry).
    """
    prompts = ("Voltage (V): ", "Current (A): ", "Power (W): ")
    try:
        print("\nEnter input parameters (or press Enter to use default sample: Voltage=235, Current=1.5, Power=350):")
        answers = [input(prompt).strip() for prompt in prompts]

        # Use default sample if no input provided
        if not any(answers):
            print("Using default sample: Voltage=235, Current=1.5, Power=350")
            return pd.DataFrame([[235, 1.5, 350]], columns=FEATURE_NAMES)

        # Validate and convert inputs; float() raises ValueError on bad text
        numeric_row = [float(answer) for answer in answers]
        return pd.DataFrame([numeric_row], columns=FEATURE_NAMES)
    except ValueError as e:
        print(f"Invalid input. Please enter numeric values. Error: {e}")
        return None

# --- Step 4: Function to Make Predictions ---
def make_predictions(sample_df, scaler, theft_model, waste_model, power_model):
    """Scale one sample, run all three models on it and print the results.

    Args:
        sample_df: DataFrame holding the sample; its 'Voltage', 'Current' and
            'Power' columns are echoed in the report.
        scaler: Object whose ``transform`` scales ``sample_df``.
        theft_model: Model whose ``predict`` output is read as a theft probability.
        waste_model: Model whose ``predict`` output is read as a waste probability.
        power_model: Model fed only the first two scaled columns.

    Returns:
        None. Results are printed; any exception is caught and reported as
        "Error during prediction: ...".
    """
    def _status(probability, detected_text):
        # A probability above 0.5 counts as a detection.
        return detected_text if probability > 0.5 else "Normal"

    try:
        # Scale the input sample
        features_scaled = scaler.transform(sample_df)
        power_features = features_scaled[:, :2]  # Power model uses only Voltage and Current

        # Theft Detection Prediction
        theft_prob = theft_model.predict(features_scaled, verbose=0)[0][0]
        theft_status = _status(theft_prob, "Theft Detected")

        # Waste Detection Prediction
        waste_prob = waste_model.predict(features_scaled, verbose=0)[0][0]
        waste_status = _status(waste_prob, "Waste Detected")

        # Power Prediction
        predicted_power = power_model.predict(power_features, verbose=0)[0][0]

        # Display Results
        print("\n--- Prediction Results ---")
        voltage_in = sample_df['Voltage'].values[0]
        current_in = sample_df['Current'].values[0]
        power_in = sample_df['Power'].values[0]
        print(f"Input Parameters: Voltage={voltage_in} V, Current={current_in} A, Power={power_in} W")
        print(f"Theft Prediction: Probability={theft_prob:.4f} ({theft_status})")
        print(f"Waste Prediction: Probability={waste_prob:.4f} ({waste_status})")
        print(f"Power Prediction: {predicted_power:.2f} W")
    except Exception as e:
        print(f"Error during prediction: {e}")

# --- Step 5: Main Execution ---
# Prompt, predict and repeat until the user answers anything other than "yes".
while True:
    sample_df = get_user_input()
    if sample_df is not None:
        make_predictions(sample_df, scaler, theft_model, waste_model, power_model)

        # Ask if user wants to test another sample
        retry = input("\nDo you want to test another sample? (yes/no): ").strip().lower()
        if retry == 'yes':
            continue
        print("Exiting.")
        break
    else:
        # Invalid entry: prompt again without asking the retry question.
        print("Try again with valid inputs.")


Mounted at /content/drive
Accessing Google Drive folder: /content/drive/MyDrive/Models_Optimized_V2
Scaler and models loaded successfully.

Enter input parameters (or press Enter to use default sample: Voltage=235, Current=1.5, Power=350):
Voltage (V): 222
Current (A): 5.2
Power (W): 690





--- Prediction Results ---
Input Parameters: Voltage=222.0 V, Current=5.2 A, Power=690.0 W
Theft Prediction: Probability=0.3118 (Normal)
Waste Prediction: Probability=0.0971 (Normal)
Power Prediction: 1228.97 W

Do you want to test another sample? (yes/no): yes

Enter input parameters (or press Enter to use default sample: Voltage=235, Current=1.5, Power=350):
Voltage (V): 230
Current (A): 3
Power (W): 1200

--- Prediction Results ---
Input Parameters: Voltage=230.0 V, Current=3.0 A, Power=1200.0 W
Theft Prediction: Probability=0.8006 (Theft Detected)
Waste Prediction: Probability=0.3462 (Normal)
Power Prediction: 690.31 W

Do you want to test another sample? (yes/no): no
Exiting.


In [None]:
from google.colab import files

# files.upload() returns a {filename: bytes} mapping. Left as the cell's last
# expression, that mapping was echoed in full, dumping the raw CSV bytes into
# the notebook output. Keep it in a variable and report only the file names.
uploaded_files = files.upload()
print(f"Uploaded: {list(uploaded_files)}")


Saving smart_grid_dataset.csv to smart_grid_dataset.csv


{'smart_grid_dataset.csv': b'Timestamp,Voltage,Current,Power,Theft,Waste\r\n1/1/2024 0:00,232.4835708,5.124672741,232483.5708,0,1\r\n1/1/2024 0:15,229.3086785,22.21351868,229308.6785,1,0\r\n1/1/2024 0:30,233.2384427,46.13221716,233238.4427,0,0\r\n1/1/2024 0:45,237.6151493,47.64897167,237615.1493,0,0\r\n1/1/2024 1:00,228.8292331,7.410166176,228829.2331,0,1\r\n1/1/2024 1:15,228.8293152,10.55400862,228829.3152,0,1\r\n1/1/2024 1:30,237.8960641,5.219165527,237896.0641,1,0\r\n1/1/2024 1:45,233.8371736,44.09031705,233837.1736,0,0\r\n1/1/2024 2:00,227.6526281,8.189880162,227652.6281,0,1\r\n1/1/2024 2:15,232.7128002,9.85550685,232712.8002,1,0\r\n1/1/2024 2:30,227.6829115,15.24301923,227682.9115,0,0\r\n1/1/2024 2:45,227.6713512,46.27622982,227671.3512,0,0\r\n1/1/2024 3:00,231.2098114,35.97053033,231209.8114,0,0\r\n1/1/2024 3:15,220.4335988,32.75966119,220433.5988,0,0\r\n1/1/2024 3:30,221.3754108,40.30956076,221375.4108,0,0\r\n1/1/2024 3:45,227.1885624,36.19780239,227188.5624,1,0\r\n1/1/2024 4:00

In [None]:
# Copy the Kaggle API token (kaggle.json, expected in the current working
# directory) into ~/.kaggle and restrict it to owner read/write (mode 600).
# NOTE(review): presumably this is where the kaggle CLI used in the next cell
# looks for credentials; confirm against the Kaggle API docs.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download five Kaggle datasets as .zip archives (the recorded output shows
# them landing in /content). The extraction cell below opens these archives
# by file name.
!kaggle datasets download -d ziya07/smart-grid-real-time-load-monitoring-dataset
!kaggle datasets download -d ziya07/powergridsense-dataset
!kaggle datasets download -d plegmalabs/handful-project-energy-data
!kaggle datasets download -d ziya07/smart-grid-stability-and-reliability-dataset
!kaggle datasets download -d ziya07/iot-enabled-smart-grid-dataset

Dataset URL: https://www.kaggle.com/datasets/ziya07/smart-grid-real-time-load-monitoring-dataset
License(s): CC0-1.0
Downloading smart-grid-real-time-load-monitoring-dataset.zip to /content
  0% 0.00/5.49M [00:00<?, ?B/s]
100% 5.49M/5.49M [00:00<00:00, 258MB/s]
Dataset URL: https://www.kaggle.com/datasets/ziya07/powergridsense-dataset
License(s): CC0-1.0
Downloading powergridsense-dataset.zip to /content
  0% 0.00/487k [00:00<?, ?B/s]
100% 487k/487k [00:00<00:00, 456MB/s]
Dataset URL: https://www.kaggle.com/datasets/plegmalabs/handful-project-energy-data
License(s): GPL-3.0
Downloading handful-project-energy-data.zip to /content
100% 918M/919M [00:04<00:00, 208MB/s]
100% 919M/919M [00:04<00:00, 209MB/s]
Dataset URL: https://www.kaggle.com/datasets/ziya07/smart-grid-stability-and-reliability-dataset
License(s): CC0-1.0
Downloading smart-grid-stability-and-reliability-dataset.zip to /content
  0% 0.00/167k [00:00<?, ?B/s]
100% 167k/167k [00:00<00:00, 310MB/s]
Dataset URL: https://www.kag

In [None]:
import zipfile

# Archives downloaded in the previous cell, extracted in this order into one
# shared folder (a later archive overwrites same-named files of an earlier one).
DATASET_ARCHIVES = [
    "handful-project-energy-data.zip",
    "iot-enabled-smart-grid-dataset.zip",
    "powergridsense-dataset.zip",
    "smart-grid-real-time-load-monitoring-dataset.zip",
    "smart-grid-stability-and-reliability-dataset.zip",
]
EXTRACT_DIR = "dataset"

for archive_name in DATASET_ARCHIVES:
    # The context manager closes each archive's file handle after extraction;
    # the bare ZipFile(...).extractall(...) calls never closed theirs.
    with zipfile.ZipFile(archive_name, "r") as archive:
        archive.extractall(EXTRACT_DIR)

# 🔧 Advanced Fine-Tuning Guide for FYRP AI Models

## 📊 Current Fine-Tuning Capabilities

This notebook supports **two training modes**:

### 1️⃣ **Fine-Tuning Existing Models** (Active in Cell 2)
- Loads pre-trained models from Google Drive
- Continues training with new data
- Faster training (20-50 epochs)
- Preserves previous learning

### 2️⃣ **Hyperparameter Optimization** (Commented out in Cell 2)
- Uses Keras Tuner (Hyperband algorithm)
- Automatically searches for optimal configurations
- Tests multiple combinations of parameters

---

## 🎯 Tunable Parameters

| Parameter | Classification Models | Regression Model | Description |
|-----------|----------------------|------------------|-------------|
| **Layer 1 Units** | 32-128 (step 32) | 64-256 (step 64) | Neurons in first hidden layer |
| **Layer 2 Units** | 16-64 (step 16) | 32-128 (step 32) | Neurons in second hidden layer |
| **Dropout Rate** | 0.2-0.4 (step 0.1) | 0.2-0.4 (step 0.1) | Prevents overfitting |
| **Learning Rate** | [1e-3, 5e-4, 1e-4] | [1e-3, 5e-4, 1e-4] | Step size for optimization |
| **Batch Size** | 32 (configurable) | 32 (configurable) | Samples per gradient update |
| **Epochs** | 20-50 | 50-100 | Maximum full passes over the training data (early stopping may end training sooner) |

---

## 💡 How to Fine-Tune

### **Method 1: Quick Fine-Tuning (Current Setup)**
```python
# In Cell 2, keep Option 1 active
theft_model.fit(X_train_t, y_train_t, 
                epochs=20,  # ← Increase for more training
                batch_size=32,  # ← Adjust for memory/speed
                validation_split=0.2,
                callbacks=[early_stop, reduce_lr],
                class_weight=class_weights_theft)
```

### **Method 2: Automated Hyperparameter Search**
```python
# In Cell 2, uncomment Option 2
# Uses Keras Tuner to find best configuration automatically
```

---

## 🚀 Advanced Techniques You Can Add

### 1. **Learning Rate Scheduling**
- Current: ReduceLROnPlateau (reduces LR when loss plateaus)
- Can add: CyclicLR, ExponentialDecay

### 2. **Data Augmentation**
- Add noise to voltage/current readings
- Synthetic minority oversampling (SMOTE)

### 3. **Ensemble Methods**
- Train multiple models with different seeds
- Average predictions for better accuracy

### 4. **Transfer Learning**
- Use models trained on similar energy datasets
- Fine-tune only top layers

### 5. **Advanced Architectures**
- Add LSTM layers for time-series patterns
- Add Attention mechanisms
- Try deeper networks (3-4 layers)

---

## 📈 Performance Monitoring

Track these metrics during fine-tuning:
- **Accuracy** (Classification)
- **Precision/Recall** (Theft/Waste detection)
- **MSE/RMSE** (Power prediction)
- **Training time**
- **Model size**

---

## ⚠️ Common Fine-Tuning Issues

| Issue | Solution |
|-------|----------|
| **Overfitting** | Increase dropout, reduce epochs, add regularization |
| **Underfitting** | Increase model capacity, train longer, reduce dropout |
| **Slow convergence** | Increase learning rate, use Adam optimizer |
| **Class imbalance** | Use class weights (already implemented) |
| **High variance** | Increase training data, use k-fold cross-validation |

---

## 🎓 Next Steps

Run the cells below to experiment with different fine-tuning strategies!

In [None]:
# 🔬 Experiment: Custom Fine-Tuning Configuration
# Run this cell to fine-tune with custom parameters.
# It reuses X_train_t, y_train_t, X_test_t, y_test_t and class_weights_theft
# from the training cell, so that cell must have been run first.

import os  # used below for os.path.join but previously not imported in this cell

import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report  # used by the evaluation step of this cell
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive', force_remount=True)
DRIVE_MODEL_DIR = '/content/drive/MyDrive/Models_Optimized_V2'

# Load existing model
print("Loading model for fine-tuning...")
theft_model = load_model(os.path.join(DRIVE_MODEL_DIR, "theft_detection_model_v2.keras"))

# --- Custom Fine-Tuning Parameters ---
FINE_TUNE_CONFIG = {
    'epochs': 30,              # ← Adjust training duration
    'batch_size': 64,          # ← Larger batch = faster but more memory
    'learning_rate': 1e-4,     # ← Lower LR for fine-tuning
    'validation_split': 0.2,
    'early_stop_patience': 15,
    'reduce_lr_patience': 5
}

print("\n🔧 Fine-Tuning Configuration:")
for key, value in FINE_TUNE_CONFIG.items():
    print(f"  {key}: {value}")

# Custom learning rate schedule
def lr_schedule(epoch, lr):
    """Step-decay schedule for the LearningRateScheduler callback.

    The callback passes the *current* learning rate as ``lr`` on every epoch,
    so multiplying it on each epoch of a range compounds the decay (halving
    every epoch from 10 on, then dividing by 10 every epoch from 20 on).
    The factor is therefore applied once, at each boundary epoch, giving
    1x the starting rate for epochs 0-9, 0.5x for 10-19 and 0.1x from 20 on.

    Args:
        epoch: Zero-based epoch index.
        lr: Learning rate currently set on the optimizer.

    Returns:
        The learning rate to use for this epoch.
    """
    if epoch == 10:
        return lr * 0.5
    if epoch == 20:
        # 0.5 was already applied at epoch 10; 0.5 * 0.2 = 0.1 of the start.
        return lr * 0.2
    return lr

# Callbacks with custom parameters
callbacks = [
    EarlyStopping(
        monitor='val_loss', 
        patience=FINE_TUNE_CONFIG['early_stop_patience'],
        restore_best_weights=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=FINE_TUNE_CONFIG['reduce_lr_patience'],
        min_lr=1e-7,
        verbose=1
    ),
    LearningRateScheduler(lr_schedule, verbose=1)
]

# Recompile with new learning rate
theft_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=FINE_TUNE_CONFIG['learning_rate']),
    loss='binary_crossentropy',
    metrics=['accuracy', 'precision', 'recall']
)

print("\n🚀 Starting fine-tuning...")
# validation_split holds part of X_train_t out of training, so the rows that
# are actually trained on are the total minus that validation share. The
# message used to report the full set as "Training samples".
n_fit_rows = len(X_train_t)
n_training_rows = int(n_fit_rows * (1 - FINE_TUNE_CONFIG['validation_split']))
n_validation_rows = n_fit_rows - n_training_rows
print(f"Training samples: {n_training_rows}, Validation samples: {n_validation_rows}")

# Fine-tune the model
history = theft_model.fit(
    X_train_t, y_train_t,
    epochs=FINE_TUNE_CONFIG['epochs'],
    batch_size=FINE_TUNE_CONFIG['batch_size'],
    validation_split=FINE_TUNE_CONFIG['validation_split'],
    callbacks=callbacks,
    class_weight=class_weights_theft,
    verbose=1
)

# Evaluate on the held-out test split (0.5 decision threshold)
theft_preds = (theft_model.predict(X_test_t, verbose=0) > 0.5).astype(int).flatten()
print("\n✅ Fine-Tuning Complete!")
print("\nUpdated Model Performance:")
print(classification_report(y_test_t, theft_preds))

# Save fine-tuned model under a separate name so the base model is kept
theft_model.save(os.path.join(DRIVE_MODEL_DIR, "theft_detection_model_v2_finetuned.keras"))
print(f"\n💾 Fine-tuned model saved to: {DRIVE_MODEL_DIR}/theft_detection_model_v2_finetuned.keras")

In [None]:
# 📊 Visualize Training History
# Run this after fine-tuning to see performance graphs

import matplotlib.pyplot as plt

def plot_training_history(history):
    """Plot per-epoch training curves and print a short summary.

    Draws a 2x2 grid with one panel each for loss, accuracy, precision and
    recall. A panel whose training metric is absent from the history is
    hidden, and a validation curve is drawn only when the matching ``val_``
    series exists, so histories recorded without validation data or without
    the optional metrics do not raise ``KeyError``.

    Args:
        history: Object whose ``history`` attribute is a dict mapping metric
            names (e.g. ``'loss'``, ``'val_loss'``) to per-epoch value lists,
            such as the object returned by ``model.fit``.

    Returns:
        None. The figure is shown with ``plt.show()`` and the summary is
        printed to stdout.
    """
    logs = history.history

    # (history key, display name) per panel, in row-major grid order.
    panels = [
        ('loss', 'Loss'),
        ('accuracy', 'Accuracy'),
        ('precision', 'Precision'),
        ('recall', 'Recall'),
    ]

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle('Fine-Tuning Performance Metrics', fontsize=16, fontweight='bold')

    for ax, (metric_key, metric_name) in zip(axes.flat, panels):
        if metric_key not in logs:
            # Nothing recorded for this metric: hide the panel instead of
            # leaving an empty framed axes in the figure.
            ax.set_visible(False)
            continue

        ax.plot(logs[metric_key], label=f'Training {metric_name}', linewidth=2)
        val_key = f'val_{metric_key}'
        if val_key in logs:
            ax.plot(logs[val_key], label=f'Validation {metric_name}', linewidth=2)
        ax.set_title(f'Model {metric_name} Over Epochs', fontweight='bold')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(metric_name)
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Print summary statistics: (caption, history key, reducer over the series)
    def last_value(series):
        return series[-1]

    summary_rows = [
        ('Final Training Loss', 'loss', last_value),
        ('Final Validation Loss', 'val_loss', last_value),
        ('Final Training Accuracy', 'accuracy', last_value),
        ('Final Validation Accuracy', 'val_accuracy', last_value),
        ('Best Validation Loss', 'val_loss', min),
        ('Best Validation Accuracy', 'val_accuracy', max),
    ]

    print("\n📈 Training Summary:")
    for caption, series_key, reduce_series in summary_rows:
        series = logs.get(series_key)
        # Skip series that were not recorded or contain no epochs.
        if series is not None and len(series) > 0:
            print(f"  {caption}: {reduce_series(series):.4f}")
    print(f"  Total Epochs Trained: {len(logs.get('loss', []))}")

# Plot the curves only if the fine-tuning cell has already defined `history`;
# otherwise tell the user which cell to run first.
if 'history' not in locals():
    print("⚠️ No training history found. Run the fine-tuning cell first!")
else:
    plot_training_history(history)

In [None]:
# 🎯 Advanced: Freeze and Unfreeze Layers for Transfer Learning
# Fine-tune only specific layers of the model

from tensorflow.keras.models import load_model
from google.colab import drive

drive.mount('/content/drive', force_remount=True)
DRIVE_MODEL_DIR = '/content/drive/MyDrive/Models_Optimized_V2'


def _compile_for_fine_tuning(net, learning_rate):
    """Recompile `net` with Adam at `learning_rate`, binary cross-entropy loss and accuracy."""
    net.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )


# Load model
model = load_model(os.path.join(DRIVE_MODEL_DIR, "theft_detection_model_v2.keras"))

# Horizontal rule reused by both strategy banners
section_rule = "=" * 60

print("🔍 Model Layer Structure:")
for i, layer in enumerate(model.layers):
    layer_type = layer.__class__.__name__
    print(f"  Layer {i}: {layer.name} ({layer_type}) - Trainable: {layer.trainable}")

print("\n" + section_rule)
print("🧊 STRATEGY 1: Freeze Early Layers (Feature Extraction)")
print(section_rule)

# Mark every layer except the final two as non-trainable
for layer in model.layers[:-2]:
    layer.trainable = False

print("\nUpdated Layer States:")
for i, layer in enumerate(model.layers):
    print(f"  Layer {i}: {layer.name} - Trainable: {layer.trainable}")

# Recompile after changing the trainable flags
_compile_for_fine_tuning(model, 1e-4)

print("\n✅ Model ready for fine-tuning with frozen layers!")
print("   Only the last 2 layers will be updated during training.")

print("\n" + section_rule)
print("🔥 STRATEGY 2: Gradual Unfreezing")
print(section_rule)
print("\n".join([
    "Train in stages:",
    "  Stage 1: Freeze all layers except output layer → Train 10 epochs",
    "  Stage 2: Unfreeze last 2 layers → Train 10 epochs",
    "  Stage 3: Unfreeze all layers with low LR → Train 10 epochs",
]))

# Example: make every layer trainable again
print("\n🔓 Unfreezing all layers for full fine-tuning...")
for layer in model.layers:
    layer.trainable = True

# Very low LR for stability
_compile_for_fine_tuning(model, 1e-5)

print("\n✅ All layers unfrozen! Ready for full model fine-tuning.")

In [None]:
# 🔬 Compare Multiple Fine-Tuning Strategies
# Run experiments to find the best approach

import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model, clone_model
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
import time

# Experiment configurations
# One entry per experiment: optimizer learning rate, maximum epochs, batch size
# and a human-readable description used in the printed report.
EXPERIMENTS = dict(
    baseline=dict(
        learning_rate=1e-3,
        epochs=10,
        batch_size=32,
        description='Default configuration',
    ),
    low_lr=dict(
        learning_rate=1e-5,
        epochs=20,
        batch_size=32,
        description='Very low learning rate, more epochs',
    ),
    large_batch=dict(
        learning_rate=1e-3,
        epochs=15,
        batch_size=128,
        description='Larger batch size for faster training',
    ),
    small_batch=dict(
        learning_rate=5e-4,
        epochs=20,
        batch_size=16,
        description='Smaller batch for better generalization',
    ),
)

def run_experiment(model_path, experiment_name, config, X_train, y_train, X_test, y_test, class_weights):
    """Fine-tune a freshly loaded model with one configuration and score it.

    The model is reloaded from ``model_path`` on every call so experiments do
    not share weights. It is compiled with Adam and binary cross-entropy,
    trained silently with early stopping on ``val_loss``, and evaluated on the
    test data by thresholding predictions at 0.5.

    Args:
        model_path: Path handed to ``load_model``.
        experiment_name: Label used in the printed banner and the result dict.
        config: Dict with required keys ``'description'``, ``'learning_rate'``,
            ``'epochs'`` and ``'batch_size'``. Two optional keys, named like
            the ones in ``FINE_TUNE_CONFIG``, override values that used to be
            hard-coded: ``'validation_split'`` (default 0.2, must be a
            positive fraction so ``val_loss`` is recorded) and
            ``'early_stop_patience'`` (default 5).
        X_train, y_train: Training inputs and targets passed to ``model.fit``.
        X_test, y_test: Held-out inputs and targets used for scoring.
        class_weights: Forwarded to ``model.fit`` as ``class_weight``.

    Returns:
        Dict with keys ``experiment``, ``accuracy``, ``precision``,
        ``recall``, ``f1_score``, ``training_time`` (seconds spent in
        ``fit``), ``epochs_trained``, ``final_val_loss`` (last epoch),
        ``best_val_loss`` (lowest epoch) and ``config``.
    """
    # Optional knobs; the defaults reproduce the previously hard-coded values.
    validation_split = config.get('validation_split', 0.2)
    early_stop_patience = config.get('early_stop_patience', 5)

    print(f"\n{'='*60}")
    print(f"🧪 Experiment: {experiment_name}")
    print(f"   {config['description']}")
    print(f"{'='*60}")

    # Load fresh model
    model = load_model(model_path)

    # Compile with experiment config
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=config['learning_rate']),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # Train (only the fit call is timed)
    start_time = time.time()
    history = model.fit(
        X_train, y_train,
        epochs=config['epochs'],
        batch_size=config['batch_size'],
        validation_split=validation_split,
        class_weight=class_weights,
        verbose=0,  # Silent training
        callbacks=[
            EarlyStopping(
                monitor='val_loss',
                patience=early_stop_patience,
                restore_best_weights=True
            )
        ]
    )
    training_time = time.time() - start_time

    # Evaluate: threshold predicted probabilities at 0.5 to get 0/1 labels
    y_pred = (model.predict(X_test, verbose=0) > 0.5).astype(int).flatten()

    val_losses = history.history['val_loss']

    # Calculate metrics
    results = {
        'experiment': experiment_name,
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, zero_division=0),
        'recall': recall_score(y_test, y_pred, zero_division=0),
        'f1_score': f1_score(y_test, y_pred, zero_division=0),
        'training_time': training_time,
        'epochs_trained': len(history.history['loss']),
        'final_val_loss': val_losses[-1],
        # With restore_best_weights=True the evaluated weights may come from
        # the best epoch rather than the last one, so report that loss too.
        'best_val_loss': min(val_losses),
        'config': config
    }

    print(f"\n✅ Results:")
    print(f"   Accuracy:  {results['accuracy']:.4f}")
    print(f"   Precision: {results['precision']:.4f}")
    print(f"   Recall:    {results['recall']:.4f}")
    print(f"   F1 Score:  {results['f1_score']:.4f}")
    print(f"   Training Time: {results['training_time']:.2f}s")
    print(f"   Epochs: {results['epochs_trained']}")

    return results

# Run every configured experiment against the same saved base model
print("🚀 Starting Fine-Tuning Comparison...")
print(f"   Running {len(EXPERIMENTS)} experiments")

model_path = os.path.join(DRIVE_MODEL_DIR, "theft_detection_model_v2.keras")

all_results = []
for exp_name, config in EXPERIMENTS.items():
    results = run_experiment(
        model_path, exp_name, config,
        X_train_t, y_train_t, X_test_t, y_test_t,
        class_weights_theft,
    )
    all_results.append(results)


def _comparison_row(result):
    """Format one experiment's result dict as a display row for the summary table."""
    return {
        'Experiment': result['experiment'],
        'Accuracy': f"{result['accuracy']:.4f}",
        'Precision': f"{result['precision']:.4f}",
        'Recall': f"{result['recall']:.4f}",
        'F1 Score': f"{result['f1_score']:.4f}",
        'Time (s)': f"{result['training_time']:.1f}",
        'Epochs': result['epochs_trained'],
    }


# Side-by-side table of all experiments
comparison_rule = "=" * 80
print("\n" + comparison_rule)
print("📊 EXPERIMENT COMPARISON")
print(comparison_rule)

comparison_df = pd.DataFrame([_comparison_row(result) for result in all_results])

print(comparison_df.to_string(index=False))

# Pick the experiment with the highest F1 score (first one wins on ties).
# NOTE(review): the scores come from X_test_t/y_test_t, so the winner is
# selected on test data — confirm whether a separate validation set is wanted.
best_exp = max(all_results, key=lambda res: res['f1_score'])
best_config = best_exp['config']
print(f"\n🏆 BEST CONFIGURATION: {best_exp['experiment']}")
print(f"   F1 Score: {best_exp['f1_score']:.4f}")
print(f"   Configuration: {best_config['description']}")
print(f"   Learning Rate: {best_config['learning_rate']}")
print(f"   Batch Size: {best_config['batch_size']}")