In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# 1. Load and prepare dataset
df = pd.read_csv("Realistic_Solar_PV_Fault_100k.csv")

# Columns left out of the feature matrix: the label itself plus seven
# other columns that the model is not given as inputs.
excluded_columns = [
    "Fault_Flag",
    "DHI (W/m²)",
    "Sun_Azimuth (degrees)",
    "Solar_Elevation (degrees)",
    "Panel_Orientation (degrees)",
    "Longitude",
    "Latitude",
    "Panel_Tilt (degrees)",
]

# X holds every remaining column; y is the "Fault_Flag" label column.
X = df.drop(columns=excluded_columns)
y = df["Fault_Flag"]

In [3]:
# 2. Train-test split and scale
# Hold out 20% of the rows for testing, stratified on the label, with a
# fixed random_state so the partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply those same
# statistics to both partitions so no test information reaches the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3. Improved ANN
# Seed the Python, NumPy and TensorFlow RNGs before the model is built so
# that weight initialisation, dropout masks and batch shuffling repeat on
# "Restart & Run All". Without this the split is fixed (random_state=42)
# but the trained network -- and every metric derived from it -- is not.
# NOTE(review): bit-exact GPU runs may additionally need
# tf.config.experimental.enable_op_determinism() -- confirm if required.
tf.keras.utils.set_random_seed(42)

# Fully connected binary classifier: 128 -> 64 -> 32 ReLU units with batch
# normalisation and 30% dropout after the first two hidden layers, ending
# in a single sigmoid unit.
ann = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    BatchNormalization(),
    Dropout(0.3),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping to avoid overfitting: stop once val_loss has not improved
# for 10 epochs and roll back to the best weights seen.
es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# A 0.2 fraction of the training arrays is held out by Keras as the
# validation set that early stopping monitors; 200 is only an upper bound
# on the number of epochs.
ann.fit(X_train_scaled, y_train, validation_split=0.2, epochs=200, batch_size=64, verbose=0, callbacks=[es])

# 4. Extract deep features
# Re-use the trained network up to the layer just before its output layer;
# that layer's activations serve as the learned ("deep") features.
penultimate_layer = ann.layers[-2]
feature_model = Model(inputs=ann.input, outputs=penultimate_layer.output)

# Run both partitions through the truncated network (train first, then test).
X_train_deep, X_test_deep = (
    feature_model.predict(split) for split in (X_train_scaled, X_test_scaled)
)

# Optional: Combine raw + deep features (can improve performance)
# Column-wise concatenation: scaled inputs first, deep features after.
X_train_combined = np.concatenate((X_train_scaled, X_train_deep), axis=1)
X_test_combined = np.concatenate((X_test_scaled, X_test_deep), axis=1)

# 5. Hyperparameter tuning for SVC
# Search space for an RBF-kernel SVC: 3 values of C x 4 values of gamma.
param_grid = dict(
    C=[0.1, 1, 10],
    gamma=['scale', 0.01, 0.1, 1],
    kernel=['rbf'],
)

# Exhaustive 3-fold cross-validated search scored by F1, with n_jobs=-1 so
# candidate fits run in parallel.
svc = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    scoring='f1',
    cv=3,
    n_jobs=-1,
)
svc.fit(X_train_combined, y_train)

# 6. Predict and evaluate
# Predictions on the held-out test partition from the fitted search object.
svc_preds = svc.predict(X_test_combined)

In [10]:
# Compute each test-set metric once, then report them in a fixed order.
test_accuracy = accuracy_score(y_test, svc_preds)
test_f1 = f1_score(y_test, svc_preds)
test_confusion = confusion_matrix(y_test, svc_preds)
test_report = classification_report(y_test, svc_preds)

print("🔍 Hybrid ANN + SVC (Improved) Results:")
print("Best SVC Params:", svc.best_params_)
print("Accuracy:", test_accuracy)
print("F1 Score:", test_f1)
print("\nConfusion Matrix:")
print(test_confusion)
print("\nClassification Report:")
print(test_report)

🔍 Hybrid ANN + SVC (Improved) Results:
Best SVC Params: {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}
Accuracy: 0.9801
F1 Score: 0.8971044467425027

Confusion Matrix:
[[17867   133]
 [  265  1735]]

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99     18000
           1       0.93      0.87      0.90      2000

    accuracy                           0.98     20000
   macro avg       0.96      0.93      0.94     20000
weighted avg       0.98      0.98      0.98     20000

