In [11]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim
import logging
import time
import os
import matplotlib.pyplot as plt
import smtplib
from email.message import EmailMessage

# ==============================
# Logging Configuration
# ==============================
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("system_log.log"),
        logging.StreamHandler()
    ]
)

# Load the datasets (replace 'path_to_*' with actual file paths)
data_path = '../data/'

# Load the datasets
dos_data = pd.read_csv(os.path.join(data_path, 'DoS_dataset.csv'), nrows=1000) 
fuzzy_data = pd.read_csv(os.path.join(data_path, 'Fuzzy_dataset.csv'),nrows=1000)
gear_data = pd.read_csv(os.path.join(data_path, 'gear_dataset.csv'), nrows=1000)
rpm_data = pd.read_csv(os.path.join(data_path, 'RPM_dataset.csv'), nrows=1000)

# Concatenate datasets
data = pd.concat([dos_data, fuzzy_data, gear_data, rpm_data], axis=0)
logging.info(f"Data loaded and concatenated. Total rows: {data.shape[0]}, Total columns: {data.shape[1]}")

# Separate numeric and categorical columns
numeric_features = data.select_dtypes(include=['float64', 'int64']).columns
categorical_features = data.select_dtypes(include=['object']).columns

# Define preprocessing for numeric features (impute missing values with mean, scale them)
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())])

# Define preprocessing for categorical features (impute missing values with mode, one-hot encode them)
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))])

# Combine both transformers into a ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

# Apply transformations to the data
data_preprocessed = preprocessor.fit_transform(data)
logging.info(f"Data preprocessing complete. Shape after preprocessing: {data_preprocessed.shape}")

# Convert the processed data into a PyTorch tensor
data_tensor = torch.tensor(data_preprocessed, dtype=torch.float32)

# Create DataLoader
batch_size = 64
dataset = TensorDataset(data_tensor)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
logging.info(f"DataLoader created with batch size {batch_size}.")

# ==============================
# Autoencoder Model
# ==============================
class Autoencoder(nn.Module):
    def __init__(self, input_size):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16)
        )
        self.decoder = nn.Sequential(
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, input_size),
            nn.Sigmoid())

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

# Initialize the Autoencoder
input_size = data_preprocessed.shape[1]
model = Autoencoder(input_size)
logging.info(f"Autoencoder initialized with input size {input_size}.")

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training the Autoencoder
num_epochs = 50
for epoch in range(num_epochs):
    for batch in dataloader:
        inputs = batch[0]
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer.step()
    
    logging.info(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")

# ==============================
# Anomaly Detection and Evaluation
# ==============================
def calculate_reconstruction_errors(data_loader, model):
    model.eval()
    reconstruction_errors = []
    for batch in data_loader:
        inputs = batch[0]
        outputs = model(inputs)
        loss = criterion(outputs, inputs)
        reconstruction_errors.append(loss.item())
    logging.info("Reconstruction errors calculated.")
    return reconstruction_errors

# Calculate reconstruction errors
reconstruction_errors = calculate_reconstruction_errors(dataloader, model)

# ==============================
# Fine-Tuning Threshold with Percentile
# ==============================
def set_threshold_based_on_percentile(reconstruction_errors, percentile=95):
    threshold = np.percentile(reconstruction_errors, percentile)
    logging.info(f"Threshold set at {percentile}th percentile: {threshold}")
    return threshold

# Set the threshold based on the 95th percentile
threshold = set_threshold_based_on_percentile(reconstruction_errors, percentile=95)

# ==============================
# Email Alert Setup (Using EmailMessage)
# ==============================
def send_email_alert(subject, message):
    # Define your SMTP email server details (for example, using Gmail's SMTP server)
    smtp_server = "smtp.gmail.com"
    smtp_port = 587
    email_from = "gopalghule05@gmail.com"
    email_to = "gopalghule05@gmail.com"
    password = "weyaycicumjfnxnn"
    
    # Create the email message
    email_msg = EmailMessage()
    email_msg['From'] = email_from
    email_msg['To'] = email_to
    email_msg['Subject'] = subject
    email_msg.set_content(message)
    
    # Send the email
    try:
        server = smtplib.SMTP(smtp_server, smtp_port)
        server.starttls()
        server.login(email_from, password)
        server.send_message(email_msg)
        server.quit()
        logging.info(f"Email sent to {email_to}")
    except Exception as e:
        logging.error(f"Failed to send email: {e}")

# ==============================
# Mitigation Process with Email Alerts After Mitigation
# ==============================
def alert_driver(message):
    try:
        logging.warning(f"Driver Alert: {message}")
        #send_email_alert("Driver Alert", message)
        send_email_alert("Mitigation Action", f"Driver has been alerted: {message}")
        logging.info("Driver alert successfully sent.")
    except Exception as e:
        logging.error(f"Failed to alert driver: {e}")
        send_email_alert("Driver Alert Failure", f"Failed to alert driver: {e}")

def isolate_ecu(ecu_id):
    try:
        logging.error(f"ECU {ecu_id} isolated due to suspicious activity.")
        #send_email_alert("ECU Isolation", f"ECU {ecu_id} has been isolated due to suspicious activity.")
        send_email_alert("Mitigation Action", f"Mitigation performed: ECU {ecu_id} isolated.")
        logging.info(f"ECU {ecu_id} isolation successfully performed.")
    except Exception as e:
        logging.error(f"Failed to isolate ECU: {e}")
        send_email_alert("ECU Isolation Failure", f"Failed to isolate ECU {ecu_id}: {e}")

def trigger_safe_mode():
    try:
        logging.critical("Safe mode activated due to critical anomaly detection.")
        #send_email_alert("Critical Alert", "Safe mode activated due to critical anomaly detection.")
        send_email_alert("Mitigation Action", "Mitigation performed: Safe mode activated.")
        logging.info("Safe mode successfully triggered.")
    except Exception as e:
        logging.error(f"Failed to trigger safe mode: {e}")
        send_email_alert("Safe Mode Failure", f"Failed to trigger safe mode: {e}")

# ==============================
# Confirmation Step
# ==============================
def check_ecu_isolation(ecu_id):
    # Simulate a check for ECU isolation status (replace with actual check)
    isolated = True  # Simulate successful isolation
    if isolated:
        logging.info(f"ECU {ecu_id} isolation confirmed.")
        send_email_alert(f"ECU {ecu_id} Isolation Confirmation", "ECU isolation confirmed.")
    else:
        logging.error(f"Failed to confirm isolation of ECU {ecu_id}.")
        send_email_alert(f"ECU {ecu_id} Isolation Failure", f"Failed to confirm isolation of ECU {ecu_id}.")

# ==============================
# Retry Mechanism
# ==============================
def retry_action(action, max_retries=3):
    for attempt in range(max_retries):
        try:
            action()
            return True  # Action succeeded
        except Exception as e:
            logging.warning(f"Attempt {attempt + 1} failed: {e}")
    logging.error(f"Failed to complete {action} after {max_retries} attempts.")
    send_email_alert("Mitigation Action Failure", f"Failed to complete action after {max_retries} attempts.")
    return False

# A. Alert the driver for each detected anomaly
retry_action(lambda: alert_driver(f"Critical anomaly detected"))

# B. Isolate the ECU if anomalies are detected
if len(reconstruction_errors) > 0:
    retry_action(lambda: isolate_ecu('ECU_1'))
    check_ecu_isolation('ECU_1')  # Check if isolation was successful

# C. Trigger safe mode if too many anomalies are detected
if len(reconstruction_errors) > 5:
    retry_action(trigger_safe_mode)

# ==============================
# Real-Time Monitoring Simulation
# ==============================
def real_time_monitoring(data_loader, model, threshold):
    model.eval()
    for batch in data_loader:
        inputs = batch[0]
        outputs = model(inputs)
        reconstruction_error = criterion(outputs, inputs).item()
        
        logging.info(f"Real-time Reconstruction Error: {reconstruction_error}")
        
        if reconstruction_error > threshold:
            logging.warning(f"Real-time Anomaly detected with error: {reconstruction_error}")
            retry_action(lambda: alert_driver(f"Real-time anomaly detected with error {reconstruction_error}"))
            retry_action(lambda: isolate_ecu('ECU_1'))
        time.sleep(1)

# Simulate real-time monitoring
real_time_monitoring(dataloader, model, threshold)


2024-09-25 20:58:46,154 - INFO - Data loaded and concatenated. Total rows: 4000, Total columns: 32
2024-09-25 20:58:46,290 - INFO - Data preprocessing complete. Shape after preprocessing: (4000, 1665)
2024-09-25 20:58:46,302 - INFO - DataLoader created with batch size 64.
2024-09-25 20:58:46,308 - INFO - Autoencoder initialized with input size 1665.
2024-09-25 20:58:46,646 - INFO - Epoch 1/50, Loss: 0.0098356232047081
2024-09-25 20:58:47,113 - INFO - Epoch 2/50, Loss: 0.007966394536197186
2024-09-25 20:58:47,577 - INFO - Epoch 3/50, Loss: 0.008354000747203827
2024-09-25 20:58:48,307 - INFO - Epoch 4/50, Loss: 0.009957513771951199
2024-09-25 20:58:48,939 - INFO - Epoch 5/50, Loss: 0.007982984185218811
2024-09-25 20:58:49,415 - INFO - Epoch 6/50, Loss: 0.008038957603275776
2024-09-25 20:58:50,115 - INFO - Epoch 7/50, Loss: 0.00808742456138134
2024-09-25 20:58:50,771 - INFO - Epoch 8/50, Loss: 0.008047855459153652
2024-09-25 20:58:51,355 - INFO - Epoch 9/50, Loss: 0.00823643896728754
2024