# EXPLORATORY DATA ANALYSIS (EDA)
### SENSOR + NETWORK + ACCURACY ANALYSIS

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from scipy.stats import zscore

# Read the cleaned dataset. The timestamp column is forced to text on load and
# converted to datetime in a separate, explicit step.
df = pd.read_csv("Datasets/Cleaned_Dataset.csv", dtype={"timestamp": str})
df["timestamp"] = pd.to_datetime(df["timestamp"])

# Column groups reused by the analysis cells.
gps_cols = ["gps_latitude", "gps_longitude"]
sensor_cols = ["lidar_points", "radar_objects", "camera_objects"]
network_cols = [
    "packet_drop_rate",
    "packet_delivery_ratio",
    "latency_ms",
    "throughput_kbps",
]
accuracy_cols = ["obstacle_detection_accuracy", "decision_accuracy"]

# Every analysed feature, in GPS / sensor / network / accuracy order.
numeric_cols = [*gps_cols, *sensor_cols, *network_cols, *accuracy_cols]
df_numeric = df.loc[:, numeric_cols]

FileNotFoundError: [Errno 2] No such file or directory: 'Datasets/Cleaned_Dataset.csv'

________________________________________
### 1. Summary Statistics

In [None]:
# Descriptive statistics for every column of df_numeric. This is the last
# expression of the cell, so the notebook renders the resulting table.
df_numeric.describe()

_________________________________________
### 2. Sensor Behaviour Analysis

Pairplot

In [None]:
# Pairwise relationship grid for the sensor columns listed in sensor_cols.
sensor_frame = df.loc[:, sensor_cols]
sns.pairplot(data=sensor_frame)
plt.suptitle(
    "Sensor Behaviour Patterns (LiDAR / Radar / Camera)",
    y=1.02,  # place the title slightly above the top row of subplots
)
plt.show()

Boxplot

In [None]:
# One boxplot per sensor column, each on its own 7x4 figure.
for sensor_name in sensor_cols:
    fig, ax = plt.subplots(figsize=(7, 4))
    sns.boxplot(x=df[sensor_name], ax=ax)
    ax.set_title(f"Outlier Analysis: {sensor_name}")
    plt.show()

______________________________________________
### 3. Network Performance Analysis

Network Histogram

In [None]:
# Histogram grid (20 bins per panel) for the network metric columns.
network_frame = df.loc[:, network_cols]
network_frame.hist(bins=20, figsize=(10, 8))
plt.suptitle(
    "Network Performance Distributions",
    y=1.02,  # place the title slightly above the subplot grid
)
plt.tight_layout()
plt.show()

Network Outliers

In [None]:
# One boxplot per network metric column, each on its own 7x4 figure.
for metric_name in network_cols:
    fig, ax = plt.subplots(figsize=(7, 4))
    sns.boxplot(x=df[metric_name], ax=ax)
    ax.set_title(f"Outlier Analysis: {metric_name}")
    plt.show()

_________________
### 4. Accuracy Metric Analysis

Accuracy Histogram

In [None]:
# Histogram grid (20 bins per panel) for the two accuracy columns.
accuracy_frame = df.loc[:, accuracy_cols]
accuracy_frame.hist(bins=20, figsize=(10, 6))
plt.suptitle(
    "Detection & Decision Accuracy Distributions",
    y=1.02,  # place the title slightly above the subplot grid
)
plt.tight_layout()
plt.show()

Accuracy Relationship

In [None]:
# Scatter of decision accuracy against obstacle-detection accuracy, one point
# per row of df.
accuracy_ax = sns.scatterplot(
    x="obstacle_detection_accuracy",
    y="decision_accuracy",
    data=df,
)
accuracy_ax.set_title("Obstacle Detection vs Decision Accuracy")
plt.show()

___________________________________________________
### 5. GPS Position Analysis

In [None]:
# Plot recorded GPS fixes: longitude on the x-axis, latitude on the y-axis,
# drawn with small markers (s=8).
gps_fig, gps_ax = plt.subplots(figsize=(8, 6))
gps_ax.scatter(df["gps_longitude"], df["gps_latitude"], s=8)
gps_ax.set_title("Vehicle GPS Movement Pattern")
gps_ax.set_xlabel("Longitude")
gps_ax.set_ylabel("Latitude")
gps_ax.grid(True)
plt.show()

_____________________________________________
### 6. NON-LINEAR RELATIONSHIPS (Bin Analysis)

Even when linear correlations are near zero, non-linear effects can still exist; averaging the target within quantile bins of a predictor helps reveal them.

Latency impact on decision accuracy

In [None]:
# Split latency into up to ten quantile (equal-population) bins.
# duplicates="drop" merges bins whose quantile edges coincide instead of
# raising ValueError when the column contains many repeated values.
df["latency_bin"] = pd.qcut(df["latency_ms"], 10, duplicates="drop")

# Mean decision accuracy within each latency bin. The grouping key is
# categorical, so `observed` is passed explicitly rather than relying on the
# implicit default (keeps every bin category in the result).
latency_vs_accuracy = (
    df.groupby("latency_bin", observed=False)["decision_accuracy"].mean()
)

plt.figure(figsize=(12, 4))
latency_vs_accuracy.plot(marker='o')
plt.xticks(rotation=45)  # interval labels are long; rotate to avoid overlap
plt.title("Non-linear Effect: Decision Accuracy vs Latency (Binned)")
plt.ylabel("Average Decision Accuracy")
plt.xlabel("Latency Bins")
plt.grid(True)
plt.show()

Packet Drop Effect

In [None]:
# Split packet drop rate into up to ten quantile (equal-population) bins.
# duplicates="drop" merges bins whose quantile edges coincide instead of
# raising ValueError when the column contains many repeated values.
df["drop_bin"] = pd.qcut(df["packet_drop_rate"], 10, duplicates="drop")

# Mean decision accuracy within each drop-rate bin. The grouping key is
# categorical, so `observed` is passed explicitly rather than relying on the
# implicit default (keeps every bin category in the result).
drop_vs_accuracy = (
    df.groupby("drop_bin", observed=False)["decision_accuracy"].mean()
)

plt.figure(figsize=(12, 4))
drop_vs_accuracy.plot(marker='o', color='red')
plt.xticks(rotation=45)  # interval labels are long; rotate to avoid overlap
plt.title("Non-linear Effect: Decision Accuracy vs Packet Drop Rate (Binned)")
plt.ylabel("Average Decision Accuracy")
# Explicit axis label, matching the binned latency plot.
plt.xlabel("Packet Drop Rate Bins")
plt.grid(True)
plt.show()