In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Configure plots: seaborn "whitegrid" theme and a wide default figure size.
# set_theme() is the preferred name; sns.set() is only an alias for it.
sns.set_theme(style="whitegrid")
plt.rcParams["figure.figsize"] = (12, 6)

# Load the dataset.
# NOTE: parse_dates is resolved while the file is being read, so the raw CSV
# header must already contain "Start" and "End" exactly. The whitespace strip
# below runs afterwards and cannot rescue padded names for those two columns.
df = pd.read_csv("../data/sap.csv", parse_dates=["Start", "End"])

# Ensure column names are stripped of extra spaces
df.columns = df.columns.str.strip()

# Summary printout
print("Basic Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- that would emit a stray "None" line.
df.info()
print("\nHead:")
print(df.head())

# Create new columns derived from the session start timestamp.
df["Hour"] = df["Start"].dt.hour  # hour of day as an integer (0-23)
df["Date"] = df["Start"].dt.date  # calendar date of the session start

# === Plot 1: Histogram of Energy Delivered ===
# Draw on an explicit Figure/Axes pair: 50-bin histogram of the "Energy"
# column with a kernel-density curve overlaid, then write it to disk.
energy_fig, energy_ax = plt.subplots()
sns.histplot(df["Energy"], bins=50, kde=True, ax=energy_ax)
energy_ax.set_title("Distribution of Energy Delivered (kWh)")
energy_ax.set_xlabel("Energy (kWh)")
energy_ax.set_ylabel("Count")
energy_fig.tight_layout()
energy_fig.savefig("energy_distribution.png")

# === Plot 2: Average Energy by Day of the Week ===
# Bar chart of the mean "Energy" per value of the "Day" column, with bars
# forced into Monday-to-Sunday order.
# NOTE(review): assumes the CSV supplies a "Day" column holding full English
# weekday names matching `order` -- confirm against the data file.
plt.figure()
order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
# errorbar=None disables the error bars. It replaces the `ci=None` spelling,
# which seaborn deprecated in v0.12 (string estimators such as "mean" already
# require that version, so no new minimum is introduced).
sns.barplot(x="Day", y="Energy", data=df, order=order, estimator="mean", errorbar=None)
plt.title("Average Energy Delivered by Day of the Week")
plt.ylabel("Average Energy (kWh)")
plt.tight_layout()
plt.savefig("avg_energy_by_day.png")

# === Plot 3: Charging Start Time Distribution by Weekend ===
# Stacked histogram of the session start hour, split by the "Weekend" column.
# NOTE(review): assumes the CSV supplies a "Weekend" column -- confirm.
plt.figure()
# "Hour" holds integers, so use discrete bins (width 1, centred on each hour)
# pinned to the full 0-23 range. A plain `bins=24` spreads 24 bins over the
# observed min-max span, so the edges do not fall on hour boundaries and
# hours can merge or leave gaps whenever the data does not cover 0-23.
sns.histplot(
    data=df,
    x="Hour",
    hue="Weekend",
    discrete=True,
    binrange=(0, 23),
    multiple="stack",
    palette="Set2",
)
plt.title("Charging Start Hour Distribution (Weekend vs Weekday)")
plt.xlabel("Hour of Day")
plt.ylabel("Number of Charging Sessions")
plt.tight_layout()
plt.savefig("start_hour_distribution.png")

# === Plot 4: Charge Duration vs Energy ===
# Scatter of "Charge.Duration" against "Energy", coloured by the "Weekend"
# column and drawn semi-transparent so overlapping points stay visible.
scatter_fig, scatter_ax = plt.subplots()
sns.scatterplot(
    data=df,
    x="Charge.Duration",
    y="Energy",
    hue="Weekend",
    alpha=0.6,
    ax=scatter_ax,
)
scatter_ax.set_title("Charge Duration vs Energy Delivered")
scatter_ax.set_xlabel("Charge Duration (min)")
scatter_ax.set_ylabel("Energy Delivered (kWh)")
scatter_fig.tight_layout()
scatter_fig.savefig("duration_vs_energy.png")

# Final confirmation listing the image files written by the plotting steps.
print(
    "Plots saved: energy_distribution.png, avg_energy_by_day.png, "
    "start_hour_distribution.png, duration_vs_energy.png"
)
