# Energy Data Exploration

A quick look at the energy consumption data: load the readings, derive consumption where needed, and check for seasonal patterns, year-over-year trends, and outliers.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Notebook-wide plotting defaults: seaborn-flavoured style sheet and a wide
# default figure size.
plt.style.use('seaborn-v0_8')
plt.rcParams.update({'figure.figsize': (12, 6)})

In [None]:
# Read the CSV into a DataFrame; the 'Date' column is parsed as datetimes with
# day-first ordering.
csv_path = '../data/energy_consumption.csv'
df = pd.read_csv(csv_path, parse_dates=['Date'], dayfirst=True)

# Quick sanity check: size of the table and the period it covers.
dates = df['Date']
print(f"Data shape: {df.shape}")
print(f"Date range: {dates.min()} to {dates.max()}")

# Last expression of the cell, so the notebook renders the first rows.
df.head()

In [None]:
# Column dtypes and non-null counts, followed by the number of missing
# entries in each column.
df.info()

missing_per_column = df.isna().sum()
print("\nMissing values:")
print(missing_per_column)

In [None]:
# Derive per-row consumption from the 'Reading' column when the file does not
# provide a usable 'Consumption' column (column missing, or summing to zero).
if 'Consumption' not in df.columns or df['Consumption'].sum() == 0:
    # diff() has no previous reading for the first row; that gap is filled
    # with 0 rather than left as NaN.
    df['Consumption'] = df['Reading'].diff().fillna(0)
    # Keep only rows whose reading did not decrease. The explicit copy makes
    # df an independent frame, so the column assignments made further down
    # the notebook do not raise pandas' SettingWithCopyWarning.
    df = df[df['Consumption'] >= 0].copy()

print("Consumption stats:")
print(df['Consumption'].describe())

In [None]:
# 2x2 overview of consumption: time series, distribution, average per calendar
# month, and a winter-vs-summer comparison.
fig, axes = plt.subplots(2, 2, figsize=(15, 8))

# Calendar month of every row, reused by the monthly and seasonal panels.
months = df['Date'].dt.month

# Top-left: consumption over time.
ax = axes[0, 0]
ax.plot(df['Date'], df['Consumption'], marker='o')
ax.set_title('Energy Consumption Over Time')
ax.set_ylabel('kWh')
ax.tick_params(axis='x', labelrotation=45)

# Top-right: distribution of consumption values.
ax = axes[0, 1]
ax.hist(df['Consumption'], bins=10, alpha=0.7)
ax.set_title('Consumption Distribution')
ax.set_xlabel('kWh')

# Bottom-left: mean consumption per calendar month (all years pooled).
ax = axes[1, 0]
monthly_avg = df.groupby(months)['Consumption'].mean()
monthly_avg.plot(kind='bar', ax=ax)
ax.set_title('Average Monthly Consumption')
ax.set_xlabel('Month')
ax.set_ylabel('kWh')

# Bottom-right: Dec-Feb versus Jun-Aug consumption.
ax = axes[1, 1]
winter = df.loc[months.isin([12, 1, 2]), 'Consumption']
summer = df.loc[months.isin([6, 7, 8]), 'Consumption']
ax.boxplot([winter, summer])
# boxplot's `labels` keyword is deprecated since Matplotlib 3.9 (renamed to
# `tick_labels`); labelling the ticks on the axes works on old and new
# versions alike. Boxes are drawn at x positions 1 and 2 by default.
ax.set_xticks([1, 2])
ax.set_xticklabels(['Winter', 'Summer'])
ax.set_title('Seasonal Comparison')
ax.set_ylabel('kWh')

fig.tight_layout()
plt.show()

In [None]:
# Add calendar month and year columns, then summarise consumption per
# calendar month and per year (rounded to one decimal).
date_parts = df['Date'].dt
df['Month'] = date_parts.month
df['Year'] = date_parts.year

print("Average consumption by month:")
consumption_by_month = df.groupby('Month')['Consumption']
monthly_stats = consumption_by_month.agg(['mean', 'std']).round(1)
print(monthly_stats)

print("\nYear over year comparison:")
consumption_by_year = df.groupby('Year')['Consumption']
yearly_stats = consumption_by_year.agg(['mean', 'sum']).round(1)
print(yearly_stats)

In [None]:
# Flag potential outliers: consumption values lying more than 1.5 * IQR
# below the first quartile or above the third quartile.
Q1, Q3 = df['Consumption'].quantile([0.25, 0.75])
IQR = Q3 - Q1
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

too_low = df['Consumption'] < lower_bound
too_high = df['Consumption'] > upper_bound
outliers = df[too_low | too_high]

print(f"Found {len(outliers)} potential outliers:")
if not outliers.empty:
    print(outliers[['Date', 'Consumption']])