# 📊 Exploratory Data Analysis (EDA) & Preprocessing

This notebook performs **data loading, cleaning (missing-value checks), initial exploratory analysis, and basic time-based feature engineering** on solar PV sensor data for the AI-driven predictive maintenance project.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Suppress every Python warning for the rest of the session so library notices
# do not clutter cell output.
# NOTE(review): this also hides deprecation warnings from pandas/seaborn;
# consider narrowing the filter to specific categories once the notebook is stable.
warnings.filterwarnings('ignore')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

## 1️⃣ Load the Dataset

In [None]:
# Read the sensor export into a DataFrame.
# NOTE: the path below is a placeholder -- point it at the real dataset before running.
data_path = "../data/solar_pv_sensor_data.csv"
df = pd.read_csv(filepath_or_buffer=data_path)

# Preview the first five rows as a quick sanity check of the load
df.head(n=5)

## 2️⃣ Data Summary & Missing Values

In [None]:
# Report the frame's dimensions and the number of null entries per column
missing_per_column = df.isna().sum()
print("Dataset Shape:", df.shape)
print("Missing Values:", missing_per_column)

# Descriptive statistics (count, mean, std, quartiles, ...) shown as the cell output
df.describe()

## 3️⃣ Correlation Heatmap

In [None]:
# Visualizing feature correlations
# Correlation is only defined for numeric data. The frame still contains the
# 'timestamp' column as loaded from the CSV, and pandas >= 2.0 raises on
# non-numeric columns in DataFrame.corr() instead of silently dropping them,
# so restrict to numeric/boolean columns explicitly (works on older pandas too).
numeric_df = df.select_dtypes(include=['number', 'bool'])

plt.figure(figsize=(10, 6))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', fmt='.2f', linewidths=0.5)
plt.title("Feature Correlation Heatmap")
plt.show()

## 4️⃣ Time Series Analysis

In [None]:
# Example: Plot Energy Output Over Time
# Parse the timestamps for plotting without modifying df: at this point in a
# top-to-bottom run the column has not yet been converted to datetime, and
# plotting raw strings would produce a categorical axis (one tick per row)
# rather than a properly spaced time axis. pd.to_datetime is a no-op if the
# column is already datetime, so this cell is safe to re-run at any point.
timestamps = pd.to_datetime(df['timestamp'])

plt.figure(figsize=(12, 5))
plt.plot(timestamps, df['energy_output'], label='Energy Output')
plt.xlabel('Time')
plt.ylabel('Energy Output (kWh)')
plt.title('Energy Output Over Time')
plt.legend()
plt.xticks(rotation=45)
plt.show()

## 5️⃣ Feature Engineering

In [None]:
# Example: derive calendar features from the reading timestamp
parsed_ts = pd.to_datetime(df['timestamp'])
df['timestamp'] = parsed_ts                    # store the column as datetime
df['hour'] = parsed_ts.dt.hour                 # hour of day
df['day_of_week'] = parsed_ts.dt.dayofweek     # Monday=0 ... Sunday=6

# Show the frame with the newly added columns
df.head()