# Exploratory Analysis of Predictions
This notebook performs an initial statistical exploration of the predictions stored in `data/predictions_log.csv`. It includes descriptive statistics and distribution plots, and lays the foundation for drift detection.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the logged predictions from the CSV file into a DataFrame, then
# preview its first rows as this cell's displayed output.
predictions_csv = "data/predictions_log.csv"
df = pd.read_csv(predictions_csv)
df.head()

In [None]:
# Structural overview first (df.info() prints its report), followed by the
# descriptive-statistics table, which is shown as the cell's output value.
df.info()
summary_stats = df.describe()
summary_stats

In [None]:
# Histogram (30 bins) with a KDE overlay for the `prediction` column,
# drawn on an explicitly created 8x5 figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df["prediction"], kde=True, bins=30, ax=ax)
ax.set(
    title="Distribution of Predicted Housing Prices",
    xlabel="Price",
    ylabel="Frequency",
)
plt.show()

In [None]:
# Histogram (30 bins) with a KDE overlay for the `rm` input feature
# (labelled in the title as the average number of rooms), drawn on an
# explicitly created 8x5 figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df["rm"], kde=True, bins=30, ax=ax)
ax.set(
    title="Distribution of Feature: RM (average number of rooms)",
    xlabel="RM",
    ylabel="Frequency",
)
plt.show()