# Exploratory Data Analysis (EDA)

This notebook analyzes the gold-layer data (daily aggregated metrics for shelter population, vendor spend, and media volume) to identify trends, seasonality, and correlations.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Apply seaborn's "whitegrid" theme globally so every plot below shares it.
sns.set_theme(style="whitegrid")

## Load Data

In [None]:
# Directory containing the gold-layer parquet files (relative path).
data_path = '../data/gold/'

# Resolve the three file locations up front.
shelter_file = os.path.join(data_path, 'daily_shelter_stats.parquet')
spend_file = os.path.join(data_path, 'daily_vendor_spend.parquet')
media_file = os.path.join(data_path, 'daily_media_volume.parquet')

# Read each table into its own DataFrame.
shelter_df = pd.read_parquet(shelter_file)
spend_df = pd.read_parquet(spend_file)
media_df = pd.read_parquet(media_file)

# Quick sanity check: print the (rows, columns) shape of every table.
for label, frame in (
    ("Shelter Data:", shelter_df),
    ("Spend Data:", spend_df),
    ("Media Data:", media_df),
):
    print(label, frame.shape)

## 1. Shelter Population Trends

In [None]:
# Line chart of total shelter population by date, with a marker at each point.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(
    data=shelter_df,
    x='date',
    y='total_population',
    marker='o',
    ax=ax,
)
ax.set_title('Daily Shelter Population')
ax.set_xlabel('Date')
ax.set_ylabel('Population')
# Tilt the date labels so neighbouring ticks do not overlap.
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## 2. Vendor Spending Over Time

In [None]:
# Bar chart of total vendor spend, one bar per date value.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=spend_df,
    x='date',
    y='total_spend',
    color='skyblue',
    ax=ax,
)
ax.set_title('Daily Vendor Spending')
ax.set_xlabel('Date')
ax.set_ylabel('Spend ($)')
# Tilt the date labels so neighbouring ticks do not overlap.
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## 3. Media Volume (Possible Leading Indicator?)

In [None]:
# Reshape wide -> long: one row per (date, Type) with the value in 'Count',
# so seaborn can draw one line per media type via `hue`.
media_melted = media_df.melt(
    id_vars=['date'],
    value_vars=['news_count', 'social_count'],
    var_name='Type',
    value_name='Count',
)

fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=media_melted, x='date', y='Count', hue='Type', ax=ax)
ax.set_title('Daily Media Volume')
# Tilt the date labels so neighbouring ticks do not overlap.
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## 4. Correlation Analysis

In [None]:
# Merge all datasets on date.
# The outer join keeps a date that appears in any of the three tables;
# metrics missing for that date are filled with NaN.
merged_df = pd.merge(shelter_df, spend_df, on='date', how='outer')
merged_df = pd.merge(merged_df, media_df, on='date', how='outer')

# Correlation matrix over the numeric columns only (non-numeric columns are
# skipped via numeric_only=True). DataFrame.corr excludes NaN values pairwise,
# so dates missing from one table do not break the computation.
plt.figure(figsize=(8, 6))
# vmin/vmax pin the colour scale to the full correlation range [-1, 1].
sns.heatmap(merged_df.corr(numeric_only=True), annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Matrix')
# Consistent with the other plot cells: fit the column-name tick labels
# inside the figure instead of letting them be clipped at the edges.
plt.tight_layout()
plt.show()