# Data Exploration

This notebook explores recent inventory data (sales and job-order consumption extracted over a 90-day look-back window) to understand its patterns and characteristics.

In [None]:
# Put <cwd>/../src at the front of sys.path so modules under it can be imported.
# NOTE(review): this assumes the directory that contains `src` is the parent of
# the kernel's working directory; confirm how the notebook is launched.
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent / 'src'))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

from data.extractors.sales_extractor import SalesExtractor
from data.extractors.purchase_extractor import PurchaseExtractor
from data.extractors.job_order_extractor import JobOrderExtractor

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's 'whitegrid' style for the plots drawn after this point.
sns.set_style('whitegrid')

## 1. Extract Data

In [None]:
# Length of the look-back window, in days. Change this single value to explore
# a shorter or longer period.
LOOKBACK_DAYS = 90

# Date range: from LOOKBACK_DAYS before today's (local) date up to today.
# Whether the bounds are treated as inclusive is decided by the extractors,
# which are not visible in this notebook.
end_date = datetime.now().date()
start_date = end_date - timedelta(days=LOOKBACK_DAYS)

# Sales records for the window.
sales_extractor = SalesExtractor()
sales_data = sales_extractor.extract(start_date, end_date)

# Job-order records for the window; the rest of the notebook treats this
# extract as the consumption data.
job_extractor = JobOrderExtractor()
consumption_data = job_extractor.extract(start_date, end_date)

# Quick size check of both extracts.
print(f"Sales data shape: {sales_data.shape}")
print(f"Consumption data shape: {consumption_data.shape}")

## 2. Basic Statistics

In [None]:
# Top items by consumption
# How many of the highest-consumption items to list.
TOP_N = 20

if consumption_data.empty:
    # Always bind `top_items`, even when there is nothing to rank, so that later
    # cells never hit an undefined name on a fresh kernel or silently reuse a
    # stale ranking left over from an earlier run.
    top_items = pd.Series(dtype='float64', name='consumption')
    print("No consumption data for the selected period; nothing to rank.")
else:
    # Total consumption per item, largest first, truncated to the TOP_N items.
    top_items = (
        consumption_data
        .groupby('item_id')['consumption']
        .sum()
        .sort_values(ascending=False)
        .head(TOP_N)
    )
    print(f"Top {TOP_N} items by consumption:")
    print(top_items)

## 3. Time Series Visualization

In [None]:
# Plot daily consumption for top item
# The emptiness check comes first on purpose: `and` short-circuits, so
# `top_items` is only evaluated when there is consumption data to rank.
if not consumption_data.empty and len(top_items) > 0:
    # `top_items` is sorted by total consumption, largest first, so its first
    # index label is the item with the highest consumption.
    top_item_id = top_items.index[0]
    item_data = consumption_data[consumption_data['item_id'] == top_item_id].copy()

    # Collapse to one total per date, in chronological order. The raw rows are
    # plotted as a connected line, so unsorted dates or several rows on the same
    # date would make the line zig-zag instead of showing a daily series.
    # NOTE(review): if the extractor already returns exactly one row per item
    # per day, the sum is a no-op and only the ordering matters.
    daily_consumption = item_data.groupby('date')['consumption'].sum().sort_index()

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(daily_consumption.index, daily_consumption.values)
    ax.set_title(f'Daily Consumption for Item {top_item_id}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Consumption')
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    plt.show()