# 🎯 SOLUSI PRAKTIKUM 3: PANDAS



### EXPLORATORY DATA ANALYSIS ###

In [None]:
import pandas as pd
import numpy as np

In [None]:


'''SOLUSI: Analisis Dataset Retail'''
# Build a reproducible synthetic retail dataset: 1000 transactions with dates
# sampled from the daily range 2023-01-01 .. 2023-03-31.
np.random.seed(42)
dates = pd.date_range(start='2023-01-01', end='2023-03-31', freq='D')
n_rows = 1000
# The columns are drawn in exactly this order; with the fixed seed, reordering
# the draws would change every generated value.
retail_data = pd.DataFrame({
    'date': np.random.choice(dates, size=n_rows),
    'product_id': np.random.randint(low=1, high=11, size=n_rows),
    'quantity': np.random.randint(low=1, high=10, size=n_rows),
    'price': np.random.uniform(low=10, high=100, size=n_rows),
    'customer_id': np.random.randint(low=1, high=101, size=n_rows),
})
# Revenue of a transaction is units sold times unit price.
retail_data['revenue'] = retail_data['quantity'].mul(retail_data['price'])

# SOLUTION 1: total revenue per product, largest first
revenue_per_product = (
    retail_data
    .groupby('product_id')['revenue']
    .sum()
    .sort_values(ascending=False)
)

# SOLUTION 2: the five customers with the highest total spending
top_customers = (
    retail_data
    .groupby('customer_id')['revenue']
    .sum()
    .nlargest(n=5)
)

# SOLUTION 3: time-series view - total revenue per calendar day
daily_revenue = retail_data['revenue'].groupby(retail_data['date']).sum()

# SOLUTION 4: function that detects anomalies in quantity
def detect_quantity_anomalies(df, threshold=2):
    """Return the rows of *df* whose quantity is a z-score outlier.

    Each row's ``quantity`` is standardised against the mean and standard
    deviation of the whole column (``Series.std``, i.e. the sample standard
    deviation). Rows whose absolute z-score is strictly greater than
    *threshold* are returned.

    The input frame is not modified: the z-scores are attached to a new
    frame, so the caller's ``df`` does not silently gain a column.

    Args:
        df: DataFrame with a numeric ``quantity`` column.
        threshold: Absolute z-score above which a row counts as anomalous.

    Returns:
        A new DataFrame with the anomalous rows, containing every column of
        *df* plus ``quantity_zscore``. It is empty when no row exceeds the
        threshold, which includes the case where the standard deviation is
        zero or undefined (the z-scores are then NaN and never compare
        greater than the threshold).
    """
    quantity = df['quantity']
    zscores = (quantity - quantity.mean()) / quantity.std()

    # assign() builds a new frame, leaving the caller's DataFrame untouched.
    scored = df.assign(quantity_zscore=zscores)
    return scored[zscores.abs() > threshold]

# Run the anomaly detector over the whole dataset with its default threshold.
anomalies = detect_quantity_anomalies(retail_data)

# Report: each heading and its result go through a single print call, with
# sep="\n" placing the result on the line right after the heading.
print("=== RETAIL DATA ANALYSIS ===")
print("\n1. Revenue per Product:", revenue_per_product, sep="\n")
print("\n2. Top 5 Customers by Spending:", top_customers, sep="\n")
print("\n3. Daily Revenue (first 10 days):", daily_revenue.head(10), sep="\n")

print(f"\n4. Quantity Anomalies detected: {len(anomalies)}")
# Show only the columns relevant to judging an anomaly (first five rows).
print(
    "Anomalies sample:",
    anomalies.loc[:, ['date', 'product_id', 'quantity', 'quantity_zscore']].head(),
    sep="\n",
)

# BONUS: advanced analysis
print("\n=== BONUS ANALYSIS ===")

# Monthly revenue trend: tag every transaction with the month number of its
# date, then total the revenue within each month.
retail_data['month'] = retail_data['date'].dt.month
monthly_revenue = retail_data.groupby(by='month')['revenue'].sum()
print("Monthly Revenue:", monthly_revenue, sep="\n")

# Customer segmentation by spending: per customer, the total / average /
# number of transactions for revenue and the average quantity per purchase.
customer_stats = retail_data.groupby(by='customer_id').agg({
    'revenue': ['sum', 'mean', 'count'],
    'quantity': 'mean',
})
# Round to two decimals for display.
customer_stats = customer_stats.round(2)
print("\nCustomer Statistics:", customer_stats.head(), sep="\n")

# TEST ASSERTIONS
# Sanity checks on the solution outputs: product ids are drawn from 1..10, so
# there can be at most ten product groups; the customer ranking was cut to
# five entries; and at least one day must carry revenue.
assert revenue_per_product.size <= 10, "Should have max 10 products"
assert top_customers.size == 5, "Should have top 5 customers"
assert not daily_revenue.empty, "Should have daily revenue data"

# Reached only when every assertion above passed.
print("\n✅ Semua assertions berhasil! Solusi benar.")