# Task 1: Exploratory Data Analysis (EDA) on Retail Dataset

### Dataset Overview

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Sample dataset: four hand-written order lines.
# Every key of `data` is a column; position i across the lists is one row.
order_dates = ['2016-11-08', '2016-11-08', '2016-06-12', '2015-10-11']
ship_dates = ['2016-11-11', '2016-11-11', '2016-06-16', '2015-10-18']

data = {
    'Order ID': [
        'CA-2016-152156',
        'CA-2016-152156',
        'CA-2016-138688',
        'US-2015-108966',
    ],
    # Date strings are parsed up front so the frame gets datetime columns.
    'Order Date': pd.to_datetime(order_dates),
    'Ship Date': pd.to_datetime(ship_dates),
    'Category': ['Furniture', 'Furniture', 'Office Supplies', 'Technology'],
    'Sub-Category': ['Bookcases', 'Chairs', 'Labels', 'Phones'],
    'Sales': [261.96, 731.94, 14.62, 957.58],
    'Quantity': [2, 3, 2, 5],
    'Discount': [0.0, 0.0, 0.0, 0.2],
    'Profit': [41.91, 219.58, 6.87, 183.10],
}

# 'Order Month' is the monthly period of 'Order Date' and is appended as the
# last column. The name contains a space, so it is passed via ** unpacking.
df = pd.DataFrame(data).assign(
    **{'Order Month': lambda frame: frame['Order Date'].dt.to_period('M')}
)
df.head()

### KPI Calculations

In [None]:
# Headline KPIs computed from the order lines in `df`.
total_sales = df['Sales'].sum()

# An order can span several rows, so rows are first collapsed to one total per
# 'Order ID'; the average is then taken over those per-order totals.
sales_per_order = df.groupby('Order ID')['Sales'].sum()
avg_order_value = sales_per_order.mean()

# Sales summed within each 'Category'.
revenue_per_category = df.groupby('Category')['Sales'].sum()

# Report each KPI as "<label> <value>", in the order listed.
for label, value in (
    ('Total Sales:', total_sales),
    ('Average Order Value:', avg_order_value),
    ('Revenue per Category:\n', revenue_per_category),
):
    print(label, value)

### Total Sales by Category

In [None]:
# Bar chart of summed sales per category, drawn on an explicit Axes rather
# than through pyplot's implicit "current figure" state.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x=revenue_per_category.index,
    y=revenue_per_category.values,
    ax=ax,
)
# Labels are applied after plotting so they replace whatever axis labels the
# plotting call may have set.
ax.set_title('Total Sales by Category')
ax.set_ylabel('Sales ($)')
ax.set_xlabel('Category')
fig.tight_layout()
plt.show()

### Monthly Sales Trend

In [None]:
# Sales summed per 'Order Month', then drawn as a line with a marker at each
# month that has data.
monthly_sales = df.groupby('Order Month')['Sales'].sum()

fig, ax = plt.subplots(figsize=(8, 5))
monthly_sales.plot(marker='o', ax=ax)
# Labels are applied after plotting so they replace whatever axis labels the
# plotting call may have set.
ax.set_title('Monthly Sales Trend')
ax.set_ylabel('Sales ($)')
ax.set_xlabel('Month')
ax.grid(True)
fig.tight_layout()
plt.show()