# Data Analysis Examples

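This notebook demonstrates basic data analysis techniques in Python: generating a synthetic daily sales dataset with NumPy and pandas, visualizing it with Matplotlib and seaborn, and computing summary statistics by product category.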

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set style for better visualizations.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases removed the old 'seaborn' alias, so plt.style.use('seaborn')
# fails on current installs. Use the new name when it is available and fall
# back to the old one on older Matplotlib versions.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
sns.set_palette('husl')

## Generate Sample Dataset

In [None]:
# Build a reproducible synthetic dataset: one row per calendar day of 2023.
np.random.seed(42)  # fixed seed so every run draws the same values
dates = pd.date_range('2023-01-01', '2023-12-31', freq='D')

# Draw order matters for reproducibility: sales amounts are sampled first
# (normal, mean 1000, std 200), then the category labels.
sales_data = pd.DataFrame({'date': dates}).assign(
    sales=np.random.normal(loc=1000, scale=200, size=len(dates)),
    category=np.random.choice(['Electronics', 'Clothing', 'Food'], size=len(dates)),
)

# Preview the top of the table
sales_data.head()

## Data Visualization

In [None]:
# Create monthly sales trend: mean of the daily 'sales' values per (year, month).
# Both grouping keys are derived from the 'date' column, so without explicit
# names the resulting index would have two levels both called 'date', which
# makes monthly_sales.reset_index() and name-based level lookups fail.
# Naming them 'year' and 'month' keeps the values and ordering unchanged.
monthly_sales = sales_data.groupby([
    sales_data['date'].dt.year.rename('year'),
    sales_data['date'].dt.month.rename('month'),
])['sales'].mean()

# Line chart of the monthly averages, one marker per month.
plt.figure(figsize=(12, 6))
monthly_sales.plot(kind='line', marker='o')
plt.title('Monthly Average Sales Trend')
plt.xlabel('Month')
plt.ylabel('Average Sales')
plt.grid(True)
plt.show()

In [None]:
# Box plot comparing the spread of daily sales across product categories.
plt.figure(figsize=(10, 6))
# sns.boxplot draws on the current figure and returns its Axes, so the title
# can be set directly on the returned object.
sns.boxplot(data=sales_data, y='sales', x='category').set_title(
    'Sales Distribution by Category'
)
plt.show()

## Statistical Analysis

In [None]:
# Per-category summary of the 'sales' column: row count, mean, standard
# deviation, minimum and maximum, rounded to two decimal places.
stats_by_category = (
    sales_data
    .groupby('category')['sales']
    .agg(['count', 'mean', 'std', 'min', 'max'])
    .round(2)
)

# Plain-text heading followed by the rich (HTML) rendering of the table.
print("Statistical Summary by Category:")
display(stats_by_category)