# 🚀 Quick Data Analysis Template

Copy this notebook to start any data analysis project. It comes pre-configured with best practices and common patterns: standard imports, display settings, and a load → explore → clean → analyze workflow.

**Usage**: Save as → Rename → Replace the demo data in *Data Loading* with your own → Start analyzing!

## 📦 Setup & Imports

In [None]:
# 🐼 Data manipulation
import pandas as pd
import numpy as np

# 📊 Visualization  
import matplotlib.pyplot as plt
import seaborn as sns

# ⚙️ Configuration
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*';
# older releases only know the un-suffixed 'seaborn'. Use whichever name this
# installation provides so the template runs on both, and silently keep
# matplotlib's default style if neither is available.
for style_name in ('seaborn-v0_8', 'seaborn'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

# Default colour cycle for every seaborn/matplotlib plot below
sns.set_palette("husl")

# Limit how much of a DataFrame is rendered when it is displayed
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 100)

print("📚 Libraries loaded successfully!")

## 📥 Data Loading

In [None]:
# 📁 To analyse your own data, uncomment one reader and update the path
# df = pd.read_csv('your_data.csv')
# df = pd.read_excel('your_data.xlsx')

# 🧪 Demo dataset: 100 consecutive days starting 2023-01-01 with synthetic
# sales figures plus a random category and region label per row.
# The global seed makes the draws reproducible; the columns are generated in
# the order sales -> category -> region, which fixes the resulting values.
np.random.seed(42)
df = (
    pd.DataFrame({'date': pd.date_range(start='2023-01-01', periods=100)})
    .assign(
        sales=lambda frame: np.random.normal(loc=1000, scale=200, size=len(frame)),
        category=lambda frame: np.random.choice(['A', 'B', 'C'], size=len(frame)),
        region=lambda frame: np.random.choice(
            ['North', 'South', 'East', 'West'], size=len(frame)
        ),
    )
)

print(f"📊 Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")

## 🔍 Quick Data Overview

In [None]:
# 📋 Structural snapshot: dimensions, per-column dtypes and null counts.
# Each heading/value pair goes through one print call; sep="\n" puts the
# value on the line after its heading.
print("📏 Shape:", df.shape)
print("\n📝 Column types:", df.dtypes, sep="\n")
print("\n🕳️ Missing values:", df.isnull().sum(), sep="\n")

# 👀 Rich (HTML) rendering of the first five rows
display(df.head(n=5))

## 📊 Summary Statistics

In [None]:
# 🔢 Descriptive statistics, using describe()'s default column selection
print("📈 Numerical columns:")
display(df.describe())

# 🏷️ Value frequencies for every object- or category-typed column
print("\n🏷️ Categorical columns:")
label_columns = df.select_dtypes(include=['object', 'category']).columns
for label in label_columns:
    # Column name as a heading, followed by its value counts on the next line
    print(f"\n{label}:", df[label].value_counts(), sep="\n")

## 📈 Quick Visualizations

In [None]:
# 📊 2x2 overview figure: sales distribution, category counts,
# sales over time, and correlations between numeric columns.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# No emoji in the figure title: matplotlib's default font (DejaVu Sans) has no
# glyph for it, so it would render as an empty box and emit a
# "Glyph ... missing from font" warning.
fig.suptitle('Quick Data Overview', fontsize=16)

# Sales distribution
axes[0, 0].hist(df['sales'], bins=20, alpha=0.7, color='skyblue')
axes[0, 0].set_title('Sales Distribution')
axes[0, 0].set_xlabel('Sales')
axes[0, 0].set_ylabel('Frequency')

# Category counts
df['category'].value_counts().plot(kind='bar', ax=axes[0, 1], color='lightcoral')
axes[0, 1].set_title('Category Distribution')
axes[0, 1].set_ylabel('Count')
axes[0, 1].tick_params(axis='x', rotation=45)

# Time series (only when a date column exists); otherwise hide the panel so
# the figure does not show an empty set of axes.
if 'date' in df.columns:
    df.set_index('date')['sales'].plot(ax=axes[1, 0], color='green')
    axes[1, 0].set_title('Sales Over Time')
    axes[1, 0].set_ylabel('Sales')
else:
    axes[1, 0].set_visible(False)

# Correlation heatmap needs at least two numeric columns. The demo data has
# only 'sales', so in that case the panel is hidden instead of left blank.
numeric_cols = df.select_dtypes(include=[np.number]).columns
if len(numeric_cols) > 1:
    sns.heatmap(df[numeric_cols].corr(), annot=True, cmap='coolwarm', ax=axes[1, 1])
    axes[1, 1].set_title('Correlation Matrix')
else:
    axes[1, 1].set_visible(False)

plt.tight_layout()
plt.show()

## 🧹 Data Cleaning (Template)

In [None]:
# 🧹 Clean your data here.
# Everything below works on a copy, so the loaded `df` is left untouched and
# this cell can be re-run safely.

# Remove exact duplicate rows and report how many were dropped
before = df.shape[0]
df_clean = df.copy().drop_duplicates()
print(f"🗑️ Removed {before - df_clean.shape[0]} duplicates")

# Handle missing values (customize based on your data).
# Assign the result back rather than calling an inplace method on a column
# selection: that chained form is deprecated in pandas 2.x and does not
# modify the frame under Copy-on-Write.
# df_clean['column'] = df_clean['column'].fillna(df_clean['column'].median())
# df_clean = df_clean.dropna(subset=['important_column'])

# Convert data types if needed
# df_clean['date_column'] = pd.to_datetime(df_clean['date_column'])
# df_clean['category_column'] = df_clean['category_column'].astype('category')

print(f"✅ Clean dataset: {df_clean.shape[0]} rows, {df_clean.shape[1]} columns")

## 🎯 Analysis Section (Customize This!)

In [None]:
# 🔍 Example analysis — replace with the questions your project needs answered

# Per-category mean, total and row count of sales
print("📊 Sales by Category:")
category_stats = (
    df_clean
    .groupby('category')['sales']
    .agg(['mean', 'sum', 'count'])
)
display(category_stats)

# Headline numbers, emitted as one block (one line per insight).
# NOTE: "Average daily sales" is the mean over rows; it equals a per-day
# average only while the data holds exactly one row per date, as the demo
# data does.
print(
    "\n💡 Key Insights:",
    f"• Best performing category: {category_stats['mean'].idxmax()}",
    f"• Total sales: ${df_clean['sales'].sum():,.0f}",
    f"• Average daily sales: ${df_clean['sales'].mean():,.0f}",
    sep="\n",
)

## 📋 Next Steps & Conclusions

**🎯 Key Findings:**
- [ ] Finding #1
- [ ] Finding #2  
- [ ] Finding #3

**🔮 Recommendations:**
- [ ] Recommendation #1
- [ ] Recommendation #2
- [ ] Recommendation #3

**🚀 Future Work:**
- [ ] Additional analysis needed
- [ ] Data to collect
- [ ] Models to build
