# Sales and Profit Performance Analysis

This notebook explores the sales and profit data to identify trends, patterns, and insights.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys

# Plot styling: the seaborn style is applied first, then the matplotlib
# defaults for figure size and font size are set in a single update so they
# are the last values written.
sns.set(style="whitegrid")
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'font.size': 12,
})

# Put the parent directory on the import path so the local `src` package
# resolves. The entry is relative, so it is interpreted against the kernel's
# current working directory.
sys.path.append('..')
from src.data_processing import (
    load_data,
    clean_data,
    generate_features,
)
from src.analysis import (
    calculate_summary_stats,
    time_series_analysis,
    product_performance_analysis,
)
from src.visualization import (
    plot_sales_trend,
    plot_profit_margin_by_category,
    plot_sales_by_region,
)

## Data Loading and Preprocessing

In [None]:
# Locate the raw CSV (relative to the current working directory) and read it
# through the project loader.
raw_data_dir = os.path.join('..', 'data', 'raw')
file_path = os.path.join(raw_data_dir, 'Sales dataset.csv')
df = load_data(file_path)
print(f"Loaded {len(df)} records")

# Preview the leading rows; as the last expression this becomes the cell output
df.head()

In [None]:
# Inspect the structure of the freshly loaded data before any cleaning.
# Assumes `df` is a pandas DataFrame (it comes from the project's load_data
# helper — TODO confirm); for a DataFrame, .info() prints the column names,
# dtypes and non-null counts.
df.info()

In [None]:
# Clean and preprocess the data.
# `clean_data` is a project helper (src.data_processing); its exact rules are
# not visible here. The result is kept under a new name so the raw `df`
# remains available for comparison.
df_clean = clean_data(df)
# Report the post-cleaning row count so it can be compared with the
# "Loaded ... records" figure printed when the data was read.
print(f"After cleaning: {len(df_clean)} records")

# Generate additional features.
# `generate_features` is likewise a project helper; `df_enhanced` is the frame
# every later analysis cell in this notebook reads from.
df_enhanced = generate_features(df_clean)
print(f"Enhanced data shape: {df_enhanced.shape}")

# Display the processed data (last expression, so it renders as the cell output)
df_enhanced.head()

## Exploratory Data Analysis

In [None]:
# Summary statistics for the enhanced data set.
# Assumes `df_enhanced` is a pandas DataFrame — TODO confirm against
# generate_features; for a DataFrame, .describe() summarizes the numeric
# columns by default (count, mean, std, min, quartiles, max).
df_enhanced.describe()

In [None]:
# Compute the headline business metrics once, then report them
summary_stats = calculate_summary_stats(df_enhanced)

# The three headline figures are read with plain indexing, so a missing key
# raises KeyError.
print(f"Total Sales: ${summary_stats['total_sales']:,.2f}")
print(f"Total Profit: ${summary_stats['total_profit']:,.2f}")
print(f"Average Profit Margin: {summary_stats['average_profit_margin']:.2%}")

# Per-category breakdowns share one layout: a heading followed by one
# "name: $amount" line per entry. A breakdown that is absent from
# summary_stats prints only its heading.
category_sections = (
    ("\nTop Selling Categories:", 'top_selling_categories'),
    ("\nMost Profitable Categories:", 'most_profitable_categories'),
)
for heading, stats_key in category_sections:
    print(heading)
    for category, amount in summary_stats.get(stats_key, {}).items():
        print(f"{category}: ${amount:,.2f}")

## Sales Trends Analysis

In [None]:
# Build the time-based aggregates that the trend charts read from
time_series = time_series_analysis(df_enhanced)

# Monthly sales as a line chart, drawn on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(14, 8))
monthly = time_series['monthly']
ax.plot(monthly['dates'], monthly['sales'], marker='o')
ax.set_title('Monthly Sales Trend', fontsize=16)
ax.set_xlabel('Month', fontsize=14)
ax.set_ylabel('Sales ($)', fontsize=14)
ax.grid(True, alpha=0.3)
# Slant the x tick labels of the current axes (the one created above)
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Monthly profit as a green line chart, reusing the `time_series` aggregates
# computed in the sales-trend cell
fig, ax = plt.subplots(figsize=(14, 8))
monthly = time_series['monthly']
ax.plot(monthly['dates'], monthly['profit'], marker='o', color='green')
ax.set_title('Monthly Profit Trend', fontsize=16)
ax.set_xlabel('Month', fontsize=14)
ax.set_ylabel('Profit ($)', fontsize=14)
ax.grid(True, alpha=0.3)
# Slant the x tick labels of the current axes (the one created above)
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## Product Performance Analysis

In [None]:
# Analyze product category performance.
# `product_performance_analysis` is a project helper (src.analysis); the
# columns and ordering of its result are not visible from this notebook.
product_perf = product_performance_analysis(df_enhanced)
# Show only the first 10 rows so the full table is not dumped
# (assumes the result is a DataFrame-like object with .head — TODO confirm)
product_perf.head(10)

In [None]:
# Visualize profit margin by category.
# `plot_profit_margin_by_category` is a project helper (src.visualization);
# presumably it draws onto the current matplotlib figure — TODO confirm, since
# the tight_layout/show calls below act on whatever figure is current.
plot_profit_margin_by_category(df_enhanced)
plt.tight_layout()
plt.show()

## Geographic Analysis

In [None]:
# Analyze sales by region.
# `plot_sales_by_region` is a project helper (src.visualization); presumably
# it draws onto the current matplotlib figure — TODO confirm, since the
# tight_layout/show calls below act on whatever figure is current.
plot_sales_by_region(df_enhanced)
plt.tight_layout()
plt.show()

In [None]:
# Top-10 countries by total sales; the whole cell is skipped when the data has
# no 'Customer Country' column
if 'Customer Country' in df_enhanced.columns:
    # Total sales per country, largest first
    sales_per_country = df_enhanced.groupby('Customer Country')['Sales'].sum()
    country_sales = sales_per_country.sort_values(ascending=False)

    # Bar chart of the ten largest totals on an explicit axes
    fig, ax = plt.subplots(figsize=(14, 8))
    country_sales.head(10).plot.bar(ax=ax)
    ax.set_title('Sales by Country (Top 10)', fontsize=16)
    ax.set_xlabel('Country', fontsize=14)
    ax.set_ylabel('Sales ($)', fontsize=14)
    ax.grid(True, alpha=0.3)
    # Slant the x tick labels of the current axes (the one created above)
    plt.xticks(rotation=45)
    fig.tight_layout()
    plt.show()

## Customer Segment Analysis

In [None]:
# Analyze sales by customer segment.
# Every column the aggregation touches must be present: guarding on the
# grouping column alone would let a missing metric column raise a KeyError
# in the middle of the cell.
segment_required_cols = ['Customer Segment', 'Sales', 'Profit Per Order', 'Order Quantity']
segment_missing_cols = [col for col in segment_required_cols if col not in df_enhanced.columns]

if segment_missing_cols:
    # Say why the section produced no output instead of skipping silently
    print(f"Skipping customer segment analysis; missing columns: {segment_missing_cols}")
else:
    # One row per segment with summed sales, profit and quantity
    segment_sales = df_enhanced.groupby('Customer Segment').agg({
        'Sales': 'sum',
        'Profit Per Order': 'sum',
        'Order Quantity': 'sum'
    }).reset_index()

    # Sales divided by total quantity per segment. Zero totals are masked to
    # NaN first so the ratio comes out as NaN instead of inf.
    # NOTE(review): if 'Order Quantity' counts units rather than orders, this
    # ratio is sales per unit, not an order value — confirm against the data
    # dictionary and rename the column if so.
    segment_quantity = segment_sales['Order Quantity']
    segment_sales['Average Order Value'] = (
        segment_sales['Sales'] / segment_quantity.where(segment_quantity != 0)
    )

    print("Customer Segment Analysis:")
    print(segment_sales)

    # Visualize sales by segment
    plt.figure(figsize=(10, 6))
    sns.barplot(x='Customer Segment', y='Sales', data=segment_sales)
    plt.title('Sales by Customer Segment', fontsize=16)
    plt.xlabel('Customer Segment', fontsize=14)
    plt.ylabel('Sales ($)', fontsize=14)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

## Correlation Analysis

In [None]:
# Pairwise correlations between all numeric columns of the enhanced data
numeric_cols = list(df_enhanced.select_dtypes(include='number').columns)
corr_matrix = df_enhanced.loc[:, numeric_cols].corr()

# Annotated heatmap of the correlation matrix, drawn on an explicit axes
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Correlation Matrix of Numeric Variables', fontsize=16)
fig.tight_layout()
plt.show()

## Conclusions and Recommendations

Based on the analysis above, these are the areas to focus on and the recommended action for each:

1. **Top performing product categories** - Focus on expanding these categories and identifying what makes them successful
2. **Sales trends** - Identify seasonal patterns and adjust inventory and marketing accordingly
3. **Customer segments** - Target marketing efforts to the most profitable customer segments
4. **Geographic insights** - Explore opportunities for growth in high-performing regions

Additional analyses that could provide further insights:
- Customer retention and repeat purchase patterns
- Price elasticity analysis
- Bundle analysis for complementary products