# Comprehensive Sales Analysis - Customer Data Insights

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.stats import f_oneway, ttest_ind, chi2_contingency
import statsmodels.api as sm
from statsmodels.stats.anova import anova_lm
from statsmodels.formula.api import ols
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LinearRegression
import warnings
warnings.filterwarnings('ignore')


In [3]:
# Plot appearance: matplotlib's 'seaborn-v0_8' style sheet and seaborn's "husl" palette.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Startup banner; the trailing newline on the last entry leaves a blank line after it.
for banner_line in (
    "=== COMPREHENSIVE SALES ANALYSIS FRAMEWORK ===",
    "Libraries loaded successfully!\n",
):
    print(banner_line)


=== COMPREHENSIVE SALES ANALYSIS FRAMEWORK ===
Libraries loaded successfully!



# =============================================================================
# PHASE 1: DATA FOUNDATION & QUALITY ASSESSMENT
# =============================================================================


In [8]:
print("PHASE 1: DATA FOUNDATION & QUALITY ASSESSMENT")
print("=" * 50)

# Load the data from a CSV expected in the notebook's working directory.
try:
    sales_data = pd.read_csv('sales_data.csv')
except FileNotFoundError as err:
    # Raise instead of calling exit(): inside a notebook exit() requests a kernel
    # shutdown and does not halt the cell, so the code below would still run and
    # fail with a confusing NameError on `sales_data`.
    raise FileNotFoundError(
        "Error: sales_data.csv not found. Please ensure the file is in the correct directory."
    ) from err
print("Data loaded successfully!")

# Work on a copy so columns added to `df` later never alter the raw `sales_data` frame.
df = sales_data.copy()


PHASE 1: DATA FOUNDATION & QUALITY ASSESSMENT
Data loaded successfully!


# 1.1 Data Audit

In [15]:
# Quick audit of the loaded frame: shape, null counts, dtypes and a preview.
print("1.1 DATA AUDIT")
print("-" * 20)
# print() rather than display(): display() on a str shows its repr, quotes included.
print(f"Dataset shape: {df.shape}")
print(f"Missing values:\n{df.isnull().sum()}")
print(f"\nData types:\n{df.dtypes}")
print("\nFirst 5 rows:")
# Rich (HTML table) rendering is only wanted for the DataFrame preview.
display(df.head())



1.1 DATA AUDIT
--------------------


'Dataset shape: (16, 11)'

Missing values:
Customer_ID           0
Customer_Name         0
Region                0
Total_Spend           0
Purchase_Frequency    0
Marketing_Spend       0
Seasonality_Index     0
Churned               0
Marketing_ROI         0
Spend_per_Purchase    0
Churned_Binary        0
dtype: int64

Data types:
Customer_ID             int64
Customer_Name          object
Region                 object
Total_Spend             int64
Purchase_Frequency      int64
Marketing_Spend         int64
Seasonality_Index     float64
Churned                object
Marketing_ROI         float64
Spend_per_Purchase    float64
Churned_Binary          int64
dtype: object

First 5 rows:


Unnamed: 0,Customer_ID,Customer_Name,Region,Total_Spend,Purchase_Frequency,Marketing_Spend,Seasonality_Index,Churned,Marketing_ROI,Spend_per_Purchase,Churned_Binary
0,101,John Doe,North,5000,12,2000,1.2,No,2.5,416.666667,0
1,102,Jane Smith,South,3000,8,1500,1.0,Yes,2.0,375.0,1
2,103,Sam Brown,East,4500,10,1800,1.1,No,2.5,450.0,0
3,104,Linda Johnson,West,2500,5,1000,0.9,Yes,2.5,500.0,1
4,105,Michael Lee,North,7000,15,2500,1.3,No,2.8,466.666667,0


In [10]:
# Create derived metrics
# Zero denominators are mapped to NaN first: a plain division would give inf, which
# makes the mean in describe() infinite and the correlations involving that column NaN.
marketing_spend_nonzero = df['Marketing_Spend'].replace(0, np.nan)
purchase_count_nonzero = df['Purchase_Frequency'].replace(0, np.nan)

# Customer spend generated per unit of marketing spend.
df['Marketing_ROI'] = df['Total_Spend'] / marketing_spend_nonzero
# Average spend per purchase.
df['Spend_per_Purchase'] = df['Total_Spend'] / purchase_count_nonzero
# Numeric churn flag; any label other than exactly 'Yes'/'No' becomes NaN.
df['Churned_Binary'] = df['Churned'].map({'Yes': 1, 'No': 0})

print("\nDerived metrics created: Marketing_ROI, Spend_per_Purchase, Churned_Binary")



Derived metrics created: Marketing_ROI, Spend_per_Purchase, Churned_Binary


# 1.2 Exploratory Data Analysis

In [12]:
print("\n1.2 EXPLORATORY DATA ANALYSIS")
print("-" * 30)

# Descriptive statistics
print("Descriptive Statistics:")
display(df.describe())

# Correlation analysis (DataFrame.corr defaults to Pearson) over the numeric columns.
numerical_cols = ['Total_Spend', 'Purchase_Frequency', 'Marketing_Spend', 'Seasonality_Index', 'Marketing_ROI', 'Spend_per_Purchase']
correlation_matrix = df[numerical_cols].corr()

# Draw the matrix on an explicit axes; a bare plt.figure() with nothing plotted on it
# only renders an empty "Figure ... with 0 Axes" placeholder.
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(
    correlation_matrix,
    annot=True,       # print each coefficient inside its cell
    fmt=".2f",
    cmap="coolwarm",
    vmin=-1,          # fix the colour scale to the full correlation range
    vmax=1,
    square=True,
    ax=ax,
)
ax.set_title("Correlation Matrix of Numerical Features")
plt.show()



1.2 EXPLORATORY DATA ANALYSIS
------------------------------
Descriptive Statistics:


Unnamed: 0,Customer_ID,Total_Spend,Purchase_Frequency,Marketing_Spend,Seasonality_Index,Marketing_ROI,Spend_per_Purchase,Churned_Binary
count,16.0,16.0,16.0,16.0,16.0,16.0,16.0,16.0
mean,108.5,4137.5,9.5,1675.0,1.04375,2.449538,440.259168,0.5
std,4.760952,1396.125591,3.224903,484.424057,0.154785,0.197741,45.277209,0.516398
min,101.0,2500.0,5.0,1000.0,0.8,2.0,366.666667,0.0
25%,104.75,2975.0,6.75,1300.0,0.9,2.302335,409.375,0.0
50%,108.5,3900.0,9.5,1650.0,1.05,2.474937,450.0,0.5
75%,112.25,5075.0,12.0,2025.0,1.2,2.529762,462.820513,1.0
max,116.0,7000.0,15.0,2500.0,1.3,2.8,520.0,1.0


<Figure size 1500x1000 with 0 Axes>

<Figure size 1500x1000 with 0 Axes>