# Task 1: Exploratory Data Analysis (EDA) and Business Insights

In [None]:
def _show_sales_barplot(x, y, title, xlabel, xtick_rotation=None):
    """Draw and display one bar chart of total sales.

    Args:
        x: Labels for the x-axis, one per bar.
        y: Bar heights (summed ``TotalValue`` per label).
        title: Figure title.
        xlabel: Label for the x-axis.
        xtick_rotation: Rotation in degrees for the x tick labels, or
            ``None`` to leave them at the default orientation.
    """
    plt.figure(figsize=(10, 6))
    sns.barplot(x=x, y=y)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Total Sales')
    if xtick_rotation is not None:
        plt.xticks(rotation=xtick_rotation)
    plt.show()


def perform_eda():
    """Merge the three datasets and print/plot five sales insights.

    Reads the module-level DataFrames ``transactions``, ``products`` and
    ``customers`` and uses the module-level ``pd``, ``plt`` and ``sns``.
    The insights are: sales by region, sales by category, the ten customers
    with the highest total sales, transaction values of recently signed-up
    customers, and sales per calendar month.

    Side effects:
        * Prints summaries to stdout and shows one figure per insight.
        * Renames ``Price`` to ``ProductPrice`` on the global ``products``
          frame in place.

    Returns:
        None.
    """
    # Rename Price column in products to avoid conflict during merge.
    # NOTE(review): this mutates the shared `products` frame; it is kept
    # in place because other cells may rely on the renamed column. The call
    # is a no-op when `Price` is already gone, so re-running is safe.
    products.rename(columns={'Price': 'ProductPrice'}, inplace=True)

    # Inner joins: only transactions with a matching product and customer
    # survive into the analysis frame.
    merged_data = pd.merge(transactions, products, on='ProductID', how='inner')
    merged_data = pd.merge(merged_data, customers, on='CustomerID', how='inner')

    # Convert both date columns once; every later step reuses them.
    merged_data['TransactionDate'] = pd.to_datetime(merged_data['TransactionDate'])
    merged_data['SignupDate'] = pd.to_datetime(merged_data['SignupDate'])

    # Basic EDA
    print("Basic Information:")
    # DataFrame.info() prints its own report and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    merged_data.info()
    print("\nSummary Statistics:")
    print(merged_data.describe())

    # Check for missing values
    print("\nMissing Values:")
    print(merged_data.isnull().sum())

    # Insight 1: Regional dominance
    region_sales = (
        merged_data.groupby('Region')['TotalValue'].sum().sort_values(ascending=False)
    )
    print("\nSales by Region:")
    print(region_sales)
    _show_sales_barplot(
        region_sales.index, region_sales.values,
        title='Total Sales by Region', xlabel='Region',
    )

    # Insight 2: Category popularity
    category_sales = (
        merged_data.groupby('Category')['TotalValue'].sum().sort_values(ascending=False)
    )
    print("\nSales by Category:")
    print(category_sales)
    _show_sales_barplot(
        category_sales.index, category_sales.values,
        title='Total Sales by Product Category', xlabel='Category',
    )

    # Insight 3: High-value customers
    customer_sales = merged_data.groupby('CustomerID')['TotalValue'].sum()
    top_customers = customer_sales.sort_values(ascending=False).head(10)
    print("\nTop 10 High-Value Customers:")
    print(top_customers)
    # IDs are cast to str so the x-axis treats them as category labels.
    _show_sales_barplot(
        top_customers.index.astype(str), top_customers.values,
        title='Top 10 High-Value Customers', xlabel='Customer ID',
        xtick_rotation=45,
    )

    # Insight 4: Signup behavior
    # "CustomerAge" is the number of days between signup and the most
    # recent transaction date found anywhere in the merged data.
    latest_transaction = merged_data['TransactionDate'].max()
    merged_data['CustomerAge'] = (latest_transaction - merged_data['SignupDate']).dt.days

    # Transactions of customers who signed up in the last 6 months (< 180 days).
    recent_signups = merged_data[merged_data['CustomerAge'] < 180]
    avg_value_recent = recent_signups['TotalValue'].mean()
    print(f"\nAverage Transaction Value for Recent Signups (last 6 months): {avg_value_recent}")

    plt.figure(figsize=(10, 6))
    sns.histplot(recent_signups['TotalValue'], bins=20, kde=True)
    plt.title('Transaction Value Distribution for Recent Signups')
    plt.xlabel('Total Transaction Value')
    plt.ylabel('Frequency')
    plt.show()

    # Insight 5: Seasonal sales performance
    # Grouping is by month number only, so the same calendar month from
    # different years is summed into a single point.
    merged_data['Month'] = merged_data['TransactionDate'].dt.month
    monthly_sales = merged_data.groupby('Month')['TotalValue'].sum()

    print("\nMonthly Sales Performance:")
    print(monthly_sales)

    plt.figure(figsize=(10, 6))
    sns.lineplot(x=monthly_sales.index, y=monthly_sales.values, marker='o')
    plt.title('Monthly Sales Trend')
    plt.xlabel('Month')
    plt.ylabel('Total Sales')
    plt.xticks(range(1, 13))  # show all twelve months even if some have no sales
    plt.grid(True)
    plt.show()

In [None]:
# Execute the tasks: run the full EDA, which prints the summaries and shows
# each chart in turn. The `transactions`, `products` and `customers`
# DataFrames must already be defined before this cell runs.
perform_eda()