# Day-wise Invoice Sum Report for Multiple Sheets
This notebook reads three Excel sheets, filters out unwanted rows, groups by day number, and writes the results to a new Excel file for easy analysis.

In [290]:
# Get necessary imports
import pandas as pd
import openpyxl
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment, numbers
from openpyxl.utils import get_column_letter
from datetime import datetime
import calendar
from copy import copy  # For copying Excel cell styles


## Define the sheets and paths to process

In [291]:
# Automated Path Configuration
import os
import glob
from datetime import datetime, timedelta
import calendar
import pandas as pd
import json

def get_month_year_combinations(latest_month_year):
    """
    Get the latest month, last month, and last year combinations
    """
    # Parse the latest month-year
    month_name, year = latest_month_year.split('-')
    year = int(year)
    month_num = list(calendar.month_name).index(month_name)
    
    # Calculate last month
    if month_num == 1:  # January
        last_month_num = 12
        last_month_year_num = year - 1
    else:
        last_month_num = month_num - 1
        last_month_year_num = year
    
    # Calculate last year same month
    last_year_month_num = month_num
    last_year_year_num = year - 1
    
    # Convert back to names
    last_month_name = calendar.month_name[last_month_num]
    last_year_month_name = calendar.month_name[last_year_month_num]
    
    return {
        'latest': {'month': month_name, 'year': year, 'folder': f"{month_name}-{year}"},
        'last_month': {'month': last_month_name, 'year': last_month_year_num, 'folder': f"{last_month_name}-{last_month_year_num}"},
        'last_year': {'month': last_year_month_name, 'year': last_year_year_num, 'folder': f"{last_year_month_name}-{last_year_year_num}"}
    }

def find_file_by_keyword(folder_path, keyword):
    """
    Find a file in the folder that contains the keyword in its name
    """
    if not os.path.exists(folder_path):
        return None
    
    files = os.listdir(folder_path)
    for file in files:
        if keyword.lower() in file.lower() and file.endswith('.xlsx'):
            return os.path.join(folder_path, file)
    return None

def get_sheet_name_with_keyword(file_path, keyword):
    """
    Get the sheet name that contains the keyword
    """
    try:
        xl = pd.ExcelFile(file_path)
        for sheet_name in xl.sheet_names:
            if keyword.lower() in sheet_name.lower():
                return sheet_name
        # If no sheet with keyword found, return the first sheet
        return xl.sheet_names[0] if xl.sheet_names else None
    except:
        return None

def setup_automated_paths(latest_month_year, dsr_folder_path=None):
    """
    Setup all paths automatically based on the latest month-year input
    
    Parameters:
    latest_month_year: str - Format: "June-2025"
    dsr_folder_path: str - Full path to DSR folder (if None, uses default)
    
    Returns:
    dict containing all the required paths and configurations
    """
    
    # Get month-year combinations
    dates = get_month_year_combinations(latest_month_year)
    
    # Base DSR folder path
    if dsr_folder_path:
        dsr_path = dsr_folder_path
    else:
        dsr_path = os.path.join(os.getcwd(), "test", "DSR")
    
    # Prepare results
    sheet_info = []
    target_info = {}
    session_info = {}
    
    # Process each period (last_month, last_year, latest)
    periods = ['last_month', 'last_year', 'latest']
    display_names = [
        f"{dates['last_month']['month']} {dates['last_month']['year'] % 100}",  # May 25
        f"{dates['last_year']['month']} {dates['last_year']['year'] % 100}",   # June 24
        f"{dates['latest']['month']} {dates['latest']['year'] % 100}"          # June 25
    ]
    
    for i, period in enumerate(periods):
        period_data = dates[period]
        folder_path = os.path.join(dsr_path, period_data['folder'])
        
        # Find invoice file
        invoice_file = find_file_by_keyword(folder_path, 'invoice')
        if invoice_file:
            # Get the first sheet (since invoice files have only one sheet)
            try:
                xl = pd.ExcelFile(invoice_file)
                sheet_name = xl.sheet_names[0] if xl.sheet_names else 'Sheet1'
            except:
                sheet_name = 'Sheet1'
            
            # Use absolute path
            sheet_info.append((invoice_file, sheet_name, display_names[i]))
    
    # Setup target information (using latest month)
    latest_folder = os.path.join(dsr_path, dates['latest']['folder'])
    target_file = find_file_by_keyword(latest_folder, 'target')
    if target_file:
        target_sheet = get_sheet_name_with_keyword(target_file, 'target')
        if not target_sheet:
            target_sheet = 'Target'  # Default if not found
        
        target_info = {
            'path': target_file,
            'sheet': target_sheet
        }
    
    # Setup session information (using latest month)
    traffic_file = find_file_by_keyword(latest_folder, 'traffic')
    if traffic_file:
        download_sheet = get_sheet_name_with_keyword(traffic_file, 'download')
        if not download_sheet:
            # If no download sheet found, get the first sheet
            try:
                xl = pd.ExcelFile(traffic_file)
                download_sheet = xl.sheet_names[0] if xl.sheet_names else 'Sheet1'
            except:
                download_sheet = 'Sheet1'
        
        session_info = {
            'path': traffic_file,
            'sheet': download_sheet
        }
    
    return {
        'sheet_info': sheet_info,
        'target_info': target_info,
        'session_info': session_info,
        'dates': dates
    }

# Define target Excel file 
output_path = 'invoice_day_channel_report_compatible.xlsx'

# AUTOMATED PATH CONFIGURATION
# Get user input for latest month-year and DSR path
latest_month_year = input("Enter the latest month-year (e.g., 'June-2025'): ").strip()
# Load configuration from config.json
try:
    with open('config.json', 'r') as f:
        config_data = json.load(f)
    dsr_folder_path = config_data['paths']['dsr_folder_path']
except Exception as e:
    print(f"❌ Error loading config.json: {e}")
    print("Using default DSR folder path...")
    dsr_folder_path = "C:\\Users\\91843\\Documents\\VsCode Codes\\ReportAutomation\\test\\DSR"

# Setup all paths automatically
try:
    config = setup_automated_paths(latest_month_year, dsr_folder_path)
    
    # Extract configuration
    sheet_info = config['sheet_info']
    target_config = config['target_info']
    session_config = config['session_info']
    
    print(f"\n✅ Configuration successful!")
    if dsr_folder_path:
        print(f"📁 Using DSR folder: {dsr_folder_path}")
    print(f"📁 Found {len(sheet_info)} invoice files:")
    for i, (path, sheet, display) in enumerate(sheet_info):
        print(f"   {i+1}. {display}: {path} -> {sheet}")
    
    if target_config:
        print(f"🎯 Target file: {target_config['path']} -> {target_config['sheet']}")
    else:
        print("⚠️  No target file found - using fallback")
    
    if session_config:
        print(f"📊 Session file: {session_config['path']} -> {session_config['sheet']}")
    else:
        print("⚠️  No session file found - using fallback")
    
except Exception as e:
    print(f"❌ Error in automated setup: {e}")
    print("🔄 Falling back to manual configuration...")
    
    # Fallback to manual configuration
    sheet_info = [
        ('test2/may25-final.xlsx', 'Sheet1', 'May 25'),   # Last month raw sheet
        ('test2/June24_Invoice.xlsx', 'Raw data June 24', 'June 24'),        # Last year raw sheet
        ('test2/June25.xlsx', 'Sheet1', 'June 25')                # Latest month raw sheet
    ]
    target_config = {'path': 'test2/Target_June_25.xlsx', 'sheet': 'Target-June25'}
    session_config = {'path': 'test2/June_2025_Daily traffic.xlsx', 'sheet': 'download - 2025-01-08T160122.10'}

# Constants for easier sheet reference - DO NOT USE THESE DIRECTLY
# Instead, use the index to get the specific dataframe
LAST_MONTH_INDEX = 0  # Last month index
LAST_YEAR_INDEX = 1   # Last year index  
LATEST_MONTH_INDEX = 2      # Latest month index

# Display names for column headers
LAST_MONTH_DISPLAY = sheet_info[LAST_MONTH_INDEX][2] if len(sheet_info) > LAST_MONTH_INDEX else "Last Month"
LAST_YEAR_DISPLAY = sheet_info[LAST_YEAR_INDEX][2] if len(sheet_info) > LAST_YEAR_INDEX else "Last Year"
LATEST_DISPLAY = sheet_info[LATEST_MONTH_INDEX][2] if len(sheet_info) > LATEST_MONTH_INDEX else "Latest"

# Target sheet information  
TARGET_PATH = target_config['path'] if target_config else 'test2/Target_June_25.xlsx'
TARGET_SHEET = target_config['sheet'] if target_config else 'Target-June25'

# Read the session data with specific columns
important_columns = [
    'Day',
    'Channel', 
    'Sessions',
    'Purchases',
    'Purchase revenue',
    'CG',
    'Category'
]

# Read the session data
session_file_path = session_config['path'] if session_config else "test2/June_2025_Daily traffic.xlsx"
session_sheet_name = session_config['sheet'] if session_config else "download - 2025-01-08T160122.10"

try:
    session_df = pd.read_excel(session_file_path, sheet_name=session_sheet_name)
    print(f"📈 Session data loaded successfully from: {session_file_path}")
except Exception as e:
    print(f"⚠️  Error loading session data: {e}")
    print("Please check the file path and sheet name manually.")


✅ Configuration successful!
📁 Using DSR folder: test/DSR
📁 Found 3 invoice files:
   1. May 25: test/DSR\May-2025\may25_Invoice.xlsx -> Sheet1
   2. June 24: test/DSR\June-2024\June24_Invoice.xlsx -> Raw data June 24
   3. June 25: test/DSR\June-2025\Invoicedate_30.xlsx -> Sheet1
🎯 Target file: test/DSR\June-2025\Target_June_25.xlsx -> Target-June25
📊 Session file: test/DSR\June-2025\June_2025_Daily traffic.xlsx -> download - 2025-01-08T160122.10
📈 Session data loaded successfully from: test/DSR\June-2025\June_2025_Daily traffic.xlsx
📈 Session data loaded successfully from: test/DSR\June-2025\June_2025_Daily traffic.xlsx


In [292]:
# Define target Excel file 
output_path = 'invoice_day_channel_report_compatible.xlsx'

# AUTOMATED PATH CONFIGURATION
# Get user input for latest month-year
latest_month_year = latest_month_year.strip()

# Setup all paths automatically
try:
    config = setup_automated_paths(latest_month_year,dsr_folder_path)

    print( config)
    
    # Extract configuration
    sheet_info = config['sheet_info']
    target_config = config['target_info']
    session_config = config['session_info']
    
    print(f"\n✅ Configuration successful!")
    print(f"📁 Found {len(sheet_info)} invoice files:")
    for i, (path, sheet, display) in enumerate(sheet_info):
        print(f"   {i+1}. {display}: {path} -> {sheet}")
    
    if target_config:
        print(f"🎯 Target file: {target_config['path']} -> {target_config['sheet']}")
    else:
        print("⚠️  No target file found - using fallback")
    
    if session_config:
        print(f"📊 Session file: {session_config['path']} -> {session_config['sheet']}")
    else:
        print("⚠️  No session file found - using fallback")
    
except Exception as e:
    print(f"❌ Error in automated setup: {e}")
    print("🔄 Falling back to manual configuration...")
    
    # Fallback to manual configuration
    sheet_info = [
        ('test2/may25-final.xlsx', 'Sheet1', 'May 25'),   # Last month raw sheet
        ('test2/June24_Invoice.xlsx', 'Raw data June 24', 'June 24'),        # Last year raw sheet
        ('test2/June25.xlsx', 'Sheet1', 'June 25')                # Latest month raw sheet
    ]
    target_config = {'path': 'test2/Target_June_25.xlsx', 'sheet': 'Target-June25'}
    session_config = {'path': 'test2/June_2025_Daily traffic.xlsx', 'sheet': 'download - 2025-01-08T160122.10'}

# Constants for easier sheet reference - DO NOT USE THESE DIRECTLY
# Instead, use the index to get the specific dataframe
LAST_MONTH_INDEX = 0  # Last month index
LAST_YEAR_INDEX = 1   # Last year index  
LATEST_MONTH_INDEX = 2      # Latest month index

# Display names for column headers
LAST_MONTH_DISPLAY = sheet_info[LAST_MONTH_INDEX][2] if len(sheet_info) > LAST_MONTH_INDEX else "Last Month"
LAST_YEAR_DISPLAY = sheet_info[LAST_YEAR_INDEX][2] if len(sheet_info) > LAST_YEAR_INDEX else "Last Year"
LATEST_DISPLAY = sheet_info[LATEST_MONTH_INDEX][2] if len(sheet_info) > LATEST_MONTH_INDEX else "Latest"

# Target sheet information  
TARGET_PATH = target_config['path'] if target_config else 'test2/Target_June_25.xlsx'
TARGET_SHEET = target_config['sheet'] if target_config else 'Target-June25'

# Read the session data with specific columns
important_columns = [
    'Day',
    'Channel', 
    'Sessions',
    'Purchases',
    'Purchase revenue',
    'CG',
    'Category'
]

# Read the session data
session_file_path = session_config['path'] if session_config else "test2/June_2025_Daily traffic.xlsx"
session_sheet_name = session_config['sheet'] if session_config else "download - 2025-01-08T160122.10"

try:
    session_df = pd.read_excel(session_file_path, sheet_name=session_sheet_name)
    print(f"📈 Session data loaded successfully from: {session_file_path}")
except Exception as e:
    print(f"⚠️  Error loading session data: {e}")
    print("Please check the file path and sheet name manually.")

# Verify the configuration setup
print("🔧 Current Configuration:")
print(f"📁 DSR Folder: {dsr_folder_path}")
print(f"📅 Latest Month-Year: {latest_month_year}")

# Verify the paths are correctly set
if config:
    print(f"\n📊 Sheet Information:")
    for i, (path, sheet, display) in enumerate(sheet_info):
        print(f"   {i+1}. {display}: {os.path.basename(path)} -> {sheet}")
        # Verify file exists
        if os.path.exists(path):
            print(f"      ✅ File exists: {path}")
        else:
            print(f"      ❌ File not found: {path}")
    
    print(f"\n🎯 Target Configuration:")
    if target_config:
        print(f"   Path: {target_config['path']}")
        print(f"   Sheet: {target_config['sheet']}")
        if os.path.exists(target_config['path']):
            print(f"   ✅ Target file exists")
        else:
            print(f"   ❌ Target file not found")
    
    print(f"\n📈 Session Configuration:")
    if session_config:
        print(f"   Path: {session_config['path']}")
        print(f"   Sheet: {session_config['sheet']}")
        if os.path.exists(session_config['path']):
            print(f"   ✅ Session file exists")
        else:
            print(f"   ❌ Session file not found")

# Print the final paths that will be used
print(f"\n🔗 Final Paths to be Used:")
print(f"   TARGET_PATH: {TARGET_PATH}")
print(f"   TARGET_SHEET: {TARGET_SHEET}")
print(f"   Session Path: {session_file_path}")
print(f"   Session Sheet: {session_sheet_name}")

{'sheet_info': [('test/DSR\\May-2025\\may25_Invoice.xlsx', 'Sheet1', 'May 25'), ('test/DSR\\June-2024\\June24_Invoice.xlsx', 'Raw data June 24', 'June 24'), ('test/DSR\\June-2025\\Invoicedate_30.xlsx', 'Sheet1', 'June 25')], 'target_info': {'path': 'test/DSR\\June-2025\\Target_June_25.xlsx', 'sheet': 'Target-June25'}, 'session_info': {'path': 'test/DSR\\June-2025\\June_2025_Daily traffic.xlsx', 'sheet': 'download - 2025-01-08T160122.10'}, 'dates': {'latest': {'month': 'June', 'year': 2025, 'folder': 'June-2025'}, 'last_month': {'month': 'May', 'year': 2025, 'folder': 'May-2025'}, 'last_year': {'month': 'June', 'year': 2024, 'folder': 'June-2024'}}}

✅ Configuration successful!
📁 Found 3 invoice files:
   1. May 25: test/DSR\May-2025\may25_Invoice.xlsx -> Sheet1
   2. June 24: test/DSR\June-2024\June24_Invoice.xlsx -> Raw data June 24
   3. June 25: test/DSR\June-2025\Invoicedate_30.xlsx -> Sheet1
🎯 Target file: test/DSR\June-2025\Target_June_25.xlsx -> Target-June25
📊 Session file: tes

## Collect day-wise sums for each sheet

In [293]:
# Collect day-wise and TYPE-wise sums for each sheet
results = []
type_results = []
dfs = []  # Store the processed dataframes for each sheet

# First, process each sheet and store the dataframe, day sum, and type sum
for idx, (path, sheet, display_name) in enumerate(sheet_info):
    df = pd.read_excel(path, sheet_name=sheet)
    filtered_df = df[~df['idg'].isin(['FOC', 'Remove', 'WRT'])].copy()
    filtered_df['InvoiceDay'] = pd.to_datetime(filtered_df['InvoiceDate'], dayfirst=True, errors='coerce').dt.day
    
    # Map CC to Jumbo.ae in the TYPE column
    filtered_df['TYPE'] = filtered_df['TYPE'].replace('CC', 'Jumbo.ae')
    filtered_df['TYPE'] = filtered_df['TYPE'].replace('jumbo.ae', 'Jumbo.ae')
    
    # Day-wise sum
    invoice_day_sum = filtered_df.groupby('InvoiceDay')['Amount Invoiced W.O. VAT'].sum()
    results.append((idx, invoice_day_sum))  # Store the index instead of sheet name
    
    # TYPE-wise sum for Jumbo.ae and EA
    filtered_type = filtered_df[filtered_df['TYPE'].isin(['Jumbo.ae', 'EA'])]
    sum_by_day_type = filtered_type.groupby(['InvoiceDay', 'TYPE'])['Amount Invoiced W.O. VAT'].sum().unstack(fill_value=0)
    type_results.append((idx, sum_by_day_type))  # Store the index instead of sheet name
    
    # Store the processed dataframe
    dfs.append(filtered_df)

type_results

[(0,
  TYPE                EA    Jumbo.ae
  InvoiceDay                        
  1            82528.440  131579.770
  2            72707.104  137289.165
  3           103473.880  101385.259
  4           168560.074  108395.948
  5            58094.577  113710.142
  6            58095.246   89224.399
  7            37775.141   96162.604
  8            61612.441  120124.135
  9            55628.192   89562.995
  10          134201.993  103498.824
  11           72807.087   60733.907
  12           66942.023  114640.947
  13          102211.760   91853.379
  14          112782.215   84981.141
  15           71914.388  120735.735
  16          116657.573  139414.822
  17           83215.021   69736.386
  18          109342.020   73419.887
  19          150136.517  111620.875
  20           53445.182   97200.961
  21           86518.866   87950.652
  22           87820.262  126864.630
  23           70848.688   87096.973
  24          100283.105   81711.063
  25          107369.686   91275.

## Combine results into a single DataFrame

In [294]:
# Get all days
all_days = set()
for _, s in results:
    all_days.update(s.index)
all_days = sorted(all_days)

# Convert results to a DataFrame using sheet indices
output = pd.DataFrame({'Day': all_days})
for idx, s in results:
    sheet_display = sheet_info[idx][2]  # Get display name from sheet_info
    output[sheet_display] = output['Day'].map(s)  # Use display name as column name

## Sum the 'Target' column by day number from the 'Date' column in the target sheet

## Calculate Percentage Differences
Add comparison columns for each channel:
- 'v/s Last Year': Percentage difference between first sheet (Raw data May 24) and latest sheet (Sheet1)
- 'v/s Last Month': Percentage difference between middle sheet (Raw data April 25) and latest sheet (Sheet1)
- 'v/s Target': Percentage difference between Target value and latest sheet (Sheet1)

In [295]:
# Get target sums by day and channel using constants
target_df = pd.read_excel(TARGET_PATH, sheet_name=TARGET_SHEET)
target_sums = target_df.groupby(['Date', 'Channel'])['Target'].sum().unstack(fill_value=0).round(6)

In [296]:
# Define channels and prepare data for final output (changed order to Jumbo.ae, EA, Total)
channels = ['Jumbo.ae', 'EA', 'Total']
# Use display names for column headers, but keep original indices for data processing
display_names = [display_name for _, _, display_name in sheet_info]
subcolumns = display_names[:2] + ['Target'] + [display_names[2]] + ['v/s Target', 'v/s Last Year', 'v/s Last Month']

# Create the MultiIndex
arrays = [[], []]
for channel in channels:
    for subcol in subcolumns:
        arrays[0].append(channel)
        arrays[1].append(subcol)

multiindex = pd.MultiIndex.from_arrays(arrays, names=['Channel', 'Type'])

# Process data by channel
multiindex_data = {}
for channel in channels:
    channel_data = {}
    
    # Process the raw data using indices for data access
    for idx, (sheet_idx, df) in enumerate(type_results):
        if channel == 'Total':
            channel_data[sheet_idx] = df.sum(axis=1)
        else:
            channel_data[sheet_idx] = df[channel]

    # Add target data
    if channel == 'Total':
        channel_data['Target'] = target_sums.sum(axis=1)
    else:
        channel_data['Target'] = target_sums[channel]

    # Calculate percentage differences using indices
    vs_last_year = []
    for day in all_days:
        latest_sheet_value = channel_data[LATEST_MONTH_INDEX].get(day, 0)
        first_sheet_value = channel_data[LAST_YEAR_INDEX].get(day, 0)
        
        if first_sheet_value == 0:
            if latest_sheet_value == 0:
                pct_diff = 0
            else:
                pct_diff = float('inf')
        else:
            pct_diff = ((latest_sheet_value - first_sheet_value) / first_sheet_value) * 100
            pct_diff = int(round(pct_diff))
        
        vs_last_year.append(pct_diff)
    
    vs_last_month = []
    for day in all_days:
        latest_sheet_value = channel_data[LATEST_MONTH_INDEX].get(day, 0)
        middle_sheet_value = channel_data[LAST_MONTH_INDEX].get(day, 0)
        
        if middle_sheet_value == 0:
            if latest_sheet_value == 0:
                pct_diff = 0
            else:
                pct_diff = float('inf')
        else:
            pct_diff = ((latest_sheet_value - middle_sheet_value) / middle_sheet_value) * 100
            pct_diff = int(round(pct_diff))
        
        vs_last_month.append(pct_diff)
    
    vs_target = []
    for day in all_days:
        latest_sheet_value = channel_data[LATEST_MONTH_INDEX].get(day, 0)
        target_value = channel_data['Target'].get(day, 0)
        
        if target_value == 0:
            if latest_sheet_value == 0:
                pct_diff = 0
            else:
                pct_diff = float('inf')
        else:
            pct_diff = ((latest_sheet_value ) / target_value) * 100
            pct_diff = int(round(pct_diff))
        
        vs_target.append(pct_diff)
    
    # Store all calculated columns
    channel_data['v/s Last Year'] = vs_last_year
    channel_data['v/s Last Month'] = vs_last_month
    channel_data['v/s Target'] = vs_target
    multiindex_data[channel] = channel_data

In [297]:
# Prepare data for DataFrame
all_data = []

# Create mapping from display names to sheet indices for data access
display_to_idx = {
    LAST_MONTH_DISPLAY: LAST_MONTH_INDEX,
    LAST_YEAR_DISPLAY: LAST_YEAR_INDEX,
    LATEST_DISPLAY: LATEST_MONTH_INDEX
}

for day in all_days:
    row = []
    for channel in channels:
        for subcol in subcolumns:
            if subcol in ['v/s Target', 'v/s Last Year', 'v/s Last Month']:
                # Get the index of this day in the list
                day_index = all_days.index(day)
                row.append(multiindex_data[channel][subcol][day_index])
            elif subcol == 'Target':
                # Target data remains the same
                row.append(multiindex_data[channel]['Target'].get(day, 0))
            else:
                # Map display name to sheet index for data access
                sheet_idx = display_to_idx.get(subcol)
                if sheet_idx is not None:
                    row.append(multiindex_data[channel][sheet_idx].get(day, 0))
                else:
                    row.append(0)  # Default if mapping not found
    all_data.append(row)

# Create final DataFrame
final_output = pd.DataFrame(all_data, columns=multiindex)
final_output.insert(0, 'Day', all_days)

# Add day names based on the first day of the month provided by user
# We'll add this in Excel formatting since we need the user input for first day of month

# Save initial data to Excel
final_output.to_excel(output_path)

In [298]:
import calendar

def get_first_day_of_month(month_year):
    """
    Determine the first day of the month given a month-year string (e.g., 'June-2025')
    Returns the day name in lowercase (e.g., 'monday', 'tuesday', etc.)
    """
    month_name, year = month_year.split('-')
    month_name = month_name.capitalize()
    year = int(year)
    month_num = list(calendar.month_name).index(month_name)
    
    # Get the weekday of the first day of the month (0 = Monday, 6 = Sunday)
    first_weekday = calendar.weekday(year, month_num, 1)
    
    # Convert to day name (0 = Monday, 6 = Sunday)
    days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
    return str(days[first_weekday]).capitalize()



In [299]:
# Format the Excel file
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment
from openpyxl.utils import get_column_letter
from openpyxl.formatting.rule import Rule
from openpyxl.styles.differential import DifferentialStyle
from datetime import datetime, timedelta

# Define the output path (make sure this file exists or create it first)
output_path = "invoice_day_channel_report_compatible.xlsx"

# Get the first day of the month from user
while True:
    first_day = get_first_day_of_month(latest_month_year).strip().lower()
    if first_day in ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']:
        break
    print('Invalid input! Please enter a valid day name.')

# Create a mapping of days to their position in a week (0=Monday to 6=Sunday)
day_positions = {
    'monday': 0, 'tuesday': 1, 'wednesday': 2, 'thursday': 3,
    'friday': 4, 'saturday': 5, 'sunday': 6
}
day_names = ['Mon', 'Tues', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# Get position of the first day (0-6, where 0 is Monday)
first_day_position = day_positions[first_day]

def get_week_info(day_of_month, first_day_pos):
    """Get week information for a given day.
    Returns (week_number, is_first_partial_week)"""
    # For days in the first partial week
    if first_day_pos > 0:  # If month doesn't start on Monday
        days_till_next_monday = 7 - first_day_pos
        if day_of_month <= days_till_next_monday:
            return 1, True
        # Adjust day number to calculate remaining weeks
        adjusted_day = day_of_month - days_till_next_monday
        return (adjusted_day - 1) // 7 + 2, False
    else:  # If month starts on Monday
        return (day_of_month - 1) // 7 + 1, False

def get_day_name(day_number, first_day_pos):
    """Get the day name for a given day of month"""
    # Calculate the day of week (0-6, where 0 is Monday)
    day_of_week = (first_day_pos + day_number - 1) % 7
    return day_names[day_of_week]

try:
    wb = load_workbook(output_path)
    ws = wb.active
except FileNotFoundError:
    print(f"Error: File {output_path} not found. Please ensure the file exists.")
    exit(1)

# Get dimensions
data_rows = ws.max_row
data_cols = ws.max_column

# Insert a new column for day names after the Day column
ws.insert_cols(3)  # Insert after Day column (column 2)

# Define styles
header_font = Font(bold=True, color="FFFFFF")
header_fill = PatternFill("solid", fgColor="4472C4")
subheader_fill = PatternFill("solid", fgColor="8EA9DB")
total_fill = PatternFill("solid", fgColor="FFC000")
border_style = Side(style='thin')
border = Border(left=border_style, right=border_style, top=border_style, bottom=border_style)

# Insert title row
ws.insert_rows(1)
title_cell = ws.cell(row=1, column=1)
title_cell.value = f"Invoice Day Channel Report - Generated on {datetime.now().strftime('%Y-%m-%d')}"
title_cell.font = Font(bold=True, size=14)
title_cell.alignment = Alignment(horizontal="center")

# Set up the day name column headers
ws.cell(row=2, column=3).value = ""  # Channel level header
ws.cell(row=3, column=3).value = "Day Name"  # Type level header

# Apply formatting to headers
for col in range(1, data_cols + 2):  # +2 to account for the new column and title row
    # Format the cell in title row
    ws.cell(row=1, column=col).border = border
    
    # Top header (channel)
    top_header_cell = ws.cell(row=2, column=col)
    top_header_cell.font = header_font
    top_header_cell.fill = header_fill
    top_header_cell.alignment = Alignment(horizontal="center", vertical="center")
    top_header_cell.border = border
    
    # Second header (sheet/target/comparison)
    second_header_cell = ws.cell(row=3, column=col)
    second_header_cell.font = header_font
    second_header_cell.fill = subheader_fill
    second_header_cell.alignment = Alignment(horizontal="center", vertical="center")
    second_header_cell.border = border

# Populate the day name column
day_col_idx = 2  # Day column (accounting for title row)
day_name_col_idx = 3  # Day Name column
data_start_row = 4  # First data row

# Add day names for each day number
for row in range(data_start_row, data_rows + 2):
    day_cell = ws.cell(row=row, column=day_col_idx)
    day_name_cell = ws.cell(row=row, column=day_name_col_idx)
    
    if isinstance(day_cell.value, (int, float)):
        # Get the day name for this day number
        day_name = get_day_name(int(day_cell.value), first_day_position)
        day_name_cell.value = day_name
        day_name_cell.alignment = Alignment(horizontal="center")
    
    # Apply border and fill based on row
    day_name_cell.border = border
    day_name_cell.fill = PatternFill("solid", fgColor="F2F2F2") if row % 2 == 0 else PatternFill()

# Auto-adjust column widths for all columns
for col in range(1, data_cols + 2):  # +2 to account for the new column and title row
    max_length = 0
    for row in range(1, data_rows + 2):
        cell_value = ws.cell(row=row, column=col).value
        if cell_value:
            max_length = max(max_length, len(str(cell_value)))
    adjusted_width = max(max_length + 2, 12)  # minimum width of 12
    ws.column_dimensions[get_column_letter(col)].width = adjusted_width

# Format data cells
for row in range(4, data_rows + 2):  # Start after headers and title
    # Apply row striping
    row_fill = PatternFill("solid", fgColor="F2F2F2") if row % 2 == 0 else PatternFill()
    
    for col in range(1, data_cols + 2):  # +2 to account for the new column and title row
        if col == day_name_col_idx:  # Skip day name column as it's already formatted
            continue
            
        cell = ws.cell(row=row, column=col)
        cell.fill = row_fill
        
        header_value = ws.cell(row=3, column=col).value
        
        # Format based on content
        if isinstance(cell.value, (int, float)):
            # Format percentage columns
            if header_value in ['v/s Target', 'v/s Last Year', 'v/s Last Month']:
                if cell.value == float('inf'):
                    cell.value = 'N/A'
                else:
                    value = int(round(cell.value)) # Raw percentage value, e.g., -10, 25, 150
                    
                    if header_value == 'v/s Target':
                        # For v/s Target, original text display was absolute value + %
                        cell.value = f"{abs(value)}%" 
                        # Coloring for v/s Target
                        if value >= 100:
                            cell.font = Font(color="006100")  # Dark green text
                            cell.fill = PatternFill(start_color='C6EFCE', end_color='C6EFCE', fill_type='solid')  # Light green fill
                        else:
                            cell.font = Font(color="9C0006")  # Dark red text
                            cell.fill = PatternFill(start_color='FFC7CE', end_color='FFC7CE', fill_type='solid')  # Light red fill
                    else:  # For 'v/s Last Year' and 'v/s Last Month'
                        if value > 0:
                            cell.value = f"+{value}%"
                            cell.font = Font(color="006100")  # Dark green text
                            cell.fill = PatternFill(start_color='C6EFCE', end_color='C6EFCE', fill_type='solid')  # Light green fill
                        elif value < 0:
                            cell.value = f"{value}%"  # Negative sign is included with 'value'
                            cell.font = Font(color="9C0006")  # Dark red text
                            cell.fill = PatternFill(start_color='FFC7CE', end_color='FFC7CE', fill_type='solid')  # Light red fill
                        else:  # value == 0
                            cell.value = "0%"
                            cell.font = Font(color="000000") # Black text for neutral
                            # cell.fill = PatternFill() # No specific fill, row striping applies, or set a neutral one
                cell.alignment = Alignment(horizontal="center")
            else: # Not a percentage column
                cell.number_format = '#,##0'
        
        cell.border = border
        
        # Center align Day column
        if header_value == 'Day':
            cell.alignment = Alignment(horizontal="center")

# Group days into weeks and add subtotals
data_start_row = 4  # First data row
week_ranges = []  # Store (start_row, end_row, week_num) for each week
subtotal_rows = []  # Store the row numbers of subtotals for grand total calculation

# First, collect all the day rows and their week numbers
day_rows = []
for row in range(data_start_row, data_rows + 2):
    day = ws.cell(row=row, column=day_col_idx).value
    if isinstance(day, (int, float)):
        # Get week number and whether it's part of the first partial week
        week_num, is_partial = get_week_info(int(day), first_day_position)
        day_rows.append((row, day, week_num, is_partial))

# Now organize them into week ranges
current_week = None
week_start_row = None
is_current_partial = False

for i, (row, day, week, is_partial) in enumerate(day_rows):
    if current_week != week:
        if current_week is not None:
            # End the previous week
            week_ranges.append((week_start_row, row - 1, current_week, is_current_partial))
        # Start a new week
        current_week = week
        week_start_row = row
        is_current_partial = is_partial
    
    # Handle the last week
    if i == len(day_rows) - 1:
        week_ranges.append((week_start_row, row, week, is_partial))

# Insert subtotal rows
rows_added = 0
for start_row, end_row, week_num, is_partial in week_ranges:
    # Adjusted for previously added subtotal rows
    adjusted_start = start_row + rows_added
    adjusted_end = end_row + rows_added
    
    # Insert the subtotal row
    ws.insert_rows(adjusted_end + 1)
    rows_added += 1
    subtotal_rows.append(adjusted_end + 1)  # Store the subtotal row number
    
    # Format subtotal row
    for col in range(1, data_cols + 2):  # +2 to account for the new column and title row
        subtotal_cell = ws.cell(row=adjusted_end + 1, column=col)
        subtotal_cell.font = Font(bold=True)
        subtotal_cell.fill = total_fill
        subtotal_cell.border = border
        
        if col == day_col_idx:
            if is_partial:
                days_in_partial = 7 - first_day_position
                subtitle = f"Week 1 (Partial: {days_in_partial} days)"
            else:
                subtitle = f"Week {week_num}"
            subtotal_cell.value = f"{subtitle} Subtotal"
            subtotal_cell.alignment = Alignment(horizontal="center")
        
        # Add Day Name subtotal cell
        elif col == day_name_col_idx:
            subtotal_cell.value = ""  # Leave empty for subtotals
        
        elif col > day_name_col_idx:  # Adjust for the Day Name column
            header_value = ws.cell(row=3, column=col).value
            col_letter = get_column_letter(col)
            
            if header_value == 'v/s Target':
                # Get the column letter for "Test" (current month) for this channel - now at column F
                sheet1_col = col - 1  # Current month data (Test) is 1 column before v/s Target
                sheet1_letter = get_column_letter(sheet1_col)
                
                # Get the column letter for "Target" for this channel - now at column E
                target_col = col - 2  # Target data is 2 columns before v/s Target
                target_letter = get_column_letter(target_col)
                
                # Create formula to calculate percentage: (Test) / Target
                # Ensure the formula outputs a number, and handle division by zero by outputting 0.
                formula = f"=IF({target_letter}{adjusted_end+1}=0,0,ROUND(({sheet1_letter}{adjusted_end+1})/{target_letter}{adjusted_end+1}*100,0))"
                subtotal_cell.value = formula
                subtotal_cell.number_format = '0.00\"%\"'  # Apply percentage number format
                subtotal_cell.alignment = Alignment(horizontal="center")
            
            elif header_value == 'v/s Last Year':
                # Get the column letter for "Test" (current month) for this channel - now at column F
                sheet1_col = col - 2  # Current month data (Test) is 2 columns before v/s Last Year
                sheet1_letter = get_column_letter(sheet1_col)
                
                # Get the column letter for "Raw data May 24" (last year) for this channel - at column D
                may24_col = col - 4  # Last year data (Raw data May 24) is 4 columns before v/s Last Year
                may24_letter = get_column_letter(may24_col)
                
                # Create formula to calculate percentage: (Test - Raw data May 24) / Raw data May 24
                # Ensure the formula outputs a number, and handle division by zero by outputting 0.
                formula = f"=IF({may24_letter}{adjusted_end+1}=0,0,ROUND(({sheet1_letter}{adjusted_end+1}-{may24_letter}{adjusted_end+1})/{may24_letter}{adjusted_end+1}*100,0))"
                subtotal_cell.value = formula
                subtotal_cell.number_format = '0.00\"%\"' # Display as percentage
                subtotal_cell.alignment = Alignment(horizontal="center")
                
            elif header_value == 'v/s Last Month':
                # Get the column letter for "Test" (current month) for this channel - now at column F
                sheet1_col = col - 3  # Current month data (Test) is 3 columns before v/s Last Month
                sheet1_letter = get_column_letter(sheet1_col)
                
                # Get the column letter for "Raw data April 25" (last month) for this channel - at column C
                april25_col = col - 6  # Last month data (Raw data April 25) is 6 columns before v/s Last Month
                april25_letter = get_column_letter(april25_col)
                
                # Create formula to calculate percentage: (Test - Raw data April 25) / Raw data April 25
                # Ensure the formula outputs a number, and handle division by zero by outputting 0 for the numeric part.
                # The original formula appended "%" making it text.
                formula_numeric_part = f"IF({april25_letter}{adjusted_end+1}=0,0,ROUND(({sheet1_letter}{adjusted_end+1}-{april25_letter}{adjusted_end+1})/{april25_letter}{adjusted_end+1}*100,0))"
                
                # The rule for these columns is > 0 is green, < 0 is red.
                # The original code for data rows adds a "+" sign.
                # For consistency with conditional formatting rules (which expect numbers), we'll keep it numeric.
                # The display format will be handled by number_format if needed, or Excel's default for numbers.
                # However, the original code for subtotal formulas for these columns also appended "%".
                # Let's make them numeric and apply number format for consistency with 'v/s Target' approach.
                formula = f"=IF({april25_letter}{adjusted_end+1}=0,0,ROUND(({sheet1_letter}{adjusted_end+1}-{april25_letter}{adjusted_end+1})/{april25_letter}{adjusted_end+1}*100,0))"
                subtotal_cell.value = formula
                subtotal_cell.number_format = '0.00\"%\"' # Display as percentage
                subtotal_cell.alignment = Alignment(horizontal="center")
                
            elif header_value not in ['v/s Target', 'v/s Last Year', 'v/s Last Month']:
                # Calculate sum for this week's range
                subtotal_cell.value = f"=SUM({col_letter}{adjusted_start}:{col_letter}{adjusted_end})"
                subtotal_cell.number_format = '#,##0'

# Add grand total row
grand_total_row = data_rows + rows_added + 2
ws.insert_rows(grand_total_row)

# Format grand total row
for col in range(1, data_cols + 2):  # +2 to account for the new column and title row
    grand_total_cell = ws.cell(row=grand_total_row, column=col)
    grand_total_cell.font = Font(bold=True)
    grand_total_cell.fill = total_fill
    grand_total_cell.border = border
    
    if col == day_col_idx:
        grand_total_cell.value = "Grand Total"
        grand_total_cell.alignment = Alignment(horizontal="center")
    elif col == day_name_col_idx:
        grand_total_cell.value = ""
    elif col > day_name_col_idx:  # Adjust for the Day Name column
        header_value = ws.cell(row=3, column=col).value
        col_letter = get_column_letter(col)
        
        if header_value == 'v/s Target':
            # Get the column letters for Test (column F) and Target (column E) for grand total
            sheet1_col = col - 1
            sheet1_letter = get_column_letter(sheet1_col)
            target_col = col - 2
            target_letter = get_column_letter(target_col)
            
            # Create formula for grand total percentage - numeric output
            formula = f"=IF({target_letter}{grand_total_row}=0,0,ROUND(({sheet1_letter}{grand_total_row})/{target_letter}{grand_total_row}*100,0))"
            grand_total_cell.value = formula
            grand_total_cell.number_format = '0.00\"%\"'  # Apply percentage number format
            grand_total_cell.alignment = Alignment(horizontal="center")
            
        elif header_value == 'v/s Last Year':
            # Get the column letters for Test (column F) and Raw data May 24 (column D) for grand total
            sheet1_col = col - 2
            sheet1_letter = get_column_letter(sheet1_col)
            may24_col = col - 4
            may24_letter = get_column_letter(may24_col)
            
            # Create formula for grand total percentage - numeric output
            formula = f"=IF({may24_letter}{grand_total_row}=0,0,ROUND(({sheet1_letter}{grand_total_row}-{may24_letter}{grand_total_row})/{may24_letter}{grand_total_row}*100,0))"
            grand_total_cell.value = formula
            grand_total_cell.number_format = '0.00\"%\"' # Display as percentage
            grand_total_cell.alignment = Alignment(horizontal="center")
            
        elif header_value == 'v/s Last Month':
            # Get the column letters for Test (column F) and Raw data April 25 (column C) for grand total
            sheet1_col = col - 3
            sheet1_letter = get_column_letter(sheet1_col)
            april25_col = col - 6
            april25_letter = get_column_letter(april25_col)
            
            # Create formula for grand total percentage - numeric output
            formula = f"=IF({april25_letter}{grand_total_row}=0,0,ROUND(({sheet1_letter}{grand_total_row}-{april25_letter}{grand_total_row})/{april25_letter}{grand_total_row}*100,0))"
            grand_total_cell.value = formula
            grand_total_cell.number_format = '0.00\"%\"' # Display as percentage
            grand_total_cell.alignment = Alignment(horizontal="center")
            
        elif header_value not in ['v/s Target', 'v/s Last Year', 'v/s Last Month']:
            # Build formula to sum only the weekly subtotal rows
            if subtotal_rows:  # Only if we have subtotal rows
                subtotal_ranges = [f"{col_letter}{row}" for row in subtotal_rows]
                formula = "=SUM(" + ",".join(subtotal_ranges) + ")"
                grand_total_cell.value = formula
                grand_total_cell.number_format = '#,##0'

# Apply conditional formatting with simpler rules
# Define styles for conditional formatting
green_fill = PatternFill(start_color='C6EFCE', end_color='C6EFCE', fill_type='solid')
red_fill = PatternFill(start_color='FFC7CE', end_color='FFC7CE', fill_type='solid')
green_font = Font(color='006100', bold=True)
red_font = Font(color='9C0006', bold=True)

# Create differential styles
green_style = DifferentialStyle(fill=green_fill, font=green_font)
red_style = DifferentialStyle(fill=red_fill, font=red_font)

# Apply conditional formatting to percentage columns in subtotal and grand total rows
percentage_columns = []
for col in range(day_name_col_idx + 1, data_cols + 2):
    header_value = ws.cell(row=3, column=col).value
    if header_value in ['v/s Target', 'v/s Last Year', 'v/s Last Month']:
        percentage_columns.append(col)

# Apply formatting to subtotal rows
for subtotal_row in subtotal_rows:
    for col in percentage_columns:
        header_value = ws.cell(row=3, column=col).value
        cell_range = f"{get_column_letter(col)}{subtotal_row}"
        
        try:
            if header_value == 'v/s Target':
                # For Target: Green if >= 100, Red if < 100
                green_rule = Rule(type="cellIs", operator="greaterThanOrEqual", formula=[100], dxf=green_style)
                red_rule = Rule(type="cellIs", operator="lessThan", formula=[100], dxf=red_style)
            else:
                # For Last Year and Last Month: Green if > 0, Red if < 0
                green_rule = Rule(type="cellIs", operator="greaterThan", formula=[0], dxf=green_style)
                red_rule = Rule(type="cellIs", operator="lessThan", formula=[0], dxf=red_style)
            
            ws.conditional_formatting.add(cell_range, green_rule)
            ws.conditional_formatting.add(cell_range, red_rule)
        except Exception as e:
            print(f"Warning: Could not apply conditional formatting to {cell_range}: {e}")

# Apply formatting to grand total row
for col in percentage_columns:
    header_value = ws.cell(row=3, column=col).value
    cell_range = f"{get_column_letter(col)}{grand_total_row}"
    
    try:
        if header_value == 'v/s Target':
            # For Target: Green if >= 100, Red if < 100
            green_rule = Rule(type="cellIs", operator="greaterThanOrEqual", formula=[100], dxf=green_style)
            red_rule = Rule(type="cellIs", operator="lessThan", formula=[100], dxf=red_style)
        else:
            # For Last Year and Last Month: Green if > 0, Red if < 0
            green_rule = Rule(type="cellIs", operator="greaterThan", formula=[0], dxf=green_style)
            red_rule = Rule(type="cellIs", operator="lessThan", formula=[0], dxf=red_style)
        
        ws.conditional_formatting.add(cell_range, green_rule)
        ws.conditional_formatting.add(cell_range, red_rule)
    except Exception as e:
        print(f"Warning: Could not apply conditional formatting to {cell_range}: {e}")

# Manually merge headers - be careful for future changes
try:
    # Only merge if we have enough columns
    if data_cols >= 24:  # Adjust based on your actual column count
        ws.merge_cells('D2:J2')
        ws.merge_cells('K2:Q2')
        ws.merge_cells('R2:X2')
    else:
        print("Warning: Not enough columns to merge headers as specified")
        ws.merge_cells('D2:J2')
        ws.merge_cells('K2:Q2')
        ws.merge_cells('R2:X2')
except Exception as e:
    print(f"Warning: Could not merge headers: {e}")

# Save the final formatted workbook
try:
    wb.save(output_path)
    print(f"Created final Excel report at {output_path}")
    print(f"First day of month was {first_day.capitalize()}, weeks are aligned to start on Monday")
    print(f"Added day name column next to the Day column for better readability")
except Exception as e:
    print(f"Error saving file: {e}")
    # Try saving with a different name
    backup_path = output_path.replace('.xlsx', '_backup.xlsx')
    try:
        wb.save(backup_path)
        print(f"Saved backup file as {backup_path}")
    except Exception as e2:
        print(f"Could not save backup file either: {e2}")

Created final Excel report at invoice_day_channel_report_compatible.xlsx
First day of month was Sunday, weeks are aligned to start on Monday
Added day name column next to the Day column for better readability


## Session Data Processing
Read and display the data from session.xlsx file

In [300]:


# Apply filters with case-insensitive comparison for Category
# 1. Remove Gift Card from Category (case-insensitive)
# 2. Remove EA and Endless Aisle from CG
session_df = session_df[
    (~session_df['Category'].str.lower().str.contains('gift card', na=False)) & 
    (~session_df['CG'].isin(['EA', 'Endless Aisle']))
]

session_df['Date'] = pd.to_datetime(session_df['Date'], format='%Y%m%d', errors='coerce')

# Convert 'Day' column to day number (handles both date and string types)
session_df['Day'] = session_df['Date'].dt.day

# Select only the important columns
session_df = session_df[important_columns]

session_df


Unnamed: 0,Day,Channel,Sessions,Purchases,Purchase revenue,CG,Category
0,1.0,Paid Perf,1.0,1.0,2498.999999,,
1,1.0,Organic,1.0,1.0,3899.000000,,
2,1.0,Paid Perf,1.0,1.0,8999.000001,Online,Television & Home Theaters
3,1.0,Paid Perf,1.0,1.0,1699.000000,Online,Headphones & Speakers
4,1.0,Email,1.0,1.0,498.000000,Online,Health and Personal Care
...,...,...,...,...,...,...,...
9550,,,,,,,
9551,,,,,,,
9552,,,,,,,
9553,,,,,,,


In [301]:
month_days = 31

## Channel-wise Session, Purchases, and Purchase Revenue
Group the filtered session data by Channel and aggregate Sessions, Purchases, and Purchase revenue for Email, Organic, Paid Perf, and Paid Other.

In [302]:
# Import IPython display to avoid conflicts with overridden display variable
from IPython.display import display

# Get all unique channels
channels_of_interest = sorted(session_df['Channel'].dropna().unique())

# Filter for the channels
filtered = session_df[session_df['Channel'].isin(channels_of_interest)]

# Group by Channel and aggregate
agg = filtered.groupby('Channel')[['Sessions', 'Purchases', 'Purchase revenue']].sum().reset_index()

# Display the result
print("Available channels:")
print(channels_of_interest)
print("\nAggregated data by Channel (Sessions, Purchases, Purchase revenue):")
display(agg)

Available channels:
['Affiliates', 'Display', 'Email', 'Organic', 'Paid Other', 'Paid Perf', 'Paid Social']

Aggregated data by Channel (Sessions, Purchases, Purchase revenue):


Unnamed: 0,Channel,Sessions,Purchases,Purchase revenue
0,Affiliates,21374.0,135.0,366036.549996
1,Display,964.0,0.0,0.0
2,Email,19687.0,15.0,18955.000008
3,Organic,661888.0,392.0,837672.070007
4,Paid Other,2736.0,0.0,0.0
5,Paid Perf,189773.0,430.0,883550.999991
6,Paid Social,412.0,0.0,0.0


## Day and Channel-wise Session, Purchases, and Purchase Revenue
Group the filtered session data by Day number and Channel, aggregating Sessions, Purchases, and Purchase revenue for Email, Organic, Paid Perf, and Paid Other.

In [303]:
# Group by Day and Channel, aggregate Sessions, Purchases, and Purchase revenue
agg_day_channel = filtered.groupby(['Day', 'Channel'])[['Sessions', 'Purchases', 'Purchase revenue']].sum().reset_index()

# Display the result
print("Aggregated data by Day and Channel (Sessions, Purchases, Purchase revenue):")
display(agg_day_channel)

Aggregated data by Day and Channel (Sessions, Purchases, Purchase revenue):


Unnamed: 0,Day,Channel,Sessions,Purchases,Purchase revenue
0,1.0,Affiliates,717.0,1.0,1199.000000
1,1.0,Display,28.0,0.0,0.000000
2,1.0,Email,1513.0,5.0,4938.000004
3,1.0,Organic,17785.0,13.0,20482.000002
4,1.0,Paid Other,143.0,0.0,0.000000
...,...,...,...,...,...
203,30.0,Email,1338.0,0.0,0.000000
204,30.0,Organic,18529.0,21.0,51587.000006
205,30.0,Paid Other,31.0,0.0,0.000000
206,30.0,Paid Perf,5273.0,12.0,43359.999998


## Pivot Table: Day-wise Channel Split for Sessions, Purchases, and Purchase Revenue
A table with super columns for Sessions, Purchases, and Purchase revenue, each split by channel (Email, Organic, Paid Perf, Paid Other), and Day as the index.

In [304]:
import numpy as np
import pandas as pd

# Assuming 'filtered' DataFrame is already defined in the environment

# Create a complete DataFrame with all days (1-31) for each channel
all_days = list(range(1, 32))  # All days in a month (1-31)
all_channels = sorted(filtered['Channel'].unique())

# Create empty dataframe with all possible day-channel combinations
full_month_data = []
for day in all_days:
    for channel in all_channels:
        full_month_data.append({
            'Day': day,
            'Channel': channel,
            'Sessions': 0,
            'Purchases': 0,
            'Purchase revenue': 0.0
        })

# Create full month DataFrame
full_month_df = pd.DataFrame(full_month_data)

# Update with actual data where available
actual_data = filtered.groupby(['Day', 'Channel']).agg({
    'Sessions': 'sum',
    'Purchases': 'sum',
    'Purchase revenue': 'sum'
}).reset_index()

# Merge actual data with full month data
full_month_df = pd.merge(
    full_month_df,
    actual_data,
    on=['Day', 'Channel'],
    how='left',
    suffixes=('_full', '')
).fillna(0)

# Keep only the columns we need
full_month_df = full_month_df[['Day', 'Channel', 'Sessions', 'Purchases', 'Purchase revenue']]

# Create the pivot table using the full month data
pivot = full_month_df.pivot_table(
    index='Day',
    columns='Channel',
    values=['Sessions', 'Purchases', 'Purchase revenue'],
    aggfunc='sum',
    fill_value=0
)

# Get all channels in sorted order
channels_order = sorted(full_month_df['Channel'].unique())
metrics_order = ['Sessions', 'Purchases', 'Purchase revenue']

# Build MultiIndex columns in the desired order
pivot = pivot.reindex(columns=pd.MultiIndex.from_product([metrics_order, channels_order]))

# Calculate CVR and AOV for each channel
cvr_data = {}
aov_data = {}

for channel in channels_order:
    # Calculate CVR (Conversion Rate) = (Purchases / Sessions) * 100
    cvr = (pivot[('Purchases', channel)] / pivot[('Sessions', channel)] * 100).round(2)
    cvr_data[channel] = cvr.replace([np.inf, -np.inf], 0)  # Handle division by zero
    
    # Calculate AOV (Average Order Value) = Purchase revenue / Purchases
    aov = (pivot[('Purchase revenue', channel)] / pivot[('Purchases', channel)]).round(2)
    aov_data[channel] = aov.replace([np.inf, -np.inf], 0)  # Handle division by zero

# Add CVR and AOV to the pivot table
for channel in channels_order:
    pivot[('CVR', channel)] = cvr_data[channel]
    pivot[('AOV', channel)] = aov_data[channel]

# Update metrics order to include new columns
metrics_order = ['Sessions', 'Purchases', 'Purchase revenue', 'CVR', 'AOV']

# Reorder all columns according to the updated metrics
pivot = pivot.reindex(columns=pd.MultiIndex.from_product([metrics_order, channels_order]))

# Reset index for display
pivot = pivot.reset_index()

# Display the result
print("Day-wise channel split with Sessions, Purchases, Purchase revenue, CVR, and AOV (all days 1-31):")
display(pivot)

Day-wise channel split with Sessions, Purchases, Purchase revenue, CVR, and AOV (all days 1-31):


Unnamed: 0_level_0,Day,Sessions,Sessions,Sessions,Sessions,Sessions,Sessions,Sessions,Purchases,Purchases,...,CVR,CVR,CVR,AOV,AOV,AOV,AOV,AOV,AOV,AOV
Unnamed: 0_level_1,Unnamed: 1_level_1,Affiliates,Display,Email,Organic,Paid Other,Paid Perf,Paid Social,Affiliates,Display,...,Paid Other,Paid Perf,Paid Social,Affiliates,Display,Email,Organic,Paid Other,Paid Perf,Paid Social
0,1,717.0,28.0,1513.0,17785.0,143.0,8699.0,3.0,1.0,0.0,...,0.0,0.13,0.0,1199.0,,987.6,1575.54,,1822.73,
1,2,750.0,31.0,1591.0,17498.0,44.0,8386.0,0.0,6.0,0.0,...,0.0,0.16,,2065.67,,,2214.57,,2097.3,
2,3,711.0,39.0,735.0,18318.0,34.0,7807.0,6.0,2.0,0.0,...,0.0,0.22,0.0,3648.5,,,2042.82,,2045.71,
3,4,630.0,39.0,262.0,18834.0,32.0,7901.0,35.0,7.0,0.0,...,0.0,0.16,0.0,2212.43,,,1365.88,,1637.08,
4,5,552.0,80.0,558.0,17143.0,12.0,7989.0,49.0,6.0,0.0,...,0.0,0.19,0.0,1912.33,,2098.33,2232.17,,1683.53,
5,6,555.0,100.0,830.0,21662.0,76.0,8083.0,34.0,2.0,0.0,...,0.0,0.19,0.0,1499.0,,,2034.0,,916.67,
6,7,507.0,93.0,942.0,16903.0,106.0,8270.0,29.0,1.0,0.0,...,0.0,0.17,0.0,479.0,,3598.0,1094.0,,1164.78,
7,8,572.0,85.0,1033.0,16817.0,506.0,8262.0,14.0,3.0,0.0,...,0.0,0.16,0.0,3009.0,,499.0,2210.31,,2223.31,
8,9,563.0,9.0,441.0,18020.0,72.0,7038.0,4.0,4.0,0.0,...,0.0,0.26,0.0,3324.0,,569.0,1803.08,,1754.44,
9,10,663.0,4.0,187.0,18733.0,32.0,6719.0,7.0,2.0,0.0,...,0.0,0.18,0.0,2474.0,,,4675.56,,1143.17,


In [305]:
import numpy as np
import pandas as pd

# Apply styling to the pivot table
def style_df(val, props=''):
    return props

# Create a complete DataFrame with all days for each channel
all_days = list(range(1, month_days + 1))  # Use month_days instead of hardcoding 31
all_channels = sorted(filtered['Channel'].unique())

# Create empty dataframe with all possible day-channel combinations
full_month_data = []
for day in all_days:
    for channel in all_channels:
        full_month_data.append({
            'Day': day,
            'Channel': channel,
            'Sessions': 0,
            'Purchases': 0,
            'Purchase revenue': 0.0
        })

# Create full month DataFrame
full_month_df = pd.DataFrame(full_month_data)

# Update with actual data where available
actual_data = filtered.groupby(['Day', 'Channel']).agg({
    'Sessions': 'sum',
    'Purchases': 'sum',
    'Purchase revenue': 'sum'
}).reset_index()

# Merge actual data with full month data
full_month_df = pd.merge(
    full_month_df,
    actual_data,
    on=['Day', 'Channel'],
    how='left',
    suffixes=('_full', '')
).fillna(0)

# Keep only the columns we need
full_month_df = full_month_df[['Day', 'Channel', 'Sessions', 'Purchases', 'Purchase revenue']]

# Create the pivot table using the full month data
pivot = full_month_df.pivot_table(
    index='Day',
    columns='Channel',
    values=['Sessions', 'Purchases', 'Purchase revenue'],
    aggfunc='sum',
    fill_value=0
)

# Get all channels in sorted order
channels_order = sorted(full_month_df['Channel'].unique())
metrics_order = ['Sessions', 'Purchases', 'Purchase revenue']

# Build MultiIndex columns in the desired order
pivot = pivot.reindex(columns=pd.MultiIndex.from_product([metrics_order, channels_order]))

# Calculate CVR and AOV for each channel
cvr_data = {}
aov_data = {}

for channel in channels_order:
    # Calculate CVR (Conversion Rate) = (Purchases / Sessions) * 100
    cvr = (pivot[('Purchases', channel)] / pivot[('Sessions', channel)] * 100).round(2)
    cvr_data[channel] = cvr.replace([np.inf, -np.inf], 0)  # Handle division by zero
    
    # Calculate AOV (Average Order Value) = Purchase revenue / Purchases
    aov = (pivot[('Purchase revenue', channel)] / pivot[('Purchases', channel)]).round(2)
    aov_data[channel] = aov.replace([np.inf, -np.inf], 0)  # Handle division by zero

# Add CVR and AOV to the pivot table
for channel in channels_order:
    pivot[('CVR', channel)] = cvr_data[channel]
    pivot[('AOV', channel)] = aov_data[channel]

# Update metrics order to include new columns
metrics_order = ['Sessions', 'Purchases', 'Purchase revenue', 'CVR', 'AOV']

# Reorder all columns according to the updated metrics
pivot = pivot.reindex(columns=pd.MultiIndex.from_product([metrics_order, channels_order]))

# Reset index for display
pivot = pivot.reset_index()

# Display the result
print(f"Day-wise channel split with Sessions, Purchases, Purchase revenue, CVR, and AOV (all days 1-{month_days}):")
display(pivot)

# Define the styling for different metrics
styled_pivot = pivot.style\
    .format({('CVR', channel): '{:.2f}%' for channel in channels_order})\
    .format({('AOV', channel): '${:,.2f}' for channel in channels_order})\
    .format({('Purchase revenue', channel): '${:,.2f}' for channel in channels_order})\
    .format({('Sessions', channel): '{:,.0f}' for channel in channels_order})\
    .format({('Purchases', channel): '{:,.0f}' for channel in channels_order})

# Display the styled pivot table
display(styled_pivot)

Day-wise channel split with Sessions, Purchases, Purchase revenue, CVR, and AOV (all days 1-31):



Unnamed: 0_level_0,Day,Sessions,Sessions,Sessions,Sessions,Sessions,Sessions,Sessions,Purchases,Purchases,...,CVR,CVR,CVR,AOV,AOV,AOV,AOV,AOV,AOV,AOV
Unnamed: 0_level_1,Unnamed: 1_level_1,Affiliates,Display,Email,Organic,Paid Other,Paid Perf,Paid Social,Affiliates,Display,...,Paid Other,Paid Perf,Paid Social,Affiliates,Display,Email,Organic,Paid Other,Paid Perf,Paid Social
0,1,717.0,28.0,1513.0,17785.0,143.0,8699.0,3.0,1.0,0.0,...,0.0,0.13,0.0,1199.0,,987.6,1575.54,,1822.73,
1,2,750.0,31.0,1591.0,17498.0,44.0,8386.0,0.0,6.0,0.0,...,0.0,0.16,,2065.67,,,2214.57,,2097.3,
2,3,711.0,39.0,735.0,18318.0,34.0,7807.0,6.0,2.0,0.0,...,0.0,0.22,0.0,3648.5,,,2042.82,,2045.71,
3,4,630.0,39.0,262.0,18834.0,32.0,7901.0,35.0,7.0,0.0,...,0.0,0.16,0.0,2212.43,,,1365.88,,1637.08,
4,5,552.0,80.0,558.0,17143.0,12.0,7989.0,49.0,6.0,0.0,...,0.0,0.19,0.0,1912.33,,2098.33,2232.17,,1683.53,
5,6,555.0,100.0,830.0,21662.0,76.0,8083.0,34.0,2.0,0.0,...,0.0,0.19,0.0,1499.0,,,2034.0,,916.67,
6,7,507.0,93.0,942.0,16903.0,106.0,8270.0,29.0,1.0,0.0,...,0.0,0.17,0.0,479.0,,3598.0,1094.0,,1164.78,
7,8,572.0,85.0,1033.0,16817.0,506.0,8262.0,14.0,3.0,0.0,...,0.0,0.16,0.0,3009.0,,499.0,2210.31,,2223.31,
8,9,563.0,9.0,441.0,18020.0,72.0,7038.0,4.0,4.0,0.0,...,0.0,0.26,0.0,3324.0,,569.0,1803.08,,1754.44,
9,10,663.0,4.0,187.0,18733.0,32.0,6719.0,7.0,2.0,0.0,...,0.0,0.18,0.0,2474.0,,,4675.56,,1143.17,


Unnamed: 0_level_0,Day,Sessions,Sessions,Sessions,Sessions,Sessions,Sessions,Sessions,Purchases,Purchases,Purchases,Purchases,Purchases,Purchases,Purchases,Purchase revenue,Purchase revenue,Purchase revenue,Purchase revenue,Purchase revenue,Purchase revenue,Purchase revenue,CVR,CVR,CVR,CVR,CVR,CVR,CVR,AOV,AOV,AOV,AOV,AOV,AOV,AOV
Unnamed: 0_level_1,Unnamed: 1_level_1,Affiliates,Display,Email,Organic,Paid Other,Paid Perf,Paid Social,Affiliates,Display,Email,Organic,Paid Other,Paid Perf,Paid Social,Affiliates,Display,Email,Organic,Paid Other,Paid Perf,Paid Social,Affiliates,Display,Email,Organic,Paid Other,Paid Perf,Paid Social,Affiliates,Display,Email,Organic,Paid Other,Paid Perf,Paid Social
0,1,717.0,28.0,1513.0,17785.0,143.0,8699.0,3.0,1,0,5,13,0,11,0,1199.0,0.0,4938.000004,20482.000002,0.0,20050.000004,0.0,0.14,0.0,0.33,0.07,0.0,0.13,0.0,1199.0,,987.6,1575.54,,1822.73,
1,2,750.0,31.0,1591.0,17498.0,44.0,8386.0,0.0,6,0,0,14,0,13,0,12393.999996,0.0,0.0,31003.999996,0.0,27264.850001,0.0,0.8,0.0,0.0,0.08,0.0,0.16,,2065.67,,,2214.57,,2097.3,
2,3,711.0,39.0,735.0,18318.0,34.0,7807.0,6.0,2,0,0,11,0,17,0,7297.0,0.0,0.0,22470.999997,0.0,34776.999995,0.0,0.28,0.0,0.0,0.06,0.0,0.22,0.0,3648.5,,,2042.82,,2045.71,
3,4,630.0,39.0,262.0,18834.0,32.0,7901.0,35.0,7,0,0,16,0,13,0,15487.000002,0.0,0.0,21854.000009,0.0,21281.999999,0.0,1.11,0.0,0.0,0.08,0.0,0.16,0.0,2212.43,,,1365.88,,1637.08,
4,5,552.0,80.0,558.0,17143.0,12.0,7989.0,49.0,6,0,3,6,0,15,0,11473.999997,0.0,6294.999999,13393.000001,0.0,25252.999991,0.0,1.09,0.0,0.54,0.03,0.0,0.19,0.0,1912.33,,2098.33,2232.17,,1683.53,
5,6,555.0,100.0,830.0,21662.0,76.0,8083.0,34.0,2,0,0,14,0,15,0,2997.999998,0.0,0.0,28475.999997,0.0,13749.999996,0.0,0.36,0.0,0.0,0.06,0.0,0.19,0.0,1499.0,,,2034.0,,916.67,
6,7,507.0,93.0,942.0,16903.0,106.0,8270.0,29.0,1,0,1,14,0,14,0,478.999998,0.0,3597.999999,15316.000012,0.0,16306.850004,0.0,0.2,0.0,0.11,0.08,0.0,0.17,0.0,479.0,,3598.0,1094.0,,1164.78,
7,8,572.0,85.0,1033.0,16817.0,506.0,8262.0,14.0,3,0,1,13,0,13,0,9027.0,0.0,499.000002,28734.000007,0.0,28903.000003,0.0,0.52,0.0,0.1,0.08,0.0,0.16,0.0,3009.0,,499.0,2210.31,,2223.31,
8,9,563.0,9.0,441.0,18020.0,72.0,7038.0,4.0,4,0,1,12,0,18,0,13296.000001,0.0,569.000001,21636.999998,0.0,31579.999996,0.0,0.71,0.0,0.23,0.07,0.0,0.26,0.0,3324.0,,569.0,1803.08,,1754.44,
9,10,663.0,4.0,187.0,18733.0,32.0,6719.0,7.0,2,0,0,9,0,12,0,4947.999998,0.0,0.0,42080.000001,0.0,13718.000002,0.0,0.3,0.0,0.0,0.05,0.0,0.18,0.0,2474.0,,,4675.56,,1143.17,


In [306]:
# Export pivot table to Excel with formatting
output_path = 'session_channel_report.xlsx'
pivot.to_excel(output_path)

# Format the Excel file
from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment
from openpyxl.utils import get_column_letter

# # Get the first day of the month from user (reuse the same input)
# while True:
#     first_day = input('Enter the first day of the month (Monday/Tuesday/Wednesday/Thursday/Friday/Saturday/Sunday): ').strip().lower()
#     if first_day in ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']:
#         break
#     print('Invalid input! Please enter a valid day name.')

# # Create a mapping of days to their position in a week (0=Monday to 6=Sunday)
# day_positions = {
#     'monday': 0, 'tuesday': 1, 'wednesday': 2, 'thursday': 3,
#     'friday': 4, 'saturday': 5, 'sunday': 6
# }
# day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# # Get position of the first day (0-6, where 0 is Monday)
# first_day_position = day_positions[first_day]


def get_week_info(day_of_month, first_day_pos):
    """Get week information for a given day.
    Returns (week_number, is_first_partial_week)"""
    if first_day_pos > 0:  # If month doesn't start on Monday
        days_till_next_monday = 7 - first_day_pos
        if day_of_month <= days_till_next_monday:
            return 1, True
        adjusted_day = day_of_month - days_till_next_monday
        return (adjusted_day - 1) // 7 + 2, False
    else:  # If month starts on Monday
        return (day_of_month - 1) // 7 + 1, False

def get_day_name(day_number, first_day_pos):
    """Get the day name for a given day of month"""
    day_of_week = (first_day_pos + day_number - 1) % 7
    return day_names[day_of_week]

wb = load_workbook(output_path)
ws = wb.active

# Get dimensions
data_rows = ws.max_row
data_cols = ws.max_column

# Insert a new column for day names after the Day column
ws.insert_cols(3)

# Define styles
header_font = Font(bold=True, color="FFFFFF")
header_fill = PatternFill("solid", fgColor="4472C4")
subheader_fill = PatternFill("solid", fgColor="8EA9DB")
total_fill = PatternFill("solid", fgColor="FFC000")
border_style = Side(style='thin')
border = Border(left=border_style, right=border_style, top=border_style, bottom=border_style)

# Insert title row
ws.insert_rows(1)
title_cell = ws.cell(row=1, column=1)
title_cell.value = f"Session Channel Report - Generated on {datetime.now().strftime('%Y-%m-%d')}"
title_cell.font = Font(bold=True, size=14)
title_cell.alignment = Alignment(horizontal="center")

# Set up headers
for col in range(1, data_cols + 2):
    # Format cells in title row
    ws.cell(row=1, column=col).border = border
    
    # Top header (metrics)
    top_header_cell = ws.cell(row=2, column=col)
    top_header_cell.font = header_font
    top_header_cell.fill = header_fill
    top_header_cell.border = border
    top_header_cell.alignment = Alignment(horizontal="center")
    
    # Second header (channels)
    second_header_cell = ws.cell(row=3, column=col)
    second_header_cell.font = header_font
    second_header_cell.fill = subheader_fill
    second_header_cell.border = border
    second_header_cell.alignment = Alignment(horizontal="center")

# Add day names
day_col_idx = 2
day_name_col_idx = 3
data_start_row = 4

# Add day names for each day number
for row in range(data_start_row, data_rows + 2):
    day_cell = ws.cell(row=row, column=day_col_idx)
    day_name_cell = ws.cell(row=row, column=day_name_col_idx)
    
    if isinstance(day_cell.value, (int, float)):
        day_name = get_day_name(int(day_cell.value), first_day_position)
        day_name_cell.value = day_name
        day_name_cell.alignment = Alignment(horizontal="center")
    
    day_name_cell.border = border
    day_name_cell.fill = PatternFill("solid", fgColor="F2F2F2") if row % 2 == 0 else PatternFill()

# Collect day rows and group into weeks
day_rows = []
for row in range(data_start_row, data_rows + 2):
    day = ws.cell(row=row, column=day_col_idx).value
    if isinstance(day, (int, float)):
        week_num, is_partial = get_week_info(int(day), first_day_position)
        day_rows.append((row, day, week_num, is_partial))

# Organize into week ranges
week_ranges = []
subtotal_rows = []
current_week = None
week_start_row = None
is_current_partial = False

for i, (row, day, week, is_partial) in enumerate(day_rows):
    if current_week != week:
        if current_week is not None:
            # End the previous week
            week_ranges.append((week_start_row, row - 1, current_week, is_current_partial))
        # Start a new week
        current_week = week
        week_start_row = row
        is_current_partial = is_partial
    
    # Handle the last week
    if i == len(day_rows) - 1:
        week_ranges.append((week_start_row, row, week, is_partial))

# Insert subtotal rows
rows_added = 0
for start_row, end_row, week_num, is_partial in week_ranges:
    adjusted_start = start_row + rows_added
    adjusted_end = end_row + rows_added
    
    ws.insert_rows(adjusted_end + 1)
    rows_added += 1
    subtotal_rows.append(adjusted_end + 1)
    
    # Format subtotal row
    for col in range(1, data_cols + 2):
        subtotal_cell = ws.cell(row=adjusted_end + 1, column=col)
        subtotal_cell.font = Font(bold=True)
        subtotal_cell.fill = total_fill
        subtotal_cell.border = border
        
        if col == day_col_idx:
            if is_partial:
                days_in_partial = 7 - first_day_position
                subtitle = f"Week 1 (Partial: {days_in_partial} days)"
            else:
                subtitle = f"Week {week_num}"
            subtotal_cell.value = f"{subtitle} Subtotal"
            subtotal_cell.alignment = Alignment(horizontal="center")
        
        # Add Day Name subtotal cell
        elif col == day_name_col_idx:
            subtotal_cell.value = ""  # Leave empty for subtotals
        
        elif col > day_name_col_idx:  # Adjust for the Day Name column
            header_value = ws.cell(row=3, column=col).value
            col_letter = get_column_letter(col)
            
            if header_value == 'v/s Target':
                # Get the column letter for "Test" (current month) for this channel - now at column F
                sheet1_col = col - 1  # Current month data (Test) is 1 column before v/s Target
                sheet1_letter = get_column_letter(sheet1_col)
                
                # Get the column letter for "Target" for this channel - now at column E
                target_col = col - 2  # Target data is 2 columns before v/s Target
                target_letter = get_column_letter(target_col)
                
                # Create formula to calculate percentage: (Test) / Target
                # Ensure the formula outputs a number, and handle division by zero by outputting 0.
                formula = f"=IF({target_letter}{adjusted_end+1}=0,0,ROUND(({sheet1_letter}{adjusted_end+1})/{target_letter}{adjusted_end+1}*100,0))"
                subtotal_cell.value = formula
                subtotal_cell.number_format = '0.00\"%\"'  # Apply percentage number format
                subtotal_cell.alignment = Alignment(horizontal="center")
            
            elif header_value == 'v/s Last Year':
                # Get the column letter for "Test" (current month) for this channel - now at column F
                sheet1_col = col - 2  # Current month data (Test) is 2 columns before v/s Last Year
                sheet1_letter = get_column_letter(sheet1_col)
                
                # Get the column letter for "Raw data May 24" (last year) for this channel - at column D
                may24_col = col - 4  # Last year data (Raw data May 24) is 4 columns before v/s Last Year
                may24_letter = get_column_letter(may24_col)
                
                # Create formula to calculate percentage: (Test - Raw data May 24) / Raw data May 24
                # Ensure the formula outputs a number, and handle division by zero by outputting 0.
                formula = f"=IF({may24_letter}{adjusted_end+1}=0,0,ROUND(({sheet1_letter}{adjusted_end+1}-{may24_letter}{adjusted_end+1})/{may24_letter}{adjusted_end+1}*100,0))"
                subtotal_cell.value = formula
                subtotal_cell.number_format = '0.00\"%\"' # Display as percentage
                subtotal_cell.alignment = Alignment(horizontal="center")
                
            elif header_value == 'v/s Last Month':
                # Get the column letter for "Test" (current month) for this channel - now at column F
                sheet1_col = col - 3  # Current month data (Test) is 3 columns before v/s Last Month
                sheet1_letter = get_column_letter(sheet1_col)
                
                # Get the column letter for "Raw data April 25" (last month) for this channel - at column C
                april25_col = col - 6  # Last month data (Raw data April 25) is 6 columns before v/s Last Month
                april25_letter = get_column_letter(april25_col)
                
                # Create formula to calculate percentage: (Test - Raw data April 25) / Raw data April 25
                # Ensure the formula outputs a number, and handle division by zero by outputting 0 for the numeric part.
                # The original formula appended "%" making it text.
                formula_numeric_part = f"IF({april25_letter}{adjusted_end+1}=0,0,ROUND(({sheet1_letter}{adjusted_end+1}-{april25_letter}{adjusted_end+1})/{april25_letter}{adjusted_end+1}*100,0))"
                
                # The rule for these columns is > 0 is green, < 0 is red.
                # The original code for data rows adds a "+" sign.
                # For consistency with conditional formatting rules (which expect numbers), we'll keep it numeric.
                # The display format will be handled by number_format if needed, or Excel's default for numbers.
                # However, the original code for subtotal formulas for these columns also appended "%".
                # Let's make them numeric and apply number format for consistency with 'v/s Target' approach.
                formula = f"=IF({april25_letter}{adjusted_end+1}=0,0,ROUND(({sheet1_letter}{adjusted_end+1}-{april25_letter}{adjusted_end+1})/{april25_letter}{adjusted_end+1}*100,0))"
                subtotal_cell.value = formula
                subtotal_cell.number_format = '0.00\"%\"' # Display as percentage
                subtotal_cell.alignment = Alignment(horizontal="center")
                
            elif header_value not in ['v/s Target', 'v/s Last Year', 'v/s Last Month']:
                # Calculate sum for this week's range
                subtotal_cell.value = f"=SUM({col_letter}{adjusted_start}:{col_letter}{adjusted_end})"
                subtotal_cell.number_format = '#,##0'

# Add grand total row
grand_total_row = data_rows + rows_added + 2
ws.insert_rows(grand_total_row)

# Format grand total row
for col in range(1, data_cols + 2):  # +2 to account for the new column and title row
    grand_total_cell = ws.cell(row=grand_total_row, column=col)
    grand_total_cell.font = Font(bold=True)
    grand_total_cell.fill = total_fill
    grand_total_cell.border = border
    
    if col == day_col_idx:
        grand_total_cell.value = "Grand Total"
        grand_total_cell.alignment = Alignment(horizontal="center")
    elif col == day_name_col_idx:
        grand_total_cell.value = ""
    elif col > day_name_col_idx:  # Adjust for the Day Name column
        header_value = ws.cell(row=3, column=col).value
        col_letter = get_column_letter(col)
        
        if header_value == 'v/s Target':
            # Get the column letters for Test (column F) and Target (column E) for grand total
            sheet1_col = col - 1
            sheet1_letter = get_column_letter(sheet1_col)
            target_col = col - 2
            target_letter = get_column_letter(target_col)
            
            # Create formula for grand total percentage - numeric output
            formula = f"=IF({target_letter}{grand_total_row}=0,0,ROUND(({sheet1_letter}{grand_total_row})/{target_letter}{grand_total_row}*100,0))"
            grand_total_cell.value = formula
            grand_total_cell.number_format = '0.00\"%\"'  # Apply percentage number format
            grand_total_cell.alignment = Alignment(horizontal="center")
            
        elif header_value == 'v/s Last Year':
            # Get the column letters for Test (column F) and Raw data May 24 (column D) for grand total
            sheet1_col = col - 2
            sheet1_letter = get_column_letter(sheet1_col)
            may24_col = col - 4
            may24_letter = get_column_letter(may24_col)
            
            # Create formula for grand total percentage - numeric output
            formula = f"=IF({may24_letter}{grand_total_row}=0,0,ROUND(({sheet1_letter}{grand_total_row}-{may24_letter}{grand_total_row})/{may24_letter}{grand_total_row}*100,0))"
            grand_total_cell.value = formula
            grand_total_cell.number_format = '0.00\"%\"' # Display as percentage
            grand_total_cell.alignment = Alignment(horizontal="center")
            
        elif header_value == 'v/s Last Month':
            # Get the column letters for Test (column F) and Raw data April 25 (column C) for grand total
            sheet1_col = col - 3
            sheet1_letter = get_column_letter(sheet1_col)
            april25_col = col - 6
            april25_letter = get_column_letter(april25_col)
            
            # Create formula for grand total percentage - numeric output
            formula = f"=IF({april25_letter}{grand_total_row}=0,0,ROUND(({sheet1_letter}{grand_total_row}-{april25_letter}{grand_total_row})/{april25_letter}{grand_total_row}*100,0))"
            grand_total_cell.value = formula
            grand_total_cell.number_format = '0.00\"%\"' # Display as percentage
            grand_total_cell.alignment = Alignment(horizontal="center")
            
        elif header_value not in ['v/s Target', 'v/s Last Year', 'v/s Last Month']:
            # Build formula to sum only the weekly subtotal rows
            if subtotal_rows:  # Only if we have subtotal rows
                subtotal_ranges = [f"{col_letter}{row}" for row in subtotal_rows]
                formula = "=SUM(" + ",".join(subtotal_ranges) + ")"
                grand_total_cell.value = formula
                grand_total_cell.number_format = '#,##0'

# Format numbers
for row in range(4, grand_total_row + 1):
    for col in range(4, data_cols + 2):
        cell = ws.cell(row=row, column=col)
        header = ws.cell(row=3, column=col).value
        
        if header == 'CVR':
            cell.number_format = '0"%"'  # Fixed escape sequence
        elif header == 'AOV' or header == 'Purchase revenue':
            cell.number_format = '$#,##0'  # Fixed escape sequence
        elif header in ['Sessions', 'Purchases']:
            cell.number_format = '#,##0'

# Auto-adjust column widths
for col in range(1, data_cols + 2):
    max_length = 0
    for row in range(1, grand_total_row + 1):
        cell_value = ws.cell(row=row, column=col).value
        if cell_value:
            max_length = max(max_length, len(str(cell_value)))
    adjusted_width = max(max_length + 2, 12)
    ws.column_dimensions[get_column_letter(col)].width = adjusted_width

# Calculate column spans for each metric
metrics = ['Sessions', 'Purchases', 'Purchase revenue', 'CVR', 'AOV']
channels_count = len(channels_order)
start_col = 4  # Start after Day and Day Name columns

for metric in metrics:
    end_col = start_col + channels_count - 1
    
    # Set the value in the first cell
    cell = ws.cell(row=2, column=start_col)
    cell.value = metric
    cell.font = header_font
    cell.fill = header_fill
    cell.alignment = Alignment(horizontal="center")
    
    # Then merge the cells
    ws.merge_cells(start_row=2, start_column=start_col, end_row=2, end_column=end_col)
    
    # Move to next section
    start_col = end_col + 1

# Save the workbook
wb.save(output_path)
print(f"Created session channel report at {output_path}")
print(f"First day of month was {first_day.capitalize()}, weeks are aligned to start on Monday")


Created session channel report at session_channel_report.xlsx
First day of month was Sunday, weeks are aligned to start on Monday


## Copy Session Channel Data
Copy data from session channel report to invoice day channel report, preserving all formatting

In [307]:
from openpyxl import load_workbook
from copy import copy

# Copy data from session_channel_report.xlsx to invoice_day_channel_report_compatible.xlsx
def copy_session_data():
    """
    Copy session data from session_channel_report.xlsx to invoice_day_channel_report_compatible.xlsx
    Starting from cell D2 in source to cell Y2 in destination
    """
    source_path = 'session_channel_report.xlsx'
    dest_path = 'invoice_day_channel_report_compatible.xlsx'
    
    # Load both workbooks
    source_wb = load_workbook(source_path,data_only=True)
    dest_wb = load_workbook(dest_path)
    
    source_ws = source_wb.active
    dest_ws = dest_wb.active
    
    # Get the range of data to copy (starts from D2)
    source_max_row = source_ws.max_row
    source_max_col = source_ws.max_column
    
    # Copy data from D2 onwards in source to Y2 onwards in destination
    source_start_col = 4  # Column D
    dest_start_col = 25  # Column Y
    
    # Copy values and formatting
    for row in range(2, source_max_row + 1):
        for col_offset in range(source_max_col - source_start_col + 1):
            source_col = source_start_col + col_offset
            dest_col = dest_start_col + col_offset
            
            # Get source cell
            source_cell = source_ws.cell(row=row, column=source_col)
            
            # Get destination cell
            dest_cell = dest_ws.cell(row=row, column=dest_col)
            
            # Copy value
            dest_cell.value = source_cell.value
            
            # Copy formatting
            if source_cell.has_style:
                dest_cell.font = copy(source_cell.font)
                dest_cell.fill = copy(source_cell.fill)
                dest_cell.border = copy(source_cell.border)
                dest_cell.alignment = copy(source_cell.alignment)
                dest_cell.number_format = source_cell.number_format
    
    # Save the destination workbook
    dest_wb.save(dest_path)
    print(f"Successfully copied session data from {source_path} to {dest_path}")

# Execute the copy function
copy_session_data()

Successfully copied session data from session_channel_report.xlsx to invoice_day_channel_report_compatible.xlsx


In [308]:
import win32com.client
import os
import time

def open_save_close_excel_dynamic(file_path):
    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        return

    excel_app = None
    try:
        excel_app = win32com.client.DispatchEx("Excel.Application")
        excel_app.Visible = False
        excel_app.DisplayAlerts = False

        workbook = excel_app.Workbooks.Open(file_path, ReadOnly=False)

        # Wait for Excel to finish calculating
        XL_CALCULATION_DONE = 0
        max_wait_time = 300
        check_interval = 0.5
        start_time = time.time()

        print(f"Waiting for calculation to complete for {file_path}...")
        while excel_app.CalculationState != XL_CALCULATION_DONE:
            if time.time() - start_time > max_wait_time:
                print(f"Warning: Calculation did not complete in {max_wait_time} seconds. Proceeding.")
                break
            time.sleep(check_interval)
        print("Calculation complete.")

        # Save and close
        workbook.Save()
        workbook.Close(SaveChanges=0)
        print(f"Successfully opened, saved, and closed: {file_path}")

    except Exception as e:
        print(f"❌ Error occurred: {e}")
    finally:
        if excel_app:
            excel_app.Quit()
            excel_app = None


## Copy Session Channel Data with Merged Cells
Copy data from session channel report to invoice day channel report, preserving all formatting including merged cells

In [309]:
import os
def copy_session_data():
    """
    Copy session data from session_channel_report.xlsx to invoice_day_channel_report_compatible.xlsx
    Starting from cell D2 in source to cell Y2 in destination, preserving merged cells
    """
    source_path = 'session_channel_report.xlsx'
    dest_path = 'invoice_day_channel_report_compatible.xlsx'

    # if os.path.exists(source_path):
    #     os.startfile(source_path)
    
    # _=input("Open the session_channel_report.xlsx, save it, then close it again and press Enter to continue...")
    if os.path.exists(source_path):
        print(f"Opening {os.path.abspath(source_path)} to ensure it's saved and closed properly...")
        open_save_close_excel_dynamic(os.path.abspath(source_path))

    # Load both workbooks
    source_wb = load_workbook(source_path, data_only=True)
    dest_wb = load_workbook(dest_path)
    
    source_ws = source_wb.active
    dest_ws = dest_wb.active
    
    # Get the range of data to copy (starts from D2)
    source_max_row = source_ws.max_row
    source_max_col = source_ws.max_column
    
    # Copy data from D2 onwards in source to Y2 onwards in destination
    source_start_col = 4  # Column D
    dest_start_col = 25  # Column Y
    
    # First, handle any merged cells in the header rows
    # Get all merged ranges from source
    merged_ranges = source_ws.merged_cells.ranges
    
    # For each merged range in source that starts with our copied columns
    for merged_range in merged_ranges:
        if merged_range.min_col >= source_start_col:
            # Calculate the offset for the destination
            col_offset = dest_start_col - source_start_col
            
            # Create new merge range coordinates for destination
            new_min_col = merged_range.min_col + col_offset
            new_max_col = merged_range.max_col + col_offset
            new_range = f"{get_column_letter(new_min_col)}{merged_range.min_row}:{get_column_letter(new_max_col)}{merged_range.max_row}"
            
            # Merge the cells in destination
            try:
                dest_ws.merge_cells(new_range)
                
                # Copy the value from the top-left cell of the merge range
                source_value = source_ws.cell(merged_range.min_row, merged_range.min_col).value
                dest_ws.cell(merged_range.min_row, new_min_col).value = source_value
                
                # Copy formatting from the first cell of merge range
                source_cell = source_ws.cell(merged_range.min_row, merged_range.min_col)
                dest_cell = dest_ws.cell(merged_range.min_row, new_min_col)
                
                if source_cell.has_style:
                    dest_cell.font = copy(source_cell.font)
                    dest_cell.fill = copy(source_cell.fill)
                    dest_cell.border = copy(source_cell.border)
                    dest_cell.alignment = copy(source_cell.alignment)
                    dest_cell.number_format = source_cell.number_format
            except ValueError:
                # If merge range already exists, just update the value and formatting
                dest_ws.cell(merged_range.min_row, new_min_col).value = source_value
    
    # Now copy the rest of the data and formatting
    for row in range(2, source_max_row + 1):
        for col_offset in range(source_max_col - source_start_col + 1):
            source_col = source_start_col + col_offset
            dest_col = dest_start_col + col_offset
            
            # Skip if this cell is part of a merged range
            if any(merged_range.min_row <= row <= merged_range.max_row and 
                  merged_range.min_col <= source_col <= merged_range.max_col 
                  for merged_range in merged_ranges):
                continue
            
            # Get source cell
            source_cell = source_ws.cell(row=row, column=source_col)
            
            # Get destination cell
            dest_cell = dest_ws.cell(row=row, column=dest_col)
            
            # Copy value
            dest_cell.value = source_cell.value
            
            # Copy formatting if it has any
            if source_cell.has_style:
                dest_cell.font = copy(source_cell.font)
                dest_cell.fill = copy(source_cell.fill)
                dest_cell.border = copy(source_cell.border)
                dest_cell.alignment = copy(source_cell.alignment)
                dest_cell.number_format = source_cell.number_format
    
    # Save the destination workbook
    dest_wb.save(dest_path)
    print(f"Successfully copied session data from {source_path} to {dest_path}")

# Execute the copy function
copy_session_data()

Opening c:\Users\91843\Documents\VsCode Codes\ReportAutomation\session_channel_report.xlsx to ensure it's saved and closed properly...
Waiting for calculation to complete for c:\Users\91843\Documents\VsCode Codes\ReportAutomation\session_channel_report.xlsx...
Calculation complete.
Successfully opened, saved, and closed: c:\Users\91843\Documents\VsCode Codes\ReportAutomation\session_channel_report.xlsx
Waiting for calculation to complete for c:\Users\91843\Documents\VsCode Codes\ReportAutomation\session_channel_report.xlsx...
Calculation complete.
Successfully opened, saved, and closed: c:\Users\91843\Documents\VsCode Codes\ReportAutomation\session_channel_report.xlsx
Successfully copied session data from session_channel_report.xlsx to invoice_day_channel_report_compatible.xlsx
Successfully copied session data from session_channel_report.xlsx to invoice_day_channel_report_compatible.xlsx


## Final Processing
Delete session_channel_report.xlsx and hide the first column in invoice_day_channel_report_compatible.xlsx

In [310]:
import os
from openpyxl import load_workbook

# Step 1: Delete session_channel_report.xlsx
if os.path.exists('session_channel_report.xlsx'):
    os.remove('session_channel_report.xlsx')
    print("Deleted 'session_channel_report.xlsx'")
else:
    print("'session_channel_report.xlsx' not found, skipping deletion")

# Step 2: Hide the first column in invoice_day_channel_report_compatible.xlsx
report_path = 'invoice_day_channel_report_compatible.xlsx'

# Load the workbook
wb = load_workbook(report_path)
ws = wb.active

# Hide the first column (column A)
ws.column_dimensions['A'].hidden = True
ws.freeze_panes = 'D4'  # Freeze the first three rows and the first three columns

# Save the workbook
wb.save(report_path)
print(f"Hidden the first column in '{report_path}'")
print("All processing complete!")


Deleted 'session_channel_report.xlsx'
Hidden the first column in 'invoice_day_channel_report_compatible.xlsx'
All processing complete!


In [311]:
# Automated Path Configuration for Weekly Analysis
import os
import glob
import calendar
import pandas as pd

def get_month_year_combinations(latest_month_year):
    """
    Get the latest month, last month, and last year combinations
    """
    # Parse the latest month-year
    month_name, year = latest_month_year.split('-')
    year = int(year)
    month_num = list(calendar.month_name).index(month_name)
    
    # Calculate last month
    if month_num == 1:  # January
        last_month_num = 12
        last_month_year_num = year - 1
    else:
        last_month_num = month_num - 1
        last_month_year_num = year
    
    # Calculate last year same month
    last_year_month_num = month_num
    last_year_year_num = year - 1
    
    # Convert back to names
    last_month_name = calendar.month_name[last_month_num]
    last_year_month_name = calendar.month_name[last_year_month_num]
    
    return {
        'latest': {'month': month_name, 'year': year, 'folder': f"{month_name}-{year}"},
        'last_month': {'month': last_month_name, 'year': last_month_year_num, 'folder': f"{last_month_name}-{last_month_year_num}"},
        'last_year': {'month': last_year_month_name, 'year': last_year_year_num, 'folder': f"{last_year_month_name}-{last_year_year_num}"}
    }

def find_file_by_keyword(folder_path, keyword):
    """
    Find a file in the folder that contains the keyword in its name
    """
    if not os.path.exists(folder_path):
        return None
    
    files = os.listdir(folder_path)
    for file in files:
        if keyword.lower() in file.lower() and file.endswith('.xlsx'):
            return os.path.join(folder_path, file)
    return None

def get_sheet_name_with_keyword(file_path, keyword):
    """
    Get the sheet name that contains the keyword
    """
    try:
        xl = pd.ExcelFile(file_path)
        for sheet_name in xl.sheet_names:
            if keyword.lower() in sheet_name.lower():
                return sheet_name
        # If no sheet with keyword found, return the first sheet
        return xl.sheet_names[0] if xl.sheet_names else None
    except:
        return None

def setup_automated_paths_weekly(latest_month_year, dsr_folder_path=None):
    """
    Setup all paths automatically for weekly analysis
    
    Parameters:
    latest_month_year: str - Format: "June-2025"
    dsr_folder_path: str - Full path to DSR folder (e.g., "C:/Users/Username/Documents/DSR")
                           If None, defaults to "DSR" in current directory
    
    Returns:
    dict containing all the required paths and configurations
    """
    
    # Get month-year combinations
    dates = get_month_year_combinations(latest_month_year)
    
    # Base DSR folder path - use provided path or default to current directory
    if dsr_folder_path is None:
        dsr_path = os.path.join(os.getcwd(), "DSR")
    else:
        dsr_path = os.path.abspath(dsr_folder_path)
        
    print(f"🔍 Looking for DSR folder at: {dsr_path}")
    
    # Prepare results
    sheet_info = []
    sessions_info = []
    target_info = {}
    
    # Process each period (last_month, last_year, latest)
    periods = ['last_month', 'last_year', 'latest']
    display_names = [
        f"{dates['last_month']['month']} {dates['last_month']['year'] % 100}",  # May 25
        f"{dates['last_year']['month']} {dates['last_year']['year'] % 100}",   # June 24
        f"{dates['latest']['month']} {dates['latest']['year'] % 100}"          # June 25
    ]
    
    for i, period in enumerate(periods):
        period_data = dates[period]
        folder_path = os.path.join(dsr_path, period_data['folder'])
        
        # Find invoice file
        invoice_file = find_file_by_keyword(folder_path, 'invoice')
        if invoice_file:
            # Get the first sheet (since invoice files have only one sheet)
            try:
                xl = pd.ExcelFile(invoice_file)
                sheet_name = xl.sheet_names[0] if xl.sheet_names else 'Sheet1'
            except:
                sheet_name = 'Sheet1'
            
            # Make path relative to current working directory
            rel_path = os.path.relpath(invoice_file, os.getcwd())
            sheet_info.append((rel_path, sheet_name, display_names[i]))
        
        # Find traffic/session file
        traffic_file = find_file_by_keyword(folder_path, 'traffic')
        if traffic_file:
            download_sheet = get_sheet_name_with_keyword(traffic_file, 'download')
            if not download_sheet:
                # If no download sheet found, get the first sheet
                try:
                    xl = pd.ExcelFile(traffic_file)
                    download_sheet = xl.sheet_names[0] if xl.sheet_names else 'Sheet1'
                except:
                    download_sheet = 'Sheet1'
            
            # Make path relative to current working directory
            rel_path = os.path.relpath(traffic_file, os.getcwd())
            sessions_info.append((rel_path, download_sheet, display_names[i]))
    
    # Setup target information (using latest month)
    latest_folder = os.path.join(dsr_path, dates['latest']['folder'])
    target_file = find_file_by_keyword(latest_folder, 'target')
    if target_file:
        target_sheet = get_sheet_name_with_keyword(target_file, 'target')
        if not target_sheet:
            target_sheet = 'Target'  # Default if not found
        
        target_info = {
            'path': os.path.relpath(target_file, os.getcwd()),
            'sheet': target_sheet
        }
    
    return {
        'sheet_info': sheet_info,
        'sessions_info': sessions_info,
        'target_info': target_info,
        'dates': dates
    }

# Get user input for the latest month and DSR path
print("📊 Weekly Analysis - Automated Path Configuration")
print("=" * 50)
print("Please enter the latest month in format 'Month-Year' (e.g., 'June-2025'):")
print("You'll also be asked for the DSR folder path if it's not in the default location.")
print("Note: Make sure the DSR folder exists with the appropriate month folders.")
print("This will configure both invoice data and session data automatically.")

📊 Weekly Analysis - Automated Path Configuration
Please enter the latest month in format 'Month-Year' (e.g., 'June-2025'):
You'll also be asked for the DSR folder path if it's not in the default location.
Note: Make sure the DSR folder exists with the appropriate month folders.
This will configure both invoice data and session data automatically.


In [312]:
# Apply Automated Configuration
latest_month_year = latest_month_year.strip()
dsr_folder_path = dsr_folder_path

# Setup all paths automatically
try:
    config = setup_automated_paths_weekly(latest_month_year, dsr_folder_path)
    
    # Extract configuration
    sheet_info = config['sheet_info']
    sessions_info = config['sessions_info']
    target_config = config['target_info']
    
    print(f"\n✅ Weekly Analysis Configuration successful!")
    if dsr_folder_path:
        print(f"📁 Using DSR folder: {dsr_folder_path}")
    print(f"📁 Found {len(sheet_info)} invoice files:")
    for i, (path, sheet, display) in enumerate(sheet_info):
        print(f"   {i+1}. {display}: {path} -> {sheet}")
    
    print(f"\n📊 Found {len(sessions_info)} session/traffic files:")
    for i, (path, sheet, display) in enumerate(sessions_info):
        print(f"   {i+1}. {display}: {path} -> {sheet}")
    
    if target_config:
        print(f"\n🎯 Target file: {target_config['path']} -> {target_config['sheet']}")
        TARGET_PATH = target_config['path']
        TARGET_SHEET = target_config['sheet']
    else:
        print("\n⚠️  No target file found - using fallback")
        TARGET_PATH = 'test2/Target_June_25.xlsx'
        TARGET_SHEET = 'Target-June25'
    
    print(f"\n🚀 Ready for weekly analysis with automated paths!")
    
except Exception as e:
    print(f"❌ Error in automated setup: {e}")
    print("🔄 Falling back to manual configuration...")
    
    # Fallback to manual configuration
    sheet_info = [
        ('test2/may25-final.xlsx', 'Sheet1', 'May 25'),   # Last month raw sheet
        ('test2/June24_Invoice.xlsx', 'Raw data June 24', 'June 24'),        # Last year raw sheet
        ('test2/June25.xlsx', 'Sheet1', 'June 25')                # Latest month raw sheet
    ]
    
    sessions_info = [
        ('test2/May_2025_Daily traffic (2).xlsx', 'download - 2025-01-08T160122.10', 'May 25'), # Last month session
        ('test2/June Traffic -2024.xlsx', 'download - 2025-06-03T09524 (2)', 'June 24'),  # Last year session
        ('test2/June_2025_Daily traffic.xlsx', 'download - 2025-01-08T160122.10', 'June 25')   # Current month session
    ]
    
    TARGET_PATH = 'test2/Target_June_25.xlsx'
    TARGET_SHEET = 'Target-June25'

🔍 Looking for DSR folder at: c:\Users\91843\Documents\VsCode Codes\ReportAutomation\test\DSR

✅ Weekly Analysis Configuration successful!
📁 Using DSR folder: test/DSR
📁 Found 3 invoice files:
   1. May 25: test\DSR\May-2025\may25_Invoice.xlsx -> Sheet1
   2. June 24: test\DSR\June-2024\June24_Invoice.xlsx -> Raw data June 24
   3. June 25: test\DSR\June-2025\Invoicedate_30.xlsx -> Sheet1

📊 Found 3 session/traffic files:
   1. May 25: test\DSR\May-2025\May_2025_Daily_traffic.xlsx -> download - 2025-01-08T160122.10
   2. June 24: test\DSR\June-2024\June_2024_Daily_traffic.xlsx -> download - 2025-06-03T09524 (2)
   3. June 25: test\DSR\June-2025\June_2025_Daily traffic.xlsx -> download - 2025-01-08T160122.10

🎯 Target file: test\DSR\June-2025\Target_June_25.xlsx -> Target-June25

🚀 Ready for weekly analysis with automated paths!

✅ Weekly Analysis Configuration successful!
📁 Using DSR folder: test/DSR
📁 Found 3 invoice files:
   1. May 25: test\DSR\May-2025\may25_Invoice.xlsx -> Sheet1
 

In [313]:
# Get necessary imports
import pandas as pd
import openpyxl
from openpyxl.styles import Font, PatternFill, Border, Side, Alignment, numbers
from openpyxl.utils import get_column_letter
from datetime import datetime
import calendar
from copy import copy  # For copying Excel cell styles

In [314]:
# Define target Excel file 
output_path = 'weekly.xlsx'

# ✅ USING AUTOMATED CONFIGURATION
# sheet_info, sessions_info, TARGET_PATH, and TARGET_SHEET are now configured automatically above
# If automation failed, they fall back to manual configuration

# Verify configuration is loaded
if 'sheet_info' not in globals():
    print("⚠️  sheet_info not found - please run the automated configuration cells above")
if 'sessions_info' not in globals():
    print("⚠️  sessions_info not found - please run the automated configuration cells above")

# Get latest invoice data for max day calculation
latest_path, latest_sheet, latest_display = sheet_info[-1]
latest_df = pd.read_excel(latest_path, sheet_name=latest_sheet)
max_invoice_day = pd.to_datetime(latest_df['InvoiceDate'], dayfirst=True, errors='coerce').dt.day.max()

print(f"📊 Configuration Summary:")
print(f"   Sheet Info: {len(sheet_info)} files configured")
print(f"   Sessions Info: {len(sessions_info)} files configured") 
print(f"   Target: {TARGET_PATH} -> {TARGET_SHEET}")
print(f"   Max Invoice Day: {max_invoice_day}")

# Create unique identifiers by combining file path and sheet name
LAST_MONTH_ID = f"{sheet_info[0][0]}_{sheet_info[0][1]}"
LAST_YEAR_ID = f"{sheet_info[1][0]}_{sheet_info[1][1]}"
CURRENT_ID = f"{sheet_info[2][0]}_{sheet_info[2][1]}"

# Dynamic constants extracted from sheet_info for easy reference
LAST_MONTH_PATH = sheet_info[0][0]      # File path for last month
LAST_MONTH_SHEET = sheet_info[0][1]     # Sheet name for last month
LAST_MONTH_DISPLAY = sheet_info[0][2]   # Display name for last month

LAST_YEAR_PATH = sheet_info[1][0]       # File path for last year
LAST_YEAR_SHEET = sheet_info[1][1]      # Sheet name for last year
LAST_YEAR_DISPLAY = sheet_info[1][2]    # Display name for last year

CURRENT_PATH = sheet_info[2][0]         # File path for current month
CURRENT_SHEET = sheet_info[2][1]        # Sheet name for current month
CURRENT_DISPLAY = sheet_info[2][2]      # Display name for current month

# Create dynamic periods list from sheet_info display names
periods = [LAST_MONTH_DISPLAY, LAST_YEAR_DISPLAY, CURRENT_DISPLAY]

print("📊 Dynamic Configuration Loaded:")
print("=" * 40)
print(f"Last Month:   {LAST_MONTH_DISPLAY} (ID: {LAST_MONTH_ID})")
print(f"Last Year:    {LAST_YEAR_DISPLAY} (ID: {LAST_YEAR_ID})")  
print(f"Current:      {CURRENT_DISPLAY} (ID: {CURRENT_ID})")
print(f"Periods:      {periods}")
print("=" * 40)

📊 Configuration Summary:
   Sheet Info: 3 files configured
   Sessions Info: 3 files configured
   Target: test\DSR\June-2025\Target_June_25.xlsx -> Target-June25
   Max Invoice Day: 30
📊 Dynamic Configuration Loaded:
Last Month:   May 25 (ID: test\DSR\May-2025\may25_Invoice.xlsx_Sheet1)
Last Year:    June 24 (ID: test\DSR\June-2024\June24_Invoice.xlsx_Raw data June 24)
Current:      June 25 (ID: test\DSR\June-2025\Invoicedate_30.xlsx_Sheet1)
Periods:      ['May 25', 'June 24', 'June 25']


In [315]:
# Get target sums by day and channel using constants - WITH DEBUG OUTPUT
print("🔍 DEBUGGING TARGET DATA STRUCTURE")
print("=" * 50)

target_df = pd.read_excel(TARGET_PATH, sheet_name=TARGET_SHEET)
print(f"📊 Raw Target Data Shape: {target_df.shape}")
print(f"📋 Target Data Columns: {list(target_df.columns)}")
print("\n📅 First 10 rows of Target Data:")
print(target_df.head(10))

print("\n🏷️ Unique Channels in Target Data:")
print(target_df['Channel'].unique())

print("\n🏷️ Unique Categories in Target Data:")
if 'Category' in target_df.columns:
    print(target_df['Category'].unique())
else:
    print("No 'Category' column found in target data")

# Modified to include 'Category' in the grouping if it exists
if 'Category' in target_df.columns:
    target_sums = target_df.groupby(['Date', 'Channel', 'Category'])['Target'].sum().unstack(level=['Channel', 'Category'], fill_value=0).round(6)
else:
    target_sums = target_df.groupby(['Date', 'Channel'])['Target'].sum().unstack(level='Channel', fill_value=0).round(6)

print(f"\n📊 Processed Target Sums Shape: {target_sums.shape}")
print("\n📋 Target Sums Sample:")
print(target_sums.head())

print("=" * 50)

🔍 DEBUGGING TARGET DATA STRUCTURE
📊 Raw Target Data Shape: (806, 5)
📋 Target Data Columns: ['Date', 'Week', 'Channel', 'Category', 'Target']

📅 First 10 rows of Target Data:
   Date  Week   Channel Category      Target
0     1     1  Jumbo.ae      ACC  2100.00000
1     2     2  Jumbo.ae      ACC  2160.00000
2     3     2  Jumbo.ae      ACC  2125.16129
3     4     2  Jumbo.ae      ACC  2125.16129
4     5     2  Jumbo.ae      ACC  2220.00000
5     6     2  Jumbo.ae      ACC  1980.00000
6     7     2  Jumbo.ae      ACC  1980.00000
7     8     2  Jumbo.ae      ACC  2100.00000
8     9     3  Jumbo.ae      ACC  2040.00000
9    10     3  Jumbo.ae      ACC  1980.00000

🏷️ Unique Channels in Target Data:
['Jumbo.ae' 'EA']

🏷️ Unique Categories in Target Data:
['ACC' 'AV' 'GAM' 'IMG' 'IT' 'LA' 'MM' 'OAD' 'PG' 'S&N' 'SDA' 'TEL' 'WTC']

📊 Processed Target Sums Shape: (31, 26)

📋 Target Sums Sample:
Channel           EA                                                       \
Category         ACC   

In [316]:
# 📅 Week Analysis Configuration - User Input Based
from datetime import datetime
import calendar

print("🗓️  WEEK ANALYSIS CONFIGURATION")
print("=" * 50)
print("📝 Choose your preferred starting day for weekly analysis:")
print("   Options: Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday")
print()

# Get the first day of the month from user
while True:
    first_day = first_day.strip().lower()
    if first_day in ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']:
        break
    print('Invalid input! Please enter a valid day name.')

# Create a mapping of days to their position in a week (0=Monday to 6=Sunday)
day_positions = {
    'monday': 0, 'tuesday': 1, 'wednesday': 2, 'thursday': 3,
    'friday': 4, 'saturday': 5, 'sunday': 6
}
day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Get position of the first day (0-6, where 0 is Monday)
first_day_position = day_positions[first_day]
start_day = day_names[first_day_position].capitalize()

print(f"✅ First day of month set to: {start_day}")
print(f"📊 Configuration: Week analysis starts based on {start_day} as first day")
print("=" * 50)

def get_week_info(day_of_month, first_day_pos):
    """Get week information for a given day.
    Returns (week_number, is_first_partial_week)"""
    # For days in the first partial week
    if first_day_pos > 0:  # If month doesn't start on Monday
        days_till_next_monday = 7 - first_day_pos
        if day_of_month <= days_till_next_monday:
            return 1, True
        # Adjust day number to calculate remaining weeks
        adjusted_day = day_of_month - days_till_next_monday
        return (adjusted_day - 1) // 7 + 2, False
    else:  # If month starts on Monday
        return (day_of_month - 1) // 7 + 1, False

def get_week_number(day_of_month, first_day_pos):
    """Get week number for a given day of month"""
    week_num, _ = get_week_info(day_of_month, first_day_pos)
    return week_num

def get_week_label(week_num):
    """Get descriptive week label"""
    return f"Week {week_num}"

def get_day_name(day_number, first_day_pos):
    """Get the day name for a given day of month"""
    # Calculate the day of week (0-6, where 0 is Monday)
    day_of_week = (first_day_pos + day_number - 1) % 7
    return day_names[day_of_week]

# Test the calculation
print(f"\n📊 Week Calculation Test (First day: {start_day})")
print("=" * 50)
for day in range(1, 16):  # Show first 15 days
    week_num = get_week_number(day, first_day_position)
    week_label = get_week_label(week_num)
    day_name = get_day_name(day, first_day_position)
    print(f"Day {day:2d} ({day_name:9s}) → {week_label}")

# Calculate total weeks for a typical 31-day month
max_weeks = max([get_week_number(day, first_day_position) for day in range(1, 32)])
print(f"\n📈 Total weeks in a 31-day month: {max_weeks}")
print(f"📋 Analysis will group days based on {start_day} as first day of month")
print("=" * 50)

# Store variables for use in other cells
start_day_num = first_day_position  # For compatibility with existing code


🗓️  WEEK ANALYSIS CONFIGURATION
📝 Choose your preferred starting day for weekly analysis:
   Options: Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday

✅ First day of month set to: Sunday
📊 Configuration: Week analysis starts based on Sunday as first day

📊 Week Calculation Test (First day: Sunday)
Day  1 (Sunday   ) → Week 1
Day  2 (Monday   ) → Week 2
Day  3 (Tuesday  ) → Week 2
Day  4 (Wednesday) → Week 2
Day  5 (Thursday ) → Week 2
Day  6 (Friday   ) → Week 2
Day  7 (Saturday ) → Week 2
Day  8 (Sunday   ) → Week 2
Day  9 (Monday   ) → Week 3
Day 10 (Tuesday  ) → Week 3
Day 11 (Wednesday) → Week 3
Day 12 (Thursday ) → Week 3
Day 13 (Friday   ) → Week 3
Day 14 (Saturday ) → Week 3
Day 15 (Sunday   ) → Week 3

📈 Total weeks in a 31-day month: 6
📋 Analysis will group days based on Sunday as first day of month


In [317]:
# Collect week-wise and TYPE-wise sums for each sheet using simplified week calculation
results = []
type_results = []
idg_results = []  # New list for IDG results

for path, sheet, display_name in sheet_info:
    df = pd.read_excel(path, sheet_name=sheet)
    filtered_df = df[~df['idg'].isin(['FOC', 'Remove', 'WRT'])].copy()
    filtered_df['InvoiceDay'] = pd.to_datetime(filtered_df['InvoiceDate'], dayfirst=True, errors='coerce').dt.day


    original_len = len(filtered_df)
    filtered_df = filtered_df[filtered_df['InvoiceDay'] <= max_invoice_day]
    print(f"📉 {display_name}: Filtered {original_len - len(filtered_df)} rows with InvoiceDay > {max_invoice_day}")


    
    # Add week number calculation using the simplified function
    filtered_df['WeekNumber'] = filtered_df['InvoiceDay'].apply(
        lambda day: get_week_number(day, first_day_position)
    )
    
    # Map CC to Jumbo.ae in the TYPE column
    filtered_df['TYPE'] = filtered_df['TYPE'].replace('CC', 'Jumbo.ae')
    filtered_df['TYPE'] = filtered_df['TYPE'].replace('jumbo.ae', 'Jumbo.ae')
    
    # Week-wise sum - use a unique identifier combining file path and sheet name
    unique_id = f"{path}_{sheet}"
    invoice_week_sum = filtered_df.groupby('WeekNumber')['Amount Invoiced W.O. VAT'].sum()
    results.append((unique_id, invoice_week_sum, display_name))
    
    # TYPE-wise sum for Jumbo.ae and EA by week - use unique identifier
    filtered_type = filtered_df[filtered_df['TYPE'].isin(['Jumbo.ae', 'EA'])]
    sum_by_week_type = filtered_type.groupby(['WeekNumber', 'TYPE'])['Amount Invoiced W.O. VAT'].sum().unstack(fill_value=0)
    type_results.append((unique_id, sum_by_week_type, display_name))
    
    # IDG-wise sum by week (new addition) - use unique identifier
    sum_by_week_idg = filtered_df.pivot_table(
        values='Amount Invoiced W.O. VAT',
        index='WeekNumber',
        columns='idg',
        aggfunc='sum',
        fill_value=0
    )
    idg_results.append((unique_id, display_name, sum_by_week_idg))

type_results

# Target Data Processing Functions with Channel Filtering and Debug Output
print("🎯 TARGET DATA PROCESSING WITH CHANNEL FILTERING")
print("=" * 60)

def process_target_data_by_week_and_channel(channel_filter=None):
    """
    Process target data and group by week and IDG (Category), with optional channel filtering
    
    Args:
        channel_filter: str, optional - Filter by 'Jumbo.ae' or 'EA' or None for all
    
    Returns:
        DataFrame with weeks as index and IDG (Category) as columns
    """
    print(f"\n🔄 Processing target data for channel: {channel_filter or 'ALL CHANNELS'}")
    
    # Read target data
    target_df = pd.read_excel(TARGET_PATH, sheet_name=TARGET_SHEET)
    
    print(f"📊 Original target data shape: {target_df.shape}")
    
    # Apply channel filter if specified
    if channel_filter:
        filtered_target = target_df[target_df['Channel'] == channel_filter].copy()
        print(f"🔍 After {channel_filter} filter: {filtered_target.shape}")
        
        if filtered_target.empty:
            print(f"⚠️ WARNING: No target data found for channel '{channel_filter}'")
            print(f"📅 Available channels: {target_df['Channel'].unique()}")
            return pd.DataFrame()
    else:
        filtered_target = target_df.copy()
    
    # Use the 'Date' column directly as 'Day' as it contains day numbers
    filtered_target['Day'] = filtered_target['Date']
    
    # Ensure 'Day' column is numeric and handle potential errors (e.g., non-numeric values)
    filtered_target['Day'] = pd.to_numeric(filtered_target['Day'], errors='coerce')

    # Remove rows with invalid dates (where 'Day' could not be converted to numeric)
    before_date_filter = len(filtered_target)
    filtered_target = filtered_target.dropna(subset=['Day'])
    after_date_filter = len(filtered_target)
    
    if before_date_filter != after_date_filter:
        print(f"⚠️ Removed {before_date_filter - after_date_filter} rows with invalid 'Day' values (non-numeric or empty).")
    
    # Convert 'Day' to integer type after cleaning
    if not filtered_target.empty:
        filtered_target['Day'] = filtered_target['Day'].astype(int)

    original_len = len(filtered_target)
    filtered_target = filtered_target[filtered_target['Day'] <= max_invoice_day]
    print(f"📉 {display_name}: Filtered {original_len - len(filtered_target)} rows with Day > {max_invoice_day}")
    # Add week number calculation
    filtered_target['WeekNumber'] = filtered_target['Day'].apply(
        lambda day: get_week_number(day, first_day_position)
    )
    
    print(f"📅 Date range in target: {filtered_target['Day'].min()} to {filtered_target['Day'].max()}")
    print(f"📋 Week range: {filtered_target['WeekNumber'].min()} to {filtered_target['WeekNumber'].max()}")
    
    # Check if Category column exists and use it, otherwise use Channel as IDG
    if 'Category' in filtered_target.columns:
        print(f"🏷️ Using 'Category' column for IDG grouping")
        print(f"📅 Available categories: {filtered_target['Category'].unique()}")
        
        # Group by Week and Category to get target by week and IDG
        target_by_week = filtered_target.groupby(['WeekNumber', 'Category'])['Target'].sum().unstack(level='Category', fill_value=0)
    else:
        print(f"🏷️ No 'Category' column found, using 'Channel' for IDG grouping")
        
        # Group by Week and Channel to get target by week
        target_by_week = filtered_target.groupby(['WeekNumber', 'Channel'])['Target'].sum().unstack(level='Channel', fill_value=0)
    
    # Clean column names
    target_by_week.columns.name = None
    
    print(f"📊 Final target pivot shape: {target_by_week.shape}")
    print(f"📋 Target IDGs/Categories: {list(target_by_week.columns)}")
    
    return target_by_week

# Test target processing for all channels
print("\n🔬 TESTING TARGET DATA PROCESSING:")
print("-" * 40)

# Import IPython display to avoid conflicts with overridden display variable
from IPython.display import display

# Test 1: All channels
all_channels_target = process_target_data_by_week_and_channel()
if not all_channels_target.empty:
    print("\n📋 ALL CHANNELS TARGET DATA:")
    display(all_channels_target.round(2))

# Test 2: Jumbo.ae only  
jumbo_target = process_target_data_by_week_and_channel('Jumbo.ae')
if not jumbo_target.empty:
    print("\n📋 JUMBO.AE TARGET DATA:")
    display(jumbo_target.round(2))

# Test 3: EA only
ea_target = process_target_data_by_week_and_channel('EA')
if not ea_target.empty:
    print("\n📋 EA TARGET DATA:")
    display(ea_target.round(2))

print("\n=" * 60)

📉 May 25: Filtered 210 rows with InvoiceDay > 30
📉 June 24: Filtered 0 rows with InvoiceDay > 30
📉 June 24: Filtered 0 rows with InvoiceDay > 30
📉 June 25: Filtered 0 rows with InvoiceDay > 30
🎯 TARGET DATA PROCESSING WITH CHANNEL FILTERING

🔬 TESTING TARGET DATA PROCESSING:
----------------------------------------

🔄 Processing target data for channel: ALL CHANNELS
📊 Original target data shape: (806, 5)
📉 June 25: Filtered 26 rows with Day > 30
📅 Date range in target: 1 to 30
📋 Week range: 1 to 6
🏷️ Using 'Category' column for IDG grouping
📅 Available categories: ['ACC' 'AV' 'GAM' 'IMG' 'IT' 'LA' 'MM' 'OAD' 'PG' 'S&N' 'SDA' 'TEL' 'WTC']
📊 Final target pivot shape: (6, 13)
📋 Target IDGs/Categories: ['ACC', 'AV', 'GAM', 'IMG', 'IT', 'LA', 'MM', 'OAD', 'PG', 'S&N', 'SDA', 'TEL', 'WTC']

📋 ALL CHANNELS TARGET DATA:
📉 June 25: Filtered 0 rows with InvoiceDay > 30
🎯 TARGET DATA PROCESSING WITH CHANNEL FILTERING

🔬 TESTING TARGET DATA PROCESSING:
----------------------------------------

🔄 P

Unnamed: 0_level_0,ACC,AV,GAM,IMG,IT,LA,MM,OAD,PG,S&N,SDA,TEL,WTC
WeekNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,2548.43,11651.67,7218.74,5460.0,48381.92,5692.0,4242.75,5306.09,4759.58,6288.45,7997.61,117195.09,3888.29
2,17705.2,80813.22,50009.52,37851.77,331591.59,39602.13,29477.49,36626.43,32570.98,43495.99,55102.99,803135.85,26903.79
3,16458.05,75053.43,46416.56,35145.32,305994.81,36840.71,27401.61,33929.33,30032.02,40337.0,50994.57,741101.11,24954.43
4,15781.17,72226.03,44778.38,33854.68,302038.11,35217.1,26272.67,32985.22,29739.51,39044.56,49771.75,731664.6,24137.16
5,17428.64,79565.15,49243.25,37269.03,326887.1,38977.55,29016.92,36079.16,32114.12,42836.74,54290.41,791749.28,26495.05
6,2535.98,11580.79,7168.91,5425.0,47682.16,5670.0,4222.12,5255.92,4685.7,6238.03,7911.56,115492.38,3858.06



🔄 Processing target data for channel: Jumbo.ae
📊 Original target data shape: (806, 5)
🔍 After Jumbo.ae filter: (403, 5)
📉 June 25: Filtered 13 rows with Day > 30
📅 Date range in target: 1 to 30
📋 Week range: 1 to 6
🏷️ Using 'Category' column for IDG grouping
📅 Available categories: ['ACC' 'AV' 'GAM' 'IMG' 'IT' 'LA' 'MM' 'OAD' 'PG' 'S&N' 'SDA' 'TEL' 'WTC']
📊 Final target pivot shape: (6, 13)
📋 Target IDGs/Categories: ['ACC', 'AV', 'GAM', 'IMG', 'IT', 'LA', 'MM', 'OAD', 'PG', 'S&N', 'SDA', 'TEL', 'WTC']

📋 JUMBO.AE TARGET DATA:


Unnamed: 0_level_0,ACC,AV,GAM,IMG,IT,LA,MM,OAD,PG,S&N,SDA,TEL,WTC
WeekNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,2100.0,9100.0,5425.0,4200.0,23190.67,4900.0,3500.0,3500.0,2100.0,4473.43,4900.0,55897.49,2800.0
2,14690.32,63658.06,37950.0,29380.65,162227.82,34277.42,24483.87,24483.87,14690.32,31293.38,34277.42,391024.85,19587.1
3,13705.16,59389.03,35405.0,27410.32,151348.51,31978.71,22841.94,22841.94,13705.16,29194.79,31978.71,364801.97,18273.55
4,12950.32,56118.06,33455.0,25900.65,143012.7,30217.42,21583.87,21583.87,12950.32,27586.83,30217.42,344709.79,17267.1
5,14450.32,62618.06,37330.0,28900.65,159577.46,33717.42,24083.87,24083.87,14450.32,30782.13,33717.42,384636.57,19267.1
6,2100.0,9100.0,5425.0,4200.0,23190.67,4900.0,3500.0,3500.0,2100.0,4473.43,4900.0,55897.49,2800.0



🔄 Processing target data for channel: EA
📊 Original target data shape: (806, 5)
🔍 After EA filter: (403, 5)
📉 June 25: Filtered 13 rows with Day > 30
📅 Date range in target: 1 to 30
📋 Week range: 1 to 6
🏷️ Using 'Category' column for IDG grouping
📅 Available categories: ['ACC' 'AV' 'GAM' 'IMG' 'IT' 'LA' 'MM' 'OAD' 'PG' 'S&N' 'SDA' 'TEL' 'WTC']
📊 Final target pivot shape: (6, 13)
📋 Target IDGs/Categories: ['ACC', 'AV', 'GAM', 'IMG', 'IT', 'LA', 'MM', 'OAD', 'PG', 'S&N', 'SDA', 'TEL', 'WTC']

📋 EA TARGET DATA:


Unnamed: 0_level_0,ACC,AV,GAM,IMG,IT,LA,MM,OAD,PG,S&N,SDA,TEL,WTC
WeekNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,448.43,2551.67,1793.74,1260.0,25191.25,792.0,742.75,1806.09,2659.58,1815.02,3097.61,61297.6,1088.29
2,3014.88,17155.15,12059.52,8471.13,169363.76,5324.71,4993.62,12142.56,17880.66,12202.61,20825.57,412111.0,7316.69
3,2752.89,15664.39,11011.56,7735.0,154646.29,4862.0,4559.68,11087.39,16326.86,11142.22,19015.86,376299.14,6680.88
4,2830.84,16107.96,11323.38,7954.03,159025.42,4999.68,4688.79,11401.35,16789.18,11457.73,19554.33,386954.81,6870.07
5,2978.31,16947.09,11913.25,8368.39,167309.64,5260.13,4933.05,11995.29,17663.79,12054.61,20572.99,407112.71,7227.95
6,435.98,2480.79,1743.91,1225.0,24491.49,770.0,722.12,1755.92,2585.7,1764.6,3011.56,59594.89,1058.06



=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=
=


In [318]:
# Get the first sheet info
first_path, first_sheet, first_display = sheet_info[0]

# Read and process the data
df = pd.read_excel(first_path, sheet_name=first_sheet)

# Filter out unwanted IDG values and create pivot table
filtered_df = df[~df['idg'].isin(['FOC', 'Remove', 'WRT'])].copy()
filtered_df['InvoiceDay'] = pd.to_datetime(filtered_df['InvoiceDate'], dayfirst=True, errors='coerce').dt.day

# Create comprehensive IDG pivot table function for reusability

def process_sheet_data_by_type(path, sheet_name, display_name, type_filter=None):
    """Process a single sheet and return IDG pivot by week, optionally filtered by TYPE"""
    df = pd.read_excel(path, sheet_name=sheet_name)
    filtered_df = df[~df['idg'].isin(['FOC', 'Remove', 'WRT'])].copy()
    
    # Apply TYPE filter if specified
    if type_filter:
        # Map CC to Jumbo.ae first
        filtered_df['TYPE'] = filtered_df['TYPE'].replace('CC', 'Jumbo.ae')
        filtered_df = filtered_df[filtered_df['TYPE'] == type_filter]
    
    filtered_df['InvoiceDay'] = pd.to_datetime(filtered_df['InvoiceDate'], dayfirst=True, errors='coerce').dt.day

    original_len = len(filtered_df)
    filtered_df = filtered_df[filtered_df['InvoiceDay'] <= max_invoice_day]
    print(f"📉 {display_name}: Filtered {original_len - len(filtered_df)} rows with InvoiceDay > {max_invoice_day}")
    
    # Add week number calculation using simplified function
    filtered_df['WeekNumber'] = filtered_df['InvoiceDay'].apply(
        lambda day: get_week_number(day, first_day_position)
    )
    
    # Create pivot table with IDG on rows and weeks on columns
    idg_pivot = filtered_df.pivot_table(
        values='Amount Invoiced W.O. VAT',
        index='idg',
        columns='WeekNumber',
        aggfunc='sum',
        fill_value=0
    )
    
    return idg_pivot

def create_comprehensive_pivot_table(sheet_info, periods, type_filter=None, table_name="IDG"):
    """Create a comprehensive pivot table with weeks as super columns and periods as sub-columns, including target data"""
    
    print(f"\n🔄 Creating {table_name} Pivot Table with Target Data")
    print(f"🏷️ Type Filter: {type_filter or 'None (All Types)'}")
    
    # Process target data based on type filter
    # Map type_filter to appropriate channel for target data
    target_channel_filter = None
    if type_filter == "EA":
        target_channel_filter = "EA"
    elif type_filter == "Jumbo.ae":
        target_channel_filter = "Jumbo.ae"
    # For Overall analysis (type_filter=None), we use all channels
    
    print(f"🎯 Processing target data with channel filter: {target_channel_filter or 'ALL CHANNELS'}")
    target_by_week = process_target_data_by_week_and_channel(target_channel_filter)
    
    # Process all sheets with optional TYPE filter
    # Using display_name as the key in the dictionary instead of sheet_name
    sheet_data = {}
    for path, sheet_name, display_name in sheet_info:
        print(f"📄 Processing {display_name} data...")
        sheet_data[display_name] = process_sheet_data_by_type(path, sheet_name, display_name, type_filter)
    
    # Get all unique IDG values across all sheets and target data
    all_idgs = set()
    for data in sheet_data.values():
        all_idgs.update(data.index)
    
    # Add IDGs from target data if available
    if not target_by_week.empty:
        all_idgs.update(target_by_week.columns)
        print(f"🎯 Target IDGs found: {list(target_by_week.columns)}")
    else:
        print(f"⚠️ No target data available for filter: {target_channel_filter}")
    
    all_idgs = sorted(list(all_idgs))
    print(f"📋 Combined IDGs: {all_idgs}")
    
    # Calculate maximum week number across all data
    max_week = 1
    for data in sheet_data.values():
        if len(data.columns) > 0:
            max_week = max(max_week, max(data.columns))
    
    if not target_by_week.empty and len(target_by_week.index) > 0:
        max_week = max(max_week, max(target_by_week.index))
    
    print(f"📅 Maximum week number: {max_week}")
    
    # Create the comprehensive pivot table
    weeks = list(range(1, max_week + 1))
    
    # Create multi-level column index with target, comparison columns and totals
    # Order: Last Month, Last Year, Target, Current Month, vs Target %, vs Last Year %, vs Last Month %
    column_tuples = []
    for week in weeks:
        week_label = get_week_label(week)
        # Add period columns in specific order
        column_tuples.append((week_label, LAST_MONTH_DISPLAY))
        column_tuples.append((week_label, LAST_YEAR_DISPLAY))
        column_tuples.append((week_label, 'Target'))
        column_tuples.append((week_label, CURRENT_DISPLAY))
        # Add comparison columns
        column_tuples.append((week_label, 'v/s Target %'))
        column_tuples.append((week_label, 'v/s Last Year %'))
        column_tuples.append((week_label, 'v/s Last Month %'))
    # Add Total columns in same order
    column_tuples.append(('Total', LAST_MONTH_DISPLAY))
    column_tuples.append(('Total', LAST_YEAR_DISPLAY))
    column_tuples.append(('Total', 'Target'))
    column_tuples.append(('Total', CURRENT_DISPLAY))
    column_tuples.append(('Total', 'v/s Target %'))
    column_tuples.append(('Total', 'v/s Last Year %'))
    column_tuples.append(('Total', 'v/s Last Month %'))
    
    multi_columns = pd.MultiIndex.from_tuples(column_tuples, names=['Week', 'Period'])
    
    # Create the final dataframe
    final_df = pd.DataFrame(index=all_idgs, columns=multi_columns)
    
    # Fill the dataframe with data and calculate comparisons
    for idg in all_idgs:
        for week in weeks:
            week_label = get_week_label(week)
            
            # Fill period data
            for period in periods:
                if period in sheet_data and idg in sheet_data[period].index and week in sheet_data[period].columns:
                    final_df.loc[idg, (week_label, period)] = sheet_data[period].loc[idg, week]
                else:
                    final_df.loc[idg, (week_label, period)] = 0
            
            # Fill target data
            if not target_by_week.empty and week in target_by_week.index and idg in target_by_week.columns:
                final_df.loc[idg, (week_label, 'Target')] = target_by_week.loc[week, idg]
            else:
                final_df.loc[idg, (week_label, 'Target')] = 0
            
            # Calculate comparison percentages for each IDG and week
            current_val = final_df.loc[idg, (week_label, CURRENT_DISPLAY)]
            last_year_val = final_df.loc[idg, (week_label, LAST_YEAR_DISPLAY)]
            last_month_val = final_df.loc[idg, (week_label, LAST_MONTH_DISPLAY)]
            target_val = final_df.loc[idg, (week_label, 'Target')]
            
            # v/s Target %
            if target_val != 0:
                vs_target = (current_val / target_val * 100)  # Changed formula
                final_df.loc[idg, (week_label, 'v/s Target %')] = round(vs_target, 2)
            else:
                final_df.loc[idg, (week_label, 'v/s Target %')] = 0 if current_val == 0 else float('inf') # Or handle as per requirement for 0 target
            
            # v/s Last Year %
            if last_year_val != 0:
                vs_last_year = ((current_val - last_year_val) / last_year_val * 100)
                final_df.loc[idg, (week_label, 'v/s Last Year %')] = round(vs_last_year, 2)
            else:
                final_df.loc[idg, (week_label, 'v/s Last Year %')] = 0 if current_val == 0 else float('inf')
            
            # v/s Last Month %
            if last_month_val != 0:
                vs_last_month = ((current_val - last_month_val) / last_month_val * 100)
                final_df.loc[idg, (week_label, 'v/s Last Month %')] = round(vs_last_month, 2)
            else:
                final_df.loc[idg, (week_label, 'v/s Last Month %')] = 0 if current_val == 0 else float('inf')
    
    # Fill NaN values with 0
    final_df = final_df.fillna(0)
    
    # Calculate Total columns for each IDG
    for idg in all_idgs:
        # Calculate totals for each period across all weeks
        for period in periods:
            period_cols = [col for col in final_df.columns if col[1] == period and col[0] != 'Total']
            total_value = final_df.loc[idg, period_cols].sum()
            final_df.loc[idg, ('Total', period)] = total_value
        
        # Calculate total for target
        target_cols = [col for col in final_df.columns if col[1] == 'Target' and col[0] != 'Total']
        total_target = final_df.loc[idg, target_cols].sum()
        final_df.loc[idg, ('Total', 'Target')] = total_target
        
        # Calculate total comparison percentages
        total_current = final_df.loc[idg, ('Total', CURRENT_DISPLAY)]
        total_last_year = final_df.loc[idg, ('Total', LAST_YEAR_DISPLAY)]
        total_last_month = final_df.loc[idg, ('Total', LAST_MONTH_DISPLAY)]
        total_target = final_df.loc[idg, ('Total', 'Target')]
        
        # Total v/s Target %
        if total_target != 0:
            total_vs_target = (total_current / total_target * 100)  # Changed formula
            final_df.loc[idg, ('Total', 'v/s Target %')] = round(total_vs_target, 2)
        else:
            final_df.loc[idg, ('Total', 'v/s Target %')] = 0 if total_current == 0 else float('inf') # Or handle as per requirement for 0 target
        
        # Total v/s Last Year %
        if total_last_year != 0:
            total_vs_last_year = ((total_current - total_last_year) / total_last_year * 100)
            final_df.loc[idg, ('Total', 'v/s Last Year %')] = round(total_vs_last_year, 2)
        else:
            final_df.loc[idg, ('Total', 'v/s Last Year %')] = 0 if total_current == 0 else float('inf')
        
        # Total v/s Last Month %
        if total_last_month != 0:
            total_vs_last_month = ((total_current - total_last_month) / total_last_month * 100)
            final_df.loc[idg, ('Total', 'v/s Last Month %')] = round(total_vs_last_month, 2)
        else:
            final_df.loc[idg, ('Total', 'v/s Last Month %')] = 0 if total_current == 0 else float('inf')
    
    # Add Total row for all IDGs combined
    total_row_data = {}
    for col in final_df.columns:
        if 'v/s' in col[1]:  # For percentage columns, calculate weighted averages
            if col[1] in ['v/s Target %', 'v/s Last Year %', 'v/s Last Month %']:
                # Calculate overall percentage for the total row
                if col[0] == 'Total':  # Total column
                    total_current = final_df[('Total', CURRENT_DISPLAY)].sum()
                    total_base = 0
                    if col[1] == 'v/s Target %':
                        total_base = final_df[('Total', 'Target')].sum()
                    elif col[1] == 'v/s Last Year %':
                        total_base = final_df[('Total', LAST_YEAR_DISPLAY)].sum()
                    else:  # v/s Last Month %
                        total_base = final_df[('Total', LAST_MONTH_DISPLAY)].sum()
                    
                    if total_base != 0:
                        if col[1] == 'v/s Target %':
                            total_percentage = (total_current / total_base * 100) # Changed formula
                        else:
                            total_percentage = ((total_current - total_base) / total_base * 100)
                        total_row_data[col] = round(total_percentage, 2)
                    else:
                        total_row_data[col] = 0
                else:  # Week-wise percentage columns
                    week_label = col[0]
                    week_current = final_df[(week_label, CURRENT_DISPLAY)].sum()
                    week_base = 0
                    if col[1] == 'v/s Target %':
                        week_base = final_df[(week_label, 'Target')].sum()
                    elif col[1] == 'v/s Last Year %':
                        week_base = final_df[(week_label, LAST_YEAR_DISPLAY)].sum()
                    else:  # v/s Last Month %
                        week_base = final_df[(week_label, LAST_MONTH_DISPLAY)].sum()
                    
                    if week_base != 0:
                        if col[1] == 'v/s Target %':
                            week_percentage = (week_current / week_base * 100) # Changed formula
                        else:
                            week_percentage = ((week_current - week_base) / week_base * 100)
                        total_row_data[col] = round(week_percentage, 2)
                    else:
                        total_row_data[col] = 0
        else:
            # For amount columns, sum all IDGs
            total_row_data[col] = final_df[col].sum()
    
    # Create total row as a DataFrame and concatenate
    total_row_df = pd.DataFrame([total_row_data], index=['Total'])
    final_df = pd.concat([final_df, total_row_df])
    
    print(f"✅ {table_name} pivot table created successfully!")
    print(f"📊 Final shape: {final_df.shape}")
    
    return final_df, all_idgs, max_week

# Create three pivot tables: Overall, EA only, and Jumbo.ae only with Target Data
print("🔄 Creating Multiple Pivot Tables WITH TARGET DATA AND PROPER FILTERING...")
print("=" * 70)

# 1. Overall IDG Pivot Table (all data)
print("\n📊 1. OVERALL IDG ANALYSIS (All Types) - WITH TARGET DATA")
print("-" * 55)
global_idg_pivot, all_idgs_global, max_week_global = create_comprehensive_pivot_table(
    sheet_info, periods, type_filter=None, table_name="Overall IDG"
)

print(f"\n📊 Data Shape: {global_idg_pivot.shape}")
print(f"🏷️ IDG Categories: {len(all_idgs_global)} (+ Total row)")
print(f"📅 Periods: {periods} + Target + Comparisons")
print(f"📋 Weeks analyzed: {max_week_global} (Starting day: {start_day}) + Total column")

# Summary statistics by period for overall
print("\n📊 SUMMARY BY PERIOD (Overall):")
for period in periods:
    period_total = global_idg_pivot.loc['Total', ('Total', period)]
    print(f"  {period}: {period_total:,.2f}")
target_total = global_idg_pivot.loc['Total', ('Total', 'Target')]
print(f"  Target: {target_total:,.2f}")

print("\n📋 Overall IDG Pivot Table (with Target):")
display(global_idg_pivot.round(2))

# 2. EA Only Pivot Table
print("\n" + "=" * 70)
print("📊 2. EA ONLY ANALYSIS - WITH TARGET DATA")
print("-" * 45)
ea_idg_pivot, all_idgs_ea, max_week_ea = create_comprehensive_pivot_table(
    sheet_info, periods, type_filter="EA", table_name="EA IDG"
)

print(f"\n📊 Data Shape: {ea_idg_pivot.shape}")
print(f"🏷️ IDG Categories: {len(all_idgs_ea)} (+ Total row)")
print(f"📅 Periods: {periods} + Target + Comparisons")
print(f"📋 Weeks analyzed: {max_week_ea} (Starting day: {start_day}) + Total column")

# Summary statistics by period for EA
print("\n📊 SUMMARY BY PERIOD (EA Only):")
for period in periods:
    period_total = ea_idg_pivot.loc['Total', ('Total', period)]
    print(f"  {period}: {period_total:,.2f}")
target_total_ea = ea_idg_pivot.loc['Total', ('Total', 'Target')]
print(f"  Target: {target_total_ea:,.2f}")

print("\n📋 EA Only IDG Pivot Table (with Target):")
display(ea_idg_pivot.round(2))

# 3. Jumbo.ae Only Pivot Table
print("\n" + "=" * 70)
print("📊 3. JUMBO.AE ONLY ANALYSIS - WITH TARGET DATA")
print("-" * 50)
jumbo_idg_pivot, all_idgs_jumbo, max_week_jumbo = create_comprehensive_pivot_table(
    sheet_info, periods, type_filter="Jumbo.ae", table_name="Jumbo.ae IDG"
)

print(f"\n📊 Data Shape: {jumbo_idg_pivot.shape}")
print(f"🏷️ IDG Categories: {len(all_idgs_jumbo)} (+ Total row)")
print(f"📅 Periods: {periods} + Target + Comparisons")
print(f"📋 Weeks analyzed: {max_week_jumbo} (Starting day: {start_day}) + Total column")

# Summary statistics by period for Jumbo.ae
print("\n📊 SUMMARY BY PERIOD (Jumbo.ae Only):")
for period in periods:
    period_total = jumbo_idg_pivot.loc['Total', ('Total', period)]
    print(f"  {period}: {period_total:,.2f}")
target_total_jumbo = jumbo_idg_pivot.loc['Total', ('Total', 'Target')]
print(f"  Target: {target_total_jumbo:,.2f}")

print("\n📋 Jumbo.ae Only IDG Pivot Table (with Target):")
display(jumbo_idg_pivot.round(2))

print("\n" + "=" * 70)
print("✅ ALL PIVOT TABLES WITH TARGET DATA CREATED SUCCESSFULLY!")
print("📊 Summary:")
print(f"   • Overall Analysis: {len(all_idgs_global)} IDGs, {max_week_global} weeks")
print(f"   • EA Analysis: {len(all_idgs_ea)} IDGs, {max_week_ea} weeks")
print(f"   • Jumbo.ae Analysis: {len(all_idgs_jumbo)} IDGs, {max_week_jumbo} weeks")
print(f"   • Each table includes: {len(periods)} periods + Target + vs Target % + comparisons + totals")
print(f"   • Column order: Last Month, Last Year, Target, Current Month, vs Target %, vs Last Year %, vs Last Month %")
print("=" * 70)

🔄 Creating Multiple Pivot Tables WITH TARGET DATA AND PROPER FILTERING...

📊 1. OVERALL IDG ANALYSIS (All Types) - WITH TARGET DATA
-------------------------------------------------------

🔄 Creating Overall IDG Pivot Table with Target Data
🏷️ Type Filter: None (All Types)
🎯 Processing target data with channel filter: ALL CHANNELS

🔄 Processing target data for channel: ALL CHANNELS
📊 Original target data shape: (806, 5)
📉 June 25: Filtered 26 rows with Day > 30
📅 Date range in target: 1 to 30
📋 Week range: 1 to 6
🏷️ Using 'Category' column for IDG grouping
📅 Available categories: ['ACC' 'AV' 'GAM' 'IMG' 'IT' 'LA' 'MM' 'OAD' 'PG' 'S&N' 'SDA' 'TEL' 'WTC']
📊 Final target pivot shape: (6, 13)
📋 Target IDGs/Categories: ['ACC', 'AV', 'GAM', 'IMG', 'IT', 'LA', 'MM', 'OAD', 'PG', 'S&N', 'SDA', 'TEL', 'WTC']
📄 Processing May 25 data...
📉 May 25: Filtered 210 rows with InvoiceDay > 30
📄 Processing June 24 data...
📉 May 25: Filtered 210 rows with InvoiceDay > 30
📄 Processing June 24 data...
📉 Jun

  final_df = final_df.fillna(0)
  final_df.loc[idg, ('Total', period)] = total_value
  final_df.loc[idg, ('Total', period)] = total_value
  final_df.loc[idg, ('Total', period)] = total_value
  final_df.loc[idg, ('Total', 'Target')] = total_target
  final_df.loc[idg, ('Total', 'v/s Target %')] = round(total_vs_target, 2)
  final_df.loc[idg, ('Total', 'v/s Last Year %')] = round(total_vs_last_year, 2)
  final_df.loc[idg, ('Total', 'v/s Last Month %')] = round(total_vs_last_month, 2)


Unnamed: 0_level_0,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 2,Week 2,Week 2,...,Week 6,Week 6,Week 6,Total,Total,Total,Total,Total,Total,Total
Unnamed: 0_level_1,May 25,June 24,Target,June 25,v/s Target %,v/s Last Year %,v/s Last Month %,May 25,June 24,Target,...,v/s Target %,v/s Last Year %,v/s Last Month %,May 25,June 24,Target,June 25,v/s Target %,v/s Last Year %,v/s Last Month %
ACC,2002.86,398.1,2548.43,0.0,0.0,-100.0,-100.0,9511.83,3124.22,17705.2,...,70.6,311.38,-66.78,43349.94,20712.78,72457.47,46461.2,64.12,124.31,7.18
AV,14663.81,5098.48,11651.67,0.0,0.0,-100.0,-100.0,42612.7,84293.36,80813.22,...,10.68,-25.69,inf,308517.26,191186.13,330890.27,253455.79,76.6,32.57,-17.85
GAM,289.52,836.19,7218.74,347.62,4.82,-58.43,20.07,6239.76,26465.97,50009.52,...,5.16,inf,-98.71,146102.27,93844.19,204835.37,147694.71,72.1,57.38,1.09
IMG,8368.57,2568.57,5460.0,1808.62,33.12,-29.59,-78.39,35871.54,25552.33,37851.77,...,623.76,1771.04,37.29,201674.84,100927.63,155005.81,136333.38,87.95,35.08,-32.4
IT,66954.33,38434.75,48381.92,42469.85,87.78,10.5,-36.57,347989.35,201419.75,331591.59,...,64.65,84.09,-31.54,1260416.01,944728.38,1362575.69,1229239.58,90.21,30.12,-2.47
LA,3237.67,0.0,5692.0,0.0,0.0,0.0,-100.0,23175.04,14527.62,39602.13,...,61.26,inf,70.66,148802.21,95173.01,161999.48,149254.04,92.13,56.82,0.3
MM,1614.28,1760.0,4242.75,931.43,21.95,-47.08,-42.3,23026.76,10196.29,29477.49,...,146.27,-4.41,191.3,85761.84,74201.32,120633.56,99926.24,82.83,34.67,16.52
OAD,13075.24,3951.43,5306.09,0.0,0.0,-100.0,-100.0,37197.33,15578.7,36626.43,...,118.61,191.04,136.13,132563.72,81008.24,150182.16,66504.51,44.28,-17.9,-49.83
PG,6663.31,1284.76,4759.58,0.0,0.0,-100.0,-100.0,11483.38,7261.95,32570.98,...,13.84,inf,-83.03,105586.83,90255.38,133901.9,43272.66,32.32,-52.06,-59.02
PRO,0.0,14.29,0.0,0.0,0.0,-100.0,0.0,0.0,142.86,0.0,...,0.0,-100.0,0.0,0.0,740.0,0.0,196.67,inf,-73.42,inf



📊 2. EA ONLY ANALYSIS - WITH TARGET DATA
---------------------------------------------

🔄 Creating EA IDG Pivot Table with Target Data
🏷️ Type Filter: EA
🎯 Processing target data with channel filter: EA

🔄 Processing target data for channel: EA
📊 Original target data shape: (806, 5)
🔍 After EA filter: (403, 5)
📉 June 25: Filtered 13 rows with Day > 30
📅 Date range in target: 1 to 30
📋 Week range: 1 to 6
🏷️ Using 'Category' column for IDG grouping
📅 Available categories: ['ACC' 'AV' 'GAM' 'IMG' 'IT' 'LA' 'MM' 'OAD' 'PG' 'S&N' 'SDA' 'TEL' 'WTC']
📊 Final target pivot shape: (6, 13)
📋 Target IDGs/Categories: ['ACC', 'AV', 'GAM', 'IMG', 'IT', 'LA', 'MM', 'OAD', 'PG', 'S&N', 'SDA', 'TEL', 'WTC']
📄 Processing May 25 data...
📉 May 25: Filtered 113 rows with InvoiceDay > 30
📄 Processing June 24 data...
📉 May 25: Filtered 113 rows with InvoiceDay > 30
📄 Processing June 24 data...
📉 June 24: Filtered 0 rows with InvoiceDay > 30
📄 Processing June 25 data...
📉 June 24: Filtered 0 rows with Invoice

  final_df = final_df.fillna(0)
  final_df.loc[idg, ('Total', period)] = total_value
  final_df.loc[idg, ('Total', period)] = total_value
  final_df.loc[idg, ('Total', period)] = total_value
  final_df.loc[idg, ('Total', 'Target')] = total_target
  final_df.loc[idg, ('Total', 'v/s Target %')] = round(total_vs_target, 2)
  final_df.loc[idg, ('Total', 'v/s Last Year %')] = round(total_vs_last_year, 2)
  final_df.loc[idg, ('Total', 'v/s Last Month %')] = round(total_vs_last_month, 2)


Unnamed: 0_level_0,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 2,Week 2,Week 2,...,Week 6,Week 6,Week 6,Total,Total,Total,Total,Total,Total,Total
Unnamed: 0_level_1,May 25,June 24,Target,June 25,v/s Target %,v/s Last Year %,v/s Last Month %,May 25,June 24,Target,...,v/s Target %,v/s Last Year %,v/s Last Month %,May 25,June 24,Target,June 25,v/s Target %,v/s Last Year %,v/s Last Month %
ACC,75.24,0.0,448.43,0.0,0.0,0.0,-100.0,2355.24,185.17,3014.88,...,113.37,556.94,716.52,6703.56,5210.88,12461.34,6684.2,53.64,28.27,-0.29
AV,11236.19,1766.1,2551.67,0.0,0.0,-100.0,-100.0,8235.55,16327.62,17155.15,...,0.0,-100.0,0.0,70879.61,38180.38,70907.05,23290.48,32.85,-39.0,-67.14
GAM,0.0,0.0,1793.74,0.0,0.0,0.0,0.0,4736.19,5425.71,12059.52,...,0.0,0.0,-100.0,15584.33,20442.8,49845.37,24536.38,49.22,20.02,57.44
IMG,0.0,0.0,1260.0,1808.62,143.54,inf,inf,11539.05,6970.97,8471.13,...,0.0,0.0,-100.0,57490.47,32590.06,35013.55,36883.34,105.34,13.17,-35.84
IT,20761.92,27982.47,25191.25,21123.68,83.85,-24.51,1.74,177332.61,142419.09,169363.76,...,117.71,248.05,7.49,670891.38,636900.9,700027.85,647291.93,92.47,1.63,-3.52
LA,858.62,0.0,792.0,0.0,0.0,0.0,-100.0,4457.2,0.0,5324.71,...,0.0,0.0,0.0,21802.35,8538.6,22008.52,6330.48,28.76,-25.86,-70.96
MM,522.86,618.1,742.75,856.19,115.27,38.52,63.75,4934.28,1962.0,4993.62,...,411.91,-26.47,367.55,20198.65,20420.34,20640.01,35271.24,170.89,72.73,74.62
OAD,1903.81,3263.81,1806.09,0.0,0.0,-100.0,-100.0,13730.48,3081.9,12142.56,...,346.61,inf,326.32,50169.19,24700.02,50188.61,27932.11,55.65,13.09,-44.32
PG,5199.07,0.0,2659.58,0.0,0.0,0.0,-100.0,1618.1,3709.57,17880.66,...,0.0,0.0,-100.0,73504.78,59367.76,73905.78,14966.77,20.25,-74.79,-79.64
PRO,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,45.71,0.0,0.0,0.0,-100.0,0.0



📊 3. JUMBO.AE ONLY ANALYSIS - WITH TARGET DATA
--------------------------------------------------

🔄 Creating Jumbo.ae IDG Pivot Table with Target Data
🏷️ Type Filter: Jumbo.ae
🎯 Processing target data with channel filter: Jumbo.ae

🔄 Processing target data for channel: Jumbo.ae
📊 Original target data shape: (806, 5)
🔍 After Jumbo.ae filter: (403, 5)
📉 June 25: Filtered 13 rows with Day > 30
📅 Date range in target: 1 to 30
📋 Week range: 1 to 6
🏷️ Using 'Category' column for IDG grouping
📅 Available categories: ['ACC' 'AV' 'GAM' 'IMG' 'IT' 'LA' 'MM' 'OAD' 'PG' 'S&N' 'SDA' 'TEL' 'WTC']
📊 Final target pivot shape: (6, 13)
📋 Target IDGs/Categories: ['ACC', 'AV', 'GAM', 'IMG', 'IT', 'LA', 'MM', 'OAD', 'PG', 'S&N', 'SDA', 'TEL', 'WTC']
📄 Processing May 25 data...
📉 May 25: Filtered 97 rows with InvoiceDay > 30
📄 Processing June 24 data...
📉 May 25: Filtered 97 rows with InvoiceDay > 30
📄 Processing June 24 data...
📉 June 24: Filtered 0 rows with InvoiceDay > 30
📄 Processing June 25 data...


  final_df = final_df.fillna(0)
  final_df.loc[idg, ('Total', period)] = total_value
  final_df.loc[idg, ('Total', period)] = total_value
  final_df.loc[idg, ('Total', period)] = total_value
  final_df.loc[idg, ('Total', 'Target')] = total_target
  final_df.loc[idg, ('Total', 'v/s Target %')] = round(total_vs_target, 2)
  final_df.loc[idg, ('Total', 'v/s Last Year %')] = round(total_vs_last_year, 2)
  final_df.loc[idg, ('Total', 'v/s Last Month %')] = round(total_vs_last_month, 2)


Unnamed: 0_level_0,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 2,Week 2,Week 2,...,Week 6,Week 6,Week 6,Total,Total,Total,Total,Total,Total,Total
Unnamed: 0_level_1,May 25,June 24,Target,June 25,v/s Target %,v/s Last Year %,v/s Last Month %,May 25,June 24,Target,...,v/s Target %,v/s Last Year %,v/s Last Month %,May 25,June 24,Target,June 25,v/s Target %,v/s Last Year %,v/s Last Month %
ACC,1927.62,398.1,2100.0,0.0,0.0,-100.0,-100.0,7156.59,2939.05,14690.32,...,61.72,260.05,-75.67,36646.38,15501.9,59996.13,39777.0,66.3,156.59,8.54
AV,3427.62,3332.38,9100.0,0.0,0.0,-100.0,-100.0,34377.14,67965.74,63658.06,...,13.59,62.58,inf,237637.65,153005.74,259983.23,230165.31,88.53,50.43,-3.14
GAM,289.52,836.19,5425.0,347.62,6.41,-58.43,20.07,1503.57,21040.26,37950.0,...,6.82,inf,-98.55,130517.94,73401.39,154990.0,123158.33,79.46,67.79,-5.64
IMG,8368.57,2568.57,4200.0,0.0,0.0,-100.0,-100.0,24332.5,18581.36,29380.65,...,805.69,1771.04,123.73,144184.37,68337.58,119992.26,99450.05,82.88,45.53,-31.03
IT,46192.4,10452.28,23190.67,21346.17,92.05,104.22,-53.79,170656.74,59000.66,162227.82,...,8.62,-76.38,-89.02,589524.64,307827.48,662547.84,581947.65,87.83,89.05,-1.29
LA,2379.05,0.0,4900.0,0.0,0.0,0.0,-100.0,18717.84,14527.62,34277.42,...,70.88,inf,70.66,126999.85,86634.41,139990.97,142923.56,102.09,64.97,12.54
MM,1091.43,1141.9,3500.0,75.24,2.15,-93.41,-93.11,18092.47,8234.29,24483.87,...,91.46,32.53,115.73,65563.19,53780.98,99993.55,64655.0,64.66,20.22,-1.39
OAD,11171.43,687.62,3500.0,0.0,0.0,-100.0,-100.0,23466.85,12496.79,24483.87,...,4.22,-93.11,-87.82,82394.52,56308.22,99993.55,38572.4,38.57,-31.5,-53.19
PG,1464.24,1284.76,2100.0,0.0,0.0,-100.0,-100.0,9865.29,3552.38,14690.32,...,30.88,inf,79.66,32082.05,30887.62,59996.13,28305.89,47.18,-8.36,-11.77
PRO,0.0,14.29,0.0,0.0,0.0,-100.0,0.0,0.0,142.86,0.0,...,0.0,-100.0,0.0,0.0,694.29,0.0,196.67,inf,-71.67,inf



✅ ALL PIVOT TABLES WITH TARGET DATA CREATED SUCCESSFULLY!
📊 Summary:
   • Overall Analysis: 15 IDGs, 6 weeks
   • EA Analysis: 15 IDGs, 6 weeks
   • Jumbo.ae Analysis: 15 IDGs, 6 weeks
   • Each table includes: 3 periods + Target + vs Target % + comparisons + totals
   • Column order: Last Month, Last Year, Target, Current Month, vs Target %, vs Last Year %, vs Last Month %


In [319]:
# IDG Analysis Report - Multiple Pivot Tables

# Additional pivot table views for all three analyses

# Weekly Analysis Views

# 1. Show first few weeks detailed view for Overall analysis
print("📊 OVERALL ANALYSIS - First 3 Weeks IDG Data")
print("=" * 50)
first_weeks_cols = []
for week in range(1, min(4, max_week_global + 1)):  # First 3 weeks or max available
    week_label = get_week_label(week)
    for period in periods:  # Use dynamic periods instead of hardcoded values
        first_weeks_cols.append((week_label, period))

if first_weeks_cols:  # Only if we have data
    first_weeks_data = global_idg_pivot[first_weeks_cols]
    display(first_weeks_data.round(2))
else:
    print("No week data available")

print("\n" + "=" * 50)

# 2. Weekly totals comparison for Overall analysis
print("📈 OVERALL ANALYSIS - Weekly Totals Comparison")
print("=" * 45)
weekly_totals = pd.DataFrame(index=range(1, max_week_global + 1), columns=periods)  # Use dynamic periods

for week in range(1, max_week_global + 1):
    week_label = get_week_label(week)
    for period in periods:  # Use dynamic periods
        week_period_cols = [(week_label, period)]
        weekly_total = global_idg_pivot[week_period_cols].sum().sum()
        weekly_totals.loc[week, period] = weekly_total

# Convert to numeric and add growth percentages
weekly_totals = weekly_totals.astype(float)
weekly_totals['Current_vs_LastYear_%'] = ((weekly_totals[CURRENT_DISPLAY] - weekly_totals[LAST_YEAR_DISPLAY]) / weekly_totals[LAST_YEAR_DISPLAY] * 100).round(2)
weekly_totals['Current_vs_LastMonth_%'] = ((weekly_totals[CURRENT_DISPLAY] - weekly_totals[LAST_MONTH_DISPLAY]) / weekly_totals[LAST_MONTH_DISPLAY] * 100).round(2)

# Add week labels for better readability
weekly_totals.index = [get_week_label(week) for week in range(1, max_week_global + 1)]

display(weekly_totals.round(2))

print("\n" + "=" * 50)

# 3. IDG totals by period (summary pivot) for Overall analysis
print("📋 OVERALL ANALYSIS - IDG Totals by Period (Week-based)")
print("=" * 55)
idg_period_summary = pd.DataFrame(index=all_idgs_global, columns=periods)  # Use dynamic periods

for idg in all_idgs_global:
    for period in periods:  # Use dynamic periods
        period_cols = [col for col in global_idg_pivot.columns if col[1] == period]
        idg_period_summary.loc[idg, period] = global_idg_pivot.loc[idg, period_cols].sum()

# Convert to numeric and add totals
idg_period_summary = idg_period_summary.astype(float)
idg_period_summary['Total'] = idg_period_summary.sum(axis=1)
idg_period_summary.loc['Total'] = idg_period_summary.sum()

display(idg_period_summary.round(2))

# 4. Quick comparison between the three analyses
print("\n" + "=" * 60)
print("📊 COMPARISON SUMMARY - All Three Analyses")
print("=" * 60)

# Create summary comparison
comparison_summary = pd.DataFrame({
    'Analysis_Type': ['Overall', 'EA Only', 'Jumbo.ae Only'],
    'IDG_Count': [len(all_idgs_global), len(all_idgs_ea), len(all_idgs_jumbo)],
    'Max_Weeks': [max_week_global, max_week_ea, max_week_jumbo]
})

# Add period totals for each analysis
for period in periods:
    comparison_summary[f'{period}_Total'] = [
        global_idg_pivot.loc['Total', ('Total', period)],
        ea_idg_pivot.loc['Total', ('Total', period)],
        jumbo_idg_pivot.loc['Total', ('Total', period)]
    ]

print("Summary of All Three Pivot Table Analyses:")
display(comparison_summary.round(2))

print("\n📈 Key Insights:")
print(f"• Overall analysis covers {len(all_idgs_global)} IDG categories across {max_week_global} weeks")
print(f"• EA analysis covers {len(all_idgs_ea)} IDG categories across {max_week_ea} weeks")
print(f"• Jumbo.ae analysis covers {len(all_idgs_jumbo)} IDG categories across {max_week_jumbo} weeks")
print(f"• Each analysis includes {len(periods)} periods: {', '.join(periods)}")
print("=" * 60)

📊 OVERALL ANALYSIS - First 3 Weeks IDG Data


Unnamed: 0_level_0,Week 1,Week 1,Week 1,Week 2,Week 2,Week 2,Week 3,Week 3,Week 3
Unnamed: 0_level_1,May 25,June 24,June 25,May 25,June 24,June 25,May 25,June 24,June 25
ACC,2002.86,398.1,0.0,9511.83,3124.22,7052.97,9040.0,2432.38,13084.42
AV,14663.81,5098.48,0.0,42612.7,84293.36,28331.45,79079.04,25348.57,56715.26
GAM,289.52,836.19,347.62,6239.76,26465.97,53799.95,12820.57,31312.23,52762.62
IMG,8368.57,2568.57,1808.62,35871.54,25552.33,27404.98,37989.06,18231.43,23623.52
IT,66954.33,38434.75,42469.85,347989.35,201419.75,364807.55,268521.07,204738.22,244786.33
LA,3237.67,0.0,0.0,23175.04,14527.62,19709.68,36023.5,18951.93,32825.88
MM,1614.28,1760.0,931.43,23026.76,10196.29,35584.72,21805.57,15579.05,17812.58
OAD,13075.24,3951.43,0.0,37197.33,15578.7,22137.17,15239.1,23545.74,16442.9
PG,6663.31,1284.76,0.0,11483.38,7261.95,10207.7,23101.41,28934.36,11661.01
PRO,0.0,14.29,0.0,0.0,142.86,175.24,0.0,200.0,0.0



📈 OVERALL ANALYSIS - Weekly Totals Comparison


Unnamed: 0,May 25,June 24,June 25,Current_vs_LastYear_%,Current_vs_LastMonth_%
Week 1,428216.42,622853.13,285238.3,-54.2,-33.39
Week 2,2653220.23,2231818.94,3172271.07,42.14,19.56
Week 3,2564989.17,1925412.39,2453187.95,27.41,-4.36
Week 4,2786687.31,2996665.78,2180569.34,-27.23,-21.75
Week 5,2806218.87,2081460.3,3629786.35,74.39,29.35
Week 6,709481.38,256965.0,454398.44,76.83,-35.95



📋 OVERALL ANALYSIS - IDG Totals by Period (Week-based)


Unnamed: 0,May 25,June 24,June 25,Total
ACC,86699.89,41425.57,92922.41,221047.86
AV,617034.53,382372.25,506911.58,1506318.37
GAM,292204.55,187688.38,295389.41,775282.34
IMG,403349.68,201855.27,272666.77,877871.72
IT,2520832.02,1889456.76,2458479.16,6868767.94
LA,297604.41,190346.02,298508.07,786458.51
MM,171523.68,148402.64,199852.48,519778.8
OAD,265127.43,162016.49,133009.02,560152.94
PG,211173.65,180510.75,86545.32,478229.73
PRO,0.0,1480.01,393.33,1873.34



📊 COMPARISON SUMMARY - All Three Analyses
Summary of All Three Pivot Table Analyses:


Unnamed: 0,Analysis_Type,IDG_Count,Max_Weeks,May 25_Total,June 24_Total,June 25_Total
0,Overall,15,6,5974406.69,5057587.77,6087725.73
1,EA Only,15,6,2765938.26,3437467.0,3258887.39
2,Jumbo.ae Only,15,6,3208468.43,1620120.77,2828838.34



📈 Key Insights:
• Overall analysis covers 15 IDG categories across 6 weeks
• EA analysis covers 15 IDG categories across 6 weeks
• Jumbo.ae analysis covers 15 IDG categories across 6 weeks
• Each analysis includes 3 periods: May 25, June 24, June 25


In [320]:
# Weekly Comparison Analysis - Focused View for All Three Analyses
print("📊 COMPREHENSIVE GROWTH COMPARISON ANALYSIS")
print("=" * 55)

def analyze_growth_patterns(pivot_table, all_idgs, max_week, analysis_name):
    """Analyze growth patterns for a given pivot table"""
    print(f"\n🔍 {analysis_name.upper()} ANALYSIS - Growth Patterns")
    print("-" * 50)
    
    # Create a focused view showing only comparison percentages
    comparison_cols = []
    for week in range(1, max_week + 1):
        week_label = get_week_label(week)
        comparison_cols.extend([
            (week_label, 'v/s Last Year %'),
            (week_label, 'v/s Last Month %')
        ])
    
    if comparison_cols:
        comparison_data = pivot_table[comparison_cols]
        print(f"\n📈 {analysis_name} - Growth Percentages by Week:")
        display(comparison_data.round(2))
        
        # Calculate average growth rates
        print(f"\n📊 {analysis_name} - Average Growth Rates Across All Weeks:")
        print("-" * 45)
        
        # Get all v/s Last Year columns
        last_year_cols = [col for col in comparison_data.columns if 'v/s Last Year %' in col[1]]
        last_month_cols = [col for col in comparison_data.columns if 'v/s Last Month %' in col[1]]
        
        for idg in all_idgs:
            avg_vs_last_year = comparison_data.loc[idg, last_year_cols].replace([float('inf'), -float('inf')], 0).mean()
            avg_vs_last_month = comparison_data.loc[idg, last_month_cols].replace([float('inf'), -float('inf')], 0).mean()
            print(f"{idg:15s} | Avg vs Last Year: {avg_vs_last_year:6.1f}% | Avg vs Last Month: {avg_vs_last_month:6.1f}%")
        
        # Weekly summary of overall performance
        print(f"\n📋 {analysis_name} - Weekly Performance Summary (All IDGs Combined):")
        print("-" * 55)
        
        weekly_performance = pd.DataFrame(index=range(1, max_week + 1), 
                                        columns=['vs_Last_Year_%', 'vs_Last_Month_%'])
        
        for week in range(1, max_week + 1):
            week_label = get_week_label(week)
            
            # Calculate total amounts for the week using dynamic period names
            current_total = pivot_table[(week_label, CURRENT_DISPLAY)].sum()
            last_year_total = pivot_table[(week_label, LAST_YEAR_DISPLAY)].sum()
            last_month_total = pivot_table[(week_label, LAST_MONTH_DISPLAY)].sum()
            
            # Calculate overall percentage changes
            if last_year_total != 0:
                vs_last_year = ((current_total - last_year_total) / last_year_total * 100)
                weekly_performance.loc[week, 'vs_Last_Year_%'] = round(vs_last_year, 2)
            else:
                weekly_performance.loc[week, 'vs_Last_Year_%'] = 0
                
            if last_month_total != 0:
                vs_last_month = ((current_total - last_month_total) / last_month_total * 100)
                weekly_performance.loc[week, 'vs_Last_Month_%'] = round(vs_last_month, 2)
            else:
                weekly_performance.loc[week, 'vs_Last_Month_%'] = 0
        
        # Add week labels for better readability
        weekly_performance.index = [get_week_label(week) for week in range(1, max_week + 1)]
        
        display(weekly_performance)
        
        return weekly_performance
    else:
        print(f"No comparison data available for {analysis_name}")
        return None

# Analyze all three pivot tables
overall_performance = analyze_growth_patterns(global_idg_pivot, all_idgs_global, max_week_global, "Overall")
ea_performance = analyze_growth_patterns(ea_idg_pivot, all_idgs_ea, max_week_ea, "EA Only")
jumbo_performance = analyze_growth_patterns(jumbo_idg_pivot, all_idgs_jumbo, max_week_jumbo, "Jumbo.ae Only")

# Cross-analysis comparison
print("\n" + "=" * 70)
print("🔄 CROSS-ANALYSIS PERFORMANCE COMPARISON")
print("=" * 70)

if overall_performance is not None and ea_performance is not None and jumbo_performance is not None:
    # Compare average performance across all analyses
    cross_comparison = pd.DataFrame({
        'Analysis': ['Overall', 'EA Only', 'Jumbo.ae Only'],
        'Avg_Growth_vs_Last_Year_%': [
            overall_performance['vs_Last_Year_%'].mean(),
            ea_performance['vs_Last_Year_%'].mean(),
            jumbo_performance['vs_Last_Year_%'].mean()
        ],
        'Avg_Growth_vs_Last_Month_%': [
            overall_performance['vs_Last_Month_%'].mean(),
            ea_performance['vs_Last_Month_%'].mean(),
            jumbo_performance['vs_Last_Month_%'].mean()
        ]
    })
    
    print("📊 Average Growth Rates Comparison:")
    display(cross_comparison.round(2))
    
    # Best and worst performing analysis types
    best_vs_year = cross_comparison.loc[cross_comparison['Avg_Growth_vs_Last_Year_%'].idxmax(), 'Analysis']
    best_vs_month = cross_comparison.loc[cross_comparison['Avg_Growth_vs_Last_Month_%'].idxmax(), 'Analysis']
    
    print(f"\n✅ Performance Highlights:")
    print(f"• Best performing vs Last Year: {best_vs_year}")
    print(f"• Best performing vs Last Month: {best_vs_month}")
else:
    print("Could not complete cross-analysis comparison")

print("\n" + "=" * 70)
print("✅ COMPREHENSIVE GROWTH ANALYSIS COMPLETE")
print("📊 All three pivot tables analyzed for growth patterns")
print("📈 Weekly performance trends calculated and compared")
print("=" * 70)

📊 COMPREHENSIVE GROWTH COMPARISON ANALYSIS

🔍 OVERALL ANALYSIS - Growth Patterns
--------------------------------------------------

📈 Overall - Growth Percentages by Week:


Unnamed: 0_level_0,Week 1,Week 1,Week 2,Week 2,Week 3,Week 3,Week 4,Week 4,Week 5,Week 5,Week 6,Week 6
Unnamed: 0_level_1,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %
ACC,-100.0,-100.0,125.75,-25.85,437.93,44.74,68.3,60.55,75.04,22.81,311.38,-66.78
AV,-100.0,-100.0,-66.39,-33.51,123.74,-28.28,457.82,36.28,-10.23,-43.38,-25.69,inf
GAM,-58.43,20.07,103.28,762.21,68.5,311.55,7.6,91.92,19.33,-71.74,inf,-98.71
IMG,-29.59,-78.39,7.25,-23.6,29.58,-37.81,2.97,-32.98,-13.94,-57.66,1771.04,37.29
IT,10.5,-36.57,81.12,4.83,19.56,-8.84,-11.13,-14.78,47.89,24.95,84.09,-31.54
LA,0.0,-100.0,35.67,-14.95,73.21,-8.88,20.13,42.67,77.22,-2.0,inf,70.66
MM,-47.08,-42.3,249.0,54.54,14.34,-18.31,-28.27,-14.11,18.93,19.35,-4.41,191.3
OAD,-100.0,-100.0,42.1,-40.49,-30.17,7.9,-2.04,-32.02,-55.77,-77.4,191.04,136.13
PG,-100.0,-100.0,40.56,-11.11,-59.7,-49.52,-91.33,-94.01,3.18,97.76,inf,-83.03
PRO,-100.0,0.0,22.66,inf,-100.0,0.0,-100.0,0.0,-90.38,inf,-100.0,0.0



📊 Overall - Average Growth Rates Across All Weeks:
---------------------------------------------
ACC             | Avg vs Last Year:  153.1% | Avg vs Last Month:  -10.8%
AV              | Avg vs Last Year:   63.2% | Avg vs Last Month:  -28.1%
GAM             | Avg vs Last Year:   23.4% | Avg vs Last Month:  169.2%
IMG             | Avg vs Last Year:  294.6% | Avg vs Last Month:  -32.2%
IT              | Avg vs Last Year:   38.7% | Avg vs Last Month:  -10.3%
LA              | Avg vs Last Year:   34.4% | Avg vs Last Month:   -2.1%
MM              | Avg vs Last Year:   33.8% | Avg vs Last Month:   31.7%
OAD             | Avg vs Last Year:    7.5% | Avg vs Last Month:  -17.6%
PG              | Avg vs Last Year:  -34.5% | Avg vs Last Month:  -40.0%
PRO             | Avg vs Last Year:  -78.0% | Avg vs Last Month:    0.0%
S&N             | Avg vs Last Year:  185.2% | Avg vs Last Month:    7.3%
SDA             | Avg vs Last Year:   15.7% | Avg vs Last Month:  -36.6%
TEL             | Avg vs L

Unnamed: 0,vs_Last_Year_%,vs_Last_Month_%
Week 1,-54.2,-33.39
Week 2,42.14,19.56
Week 3,27.41,-4.36
Week 4,-27.23,-21.75
Week 5,74.39,29.35
Week 6,76.83,-35.95



🔍 EA ONLY ANALYSIS - Growth Patterns
--------------------------------------------------

📈 EA Only - Growth Percentages by Week:


Unnamed: 0_level_0,Week 1,Week 1,Week 2,Week 2,Week 3,Week 3,Week 4,Week 4,Week 5,Week 5,Week 6,Week 6
Unnamed: 0_level_1,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %
ACC,0.0,-100.0,344.7,-65.04,178.85,-19.36,-64.17,1.89,65.1,87.48,556.94,716.52
AV,-100.0,-100.0,-81.41,-63.14,inf,-90.72,-67.23,-80.7,83.18,12.45,-100.0,0.0
GAM,0.0,0.0,128.07,161.27,-29.03,181.67,10.13,195.88,-21.16,-0.8,0.0,-100.0
IMG,inf,inf,65.93,0.24,-24.03,29.48,inf,26.52,-31.8,-56.58,0.0,-100.0
IT,-24.51,1.74,52.6,22.56,-18.93,-29.97,-28.39,-14.41,2.68,2.11,248.05,7.49
LA,0.0,-100.0,inf,-48.74,-100.0,-100.0,inf,-30.13,-100.0,-100.0,0.0,0.0
MM,38.52,63.75,494.65,136.45,201.02,9.13,88.73,-14.38,-1.11,175.75,-26.47,367.55
OAD,-100.0,-100.0,191.63,-34.54,32.58,-14.52,-56.18,14.85,-38.98,-74.53,inf,326.32
PG,0.0,-100.0,-52.89,8.0,-65.86,-52.34,-95.3,-96.41,263.02,-37.54,0.0,-100.0
PRO,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0



📊 EA Only - Average Growth Rates Across All Weeks:
---------------------------------------------
ACC             | Avg vs Last Year:  180.2% | Avg vs Last Month:  103.6%
AV              | Avg vs Last Year:  -44.2% | Avg vs Last Month:  -53.7%
GAM             | Avg vs Last Year:   14.7% | Avg vs Last Month:   73.0%
IMG             | Avg vs Last Year:    1.7% | Avg vs Last Month:  -16.7%
IT              | Avg vs Last Year:   38.6% | Avg vs Last Month:   -1.7%
LA              | Avg vs Last Year:  -33.3% | Avg vs Last Month:  -63.1%
MM              | Avg vs Last Year:  132.6% | Avg vs Last Month:  123.0%
OAD             | Avg vs Last Year:    4.8% | Avg vs Last Month:   19.6%
PG              | Avg vs Last Year:    8.2% | Avg vs Last Month:  -63.0%
PRO             | Avg vs Last Year:  -16.7% | Avg vs Last Month:    0.0%
S&N             | Avg vs Last Year:  106.2% | Avg vs Last Month:   66.9%
SDA             | Avg vs Last Year:  127.6% | Avg vs Last Month:  -36.4%
TEL             | Avg vs L

Unnamed: 0,vs_Last_Year_%,vs_Last_Month_%
Week 1,-64.4,20.28
Week 2,29.18,64.12
Week 3,-9.91,-13.28
Week 4,-53.14,-24.85
Week 5,64.42,68.64
Week 6,12.54,-42.21



🔍 JUMBO.AE ONLY ANALYSIS - Growth Patterns
--------------------------------------------------

📈 Jumbo.ae Only - Growth Percentages by Week:


Unnamed: 0_level_0,Week 1,Week 1,Week 2,Week 2,Week 3,Week 3,Week 4,Week 4,Week 5,Week 5,Week 6,Week 6
Unnamed: 0_level_1,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %,v/s Last Year %,v/s Last Month %
ACC,-100.0,-100.0,111.96,-12.95,500.68,58.94,131.91,67.72,79.26,8.24,260.05,-75.67
AV,-100.0,-100.0,-62.78,-26.42,116.64,-7.99,1024.15,68.37,-26.91,-53.67,62.58,inf
GAM,-58.43,20.07,96.89,2655.14,91.79,329.02,7.03,77.45,36.76,-76.0,inf,-98.55
IMG,-100.0,-100.0,-14.76,-34.91,73.5,-47.59,-18.52,-40.38,16.38,-58.68,1771.04,123.73
IT,104.22,-53.79,149.96,-13.58,83.26,17.01,30.95,-15.26,130.91,52.88,-76.38,-89.02
LA,0.0,-100.0,19.94,-6.91,143.89,-6.59,5.77,66.18,94.93,16.94,inf,70.66
MM,-93.41,-93.11,190.47,32.2,-12.56,-27.37,-46.62,-13.96,33.47,-8.52,32.53,115.73
OAD,-100.0,-100.0,5.22,-43.97,-40.33,19.15,29.5,-37.08,-69.2,-80.82,-93.11,-87.82
PG,-100.0,-100.0,138.15,-14.24,-30.74,-41.51,-66.14,-85.4,-12.92,349.12,inf,79.66
PRO,-100.0,0.0,22.66,inf,-100.0,0.0,-100.0,0.0,-87.9,inf,-100.0,0.0



📊 Jumbo.ae Only - Average Growth Rates Across All Weeks:
---------------------------------------------
ACC             | Avg vs Last Year:  164.0% | Avg vs Last Month:   -9.0%
AV              | Avg vs Last Year:  168.9% | Avg vs Last Month:  -20.0%
GAM             | Avg vs Last Year:   29.0% | Avg vs Last Month:  484.5%
IMG             | Avg vs Last Year:  287.9% | Avg vs Last Month:  -26.3%
IT              | Avg vs Last Year:   70.5% | Avg vs Last Month:  -17.0%
LA              | Avg vs Last Year:   44.1% | Avg vs Last Month:    6.7%
MM              | Avg vs Last Year:   17.3% | Avg vs Last Month:    0.8%
OAD             | Avg vs Last Year:  -44.7% | Avg vs Last Month:  -55.1%
PG              | Avg vs Last Year:  -11.9% | Avg vs Last Month:   31.3%
PRO             | Avg vs Last Year:  -77.5% | Avg vs Last Month:    0.0%
S&N             | Avg vs Last Year:  190.6% | Avg vs Last Month:    8.7%
SDA             | Avg vs Last Year:  -12.7% | Avg vs Last Month:  -33.2%
TEL             | Av

Unnamed: 0,vs_Last_Year_%,vs_Last_Month_%
Week 1,33.14,-67.05
Week 2,64.98,-13.02
Week 3,87.38,3.9
Week 4,44.77,-18.74
Week 5,91.81,-4.12
Week 6,223.79,-29.92



🔄 CROSS-ANALYSIS PERFORMANCE COMPARISON
📊 Average Growth Rates Comparison:


Unnamed: 0,Analysis,Avg_Growth_vs_Last_Year_%,Avg_Growth_vs_Last_Month_%
0,Overall,23.22,-7.76
1,EA Only,-3.55,12.12
2,Jumbo.ae Only,90.98,-21.49



✅ Performance Highlights:
• Best performing vs Last Year: Jumbo.ae Only
• Best performing vs Last Month: EA Only

✅ COMPREHENSIVE GROWTH ANALYSIS COMPLETE
📊 All three pivot tables analyzed for growth patterns
📈 Weekly performance trends calculated and compared


In [321]:
# Export All IDG Pivot Tables to Excel - Combined in Single Sheet
print("📤 EXPORTING ALL THREE PIVOT TABLES TO SINGLE EXCEL SHEET...")
print("=" * 65)

# Create Excel writer object
output_file = 'IDG_Weekly_Analysis_Combined.xlsx'
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    
    # Create combined sheet with all three analyses
    print("📊 Creating Combined IDG Analysis Sheet...")
    
    # Create a workbook and worksheet - ensure we have a proper worksheet
    workbook = writer.book
    # Remove default sheet if it exists
    if workbook.worksheets:
        workbook.remove(workbook.active)
    
    # Create our main worksheet
    worksheet = workbook.create_sheet('Combined_IDG_Analysis', 0)
    
    # Define styles for different sections
    main_title_font = Font(bold=True, size=14, color='FFFFFF')
    main_title_fill = PatternFill(start_color='1F4E79', end_color='1F4E79', fill_type='solid')
    
    section_header_font = Font(bold=True, size=12, color='FFFFFF')
    section_header_fill = PatternFill(start_color='366092', end_color='366092', fill_type='solid')
    
    column_header_font = Font(bold=True, size=10, color='FFFFFF')
    column_header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
    
    idg_name_font = Font(bold=True, size=9)
    idg_name_fill = PatternFill(start_color='F2F2F2', end_color='F2F2F2', fill_type='solid')
    
    total_row_font = Font(bold=True, size=10)
    total_row_fill = PatternFill(start_color='FFE699', end_color='FFE699', fill_type='solid')
    
    subheader_font = Font(bold=True, size=9, color='FFFFFF')
    subheader_fill = PatternFill(start_color='5B9BD5', end_color='5B9BD5', fill_type='solid')
    
    row_header_font = Font(bold=True, size=9)
    row_header_fill = PatternFill(start_color='E7E6E6', end_color='E7E6E6', fill_type='solid')
    
    number_format = '#,##0'
    thin_border = Border(
        left=Side(style='thin'), right=Side(style='thin'),
        top=Side(style='thin'), bottom=Side(style='thin')
    )
    
    # Function to write monthly total data (just Total column) to a dedicated worksheet
    def write_monthly_total_to_sheet(pivot_df, analysis_title, worksheet):
        # Extract only the Total column data
        total_data = pivot_df["Total"]
        
        # Section title for the sheet - set value first
        worksheet.cell(row=1, column=1, value=analysis_title)
        title_cell = worksheet.cell(row=1, column=1)
        title_cell.font = section_header_font
        title_cell.fill = section_header_fill
        title_cell.alignment = Alignment(horizontal='center', vertical='center')
        
        # Calculate merge range
        num_cols = len(total_data.columns)
        
        # Apply border to all cells that will be merged BEFORE merging
        for col_idx_merge in range(1, num_cols + 2):
            cell = worksheet.cell(row=1, column=col_idx_merge)
            cell.border = thin_border
            # Set same formatting to all cells that will be merged
            if col_idx_merge > 1:
                cell.font = section_header_font
                cell.fill = section_header_fill
                cell.alignment = Alignment(horizontal='center', vertical='center')
        
        # Now merge cells AFTER setting values and formatting
        worksheet.merge_cells(start_row=1, start_column=1, 
                            end_row=1, end_column=num_cols + 1) # +1 for the IDG column
        
        current_row = 2 # Start headers from row 2
        
        # Write column headers - just "IDG" and the period names from Total
        worksheet.cell(row=current_row, column=1, value="IDG")
        idg_header_cell = worksheet.cell(row=current_row, column=1)
        idg_header_cell.font = column_header_font
        idg_header_cell.fill = column_header_fill
        idg_header_cell.alignment = Alignment(horizontal='center', vertical='center')
        idg_header_cell.border = thin_border
        
        col_idx = 2
        for period in total_data.columns:
            worksheet.cell(row=current_row, column=col_idx, value=period)
            header_cell = worksheet.cell(row=current_row, column=col_idx)
            header_cell.font = column_header_font
            header_cell.fill = column_header_fill
            header_cell.alignment = Alignment(horizontal='center', vertical='center')
            header_cell.border = thin_border
            col_idx += 1
        
        current_row += 1
        
        # Write data rows
        for idg_name in total_data.index:
            worksheet.cell(row=current_row, column=1, value=idg_name)
            name_cell = worksheet.cell(row=current_row, column=1)
            if idg_name == 'Total':
                name_cell.font = total_row_font
                name_cell.fill = total_row_fill
            else:
                name_cell.font = idg_name_font
                name_cell.fill = idg_name_fill
            name_cell.alignment = Alignment(horizontal='left', vertical='center')
            name_cell.border = thin_border
            
            col_idx = 2
            for period in total_data.columns:
                value = total_data.loc[idg_name, period]
                worksheet.cell(row=current_row, column=col_idx, value=value)
                data_cell = worksheet.cell(row=current_row, column=col_idx)
                
                if idg_name == 'Total':
                    data_cell.font = total_row_font
                    data_cell.fill = total_row_fill
                else:
                    data_cell.font = Font(size=9)
                
                # Format percentage columns
                if 'v/s' in period and '%' in period:
                    data_cell.number_format = '0"%"'
                    if isinstance(value, (int, float)) and value != float('inf') and value != float('-inf') and value != 0:
                        if period == 'v/s Target %':
                            if value >= 100:
                                data_cell.font = Font(color='008000', size=9, bold=idg_name=='Total')
                            else:
                                data_cell.font = Font(color='FF0000', size=9, bold=idg_name=='Total')
                        else:
                            if value > 0:
                                data_cell.font = Font(color='008000', size=9, bold=idg_name=='Total')
                            elif value < 0:
                                data_cell.font = Font(color='FF0000', size=9, bold=idg_name=='Total')
                else:
                    data_cell.number_format = number_format
                
                data_cell.alignment = Alignment(horizontal='right', vertical='center')
                data_cell.border = thin_border
                col_idx += 1
            current_row += 1
            
        # Auto-adjust column widths for this worksheet
        for col_letter_idx in range(1, worksheet.max_column + 1):
            max_length = 0
            column_letter = get_column_letter(col_letter_idx)
            for row_idx in range(1, worksheet.max_row + 1):
                try:
                    cell_value = str(worksheet.cell(row=row_idx, column=col_letter_idx).value)
                    if len(cell_value) > max_length:
                        max_length = len(cell_value)
                except:
                    pass
            adjusted_width = min(max_length + 2, 30) # Max width of 30
            worksheet.column_dimensions[column_letter].width = adjusted_width

        # Freeze panes for this worksheet (IDG name and headers)
        worksheet.freeze_panes = 'B3' # Headers and IDG column frozen
    
    # MODIFIED Function to write a single pivot table to a dedicated worksheet
    def write_pivot_table_to_sheet(pivot_df, analysis_title, worksheet):
        # Section title for the sheet - set value first
        worksheet.cell(row=1, column=1, value=analysis_title)
        title_cell = worksheet.cell(row=1, column=1)
        title_cell.font = section_header_font # Use section_header_font for individual sheet titles
        title_cell.fill = section_header_fill
        title_cell.alignment = Alignment(horizontal='center', vertical='center')
        
        # Calculate merge range
        num_cols = len(pivot_df.columns)
        
        # Apply border and formatting to all cells that will be merged BEFORE merging
        for col_idx_merge in range(1, num_cols + 2):
            cell = worksheet.cell(row=1, column=col_idx_merge)
            cell.border = thin_border
            # Set same formatting to all cells that will be merged
            if col_idx_merge > 1:
                cell.font = section_header_font
                cell.fill = section_header_fill
                cell.alignment = Alignment(horizontal='center', vertical='center')
        
        # Now merge cells AFTER setting values and formatting
        worksheet.merge_cells(start_row=1, start_column=1, 
                            end_row=1, end_column=num_cols + 1) # +1 for the IDG column
        
        current_row = 2 # Start headers from row 2
        
        # Write column headers (multi-level)
        # First level headers (Week labels)
        col_idx = 2  # Start from column 2 (column 1 is for IDG names)
        for week_label in pivot_df.columns.get_level_values(0).unique():
            week_cols = [col for col in pivot_df.columns if col[0] == week_label]
            num_sub_cols = len(week_cols)
            
            worksheet.cell(row=current_row, column=col_idx, value=week_label)
            week_cell = worksheet.cell(row=current_row, column=col_idx)
            week_cell.font = column_header_font
            week_cell.fill = column_header_fill
            week_cell.alignment = Alignment(horizontal='center', vertical='center')
            week_cell.border = thin_border
            
            if num_sub_cols > 1:
                worksheet.merge_cells(start_row=current_row, start_column=col_idx,
                                    end_row=current_row, end_column=col_idx + num_sub_cols - 1)
                for i in range(num_sub_cols):
                    cell = worksheet.cell(row=current_row, column=col_idx + i)
                    cell.font = column_header_font
                    cell.fill = column_header_fill
                    cell.border = thin_border
            col_idx += num_sub_cols
        
        current_row += 1
        
        # Second level headers (Period names)
        worksheet.cell(row=current_row, column=1, value="IDG")
        idg_header_cell = worksheet.cell(row=current_row, column=1)
        idg_header_cell.font = column_header_font
        idg_header_cell.fill = column_header_fill
        idg_header_cell.alignment = Alignment(horizontal='center', vertical='center')
        idg_header_cell.border = thin_border
        
        col_idx = 2
        for col_multi_idx in pivot_df.columns:
            worksheet.cell(row=current_row, column=col_idx, value=col_multi_idx[1])
            sub_header_cell = worksheet.cell(row=current_row, column=col_idx)
            sub_header_cell.font = column_header_font
            sub_header_cell.fill = column_header_fill
            sub_header_cell.alignment = Alignment(horizontal='center', vertical='center')
            sub_header_cell.border = thin_border
            col_idx += 1
        
        current_row += 1
        
        # Write data rows
        for idg_name in pivot_df.index:
            worksheet.cell(row=current_row, column=1, value=idg_name)
            name_cell = worksheet.cell(row=current_row, column=1)
            if idg_name == 'Total':
                name_cell.font = Font(bold=True, size=10)
                name_cell.fill = PatternFill(start_color='FFE699', end_color='FFE699', fill_type='solid')
            else:
                name_cell.font = idg_name_font
                name_cell.fill = idg_name_fill
            name_cell.alignment = Alignment(horizontal='left', vertical='center')
            name_cell.border = thin_border
            
            col_idx = 2
            for col_multi_idx in pivot_df.columns:
                value = pivot_df.loc[idg_name, col_multi_idx]
                worksheet.cell(row=current_row, column=col_idx, value=value)
                data_cell = worksheet.cell(row=current_row, column=col_idx)
                
                if idg_name == 'Total':
                    data_cell.font = Font(bold=True, size=9)
                    data_cell.fill = PatternFill(start_color='FFE699', end_color='FFE699', fill_type='solid')
                else:
                    data_cell.font = Font(size=9)
                
                if 'v/s' in col_multi_idx[1] and '%' in col_multi_idx[1]:
                    data_cell.number_format = '0"%"'
                    if isinstance(value, (int, float)) and value != float('inf') and value != float('-inf') and value != 0 : # Check for valid numeric value
                        if col_multi_idx[1] == 'v/s Target %':
                            if value >= 100:
                                data_cell.font = Font(color='008000', size=9, bold=idg_name=='Total')
                            else:
                                data_cell.font = Font(color='FF0000', size=9, bold=idg_name=='Total')
                        else:
                            if value > 0:
                                data_cell.font = Font(color='008000', size=9, bold=idg_name=='Total')
                            elif value < 0:
                                data_cell.font = Font(color='FF0000', size=9, bold=idg_name=='Total')
                else:
                    data_cell.number_format = number_format
                
                data_cell.alignment = Alignment(horizontal='right', vertical='center')
                data_cell.border = thin_border
                col_idx += 1
            current_row += 1
            
        # Auto-adjust column widths for this worksheet
        for col_letter_idx in range(1, worksheet.max_column + 1):
            max_length = 0
            column_letter = get_column_letter(col_letter_idx)
            for row_idx in range(1, worksheet.max_row + 1):
                try:
                    cell_value = str(worksheet.cell(row=row_idx, column=col_letter_idx).value)
                    if len(cell_value) > max_length:
                        max_length = len(cell_value)
                except:
                    pass
            adjusted_width = min(max_length + 2, 30) # Max width of 30, increased padding
            worksheet.column_dimensions[column_letter].width = adjusted_width

        # Freeze panes for this worksheet (IDG name and headers)
        # Freeze row 3 (headers) and column 1 (IDG names)
        worksheet.freeze_panes = 'B1' # Top 3 rows and 1st column frozen

    # Main Excel writing block
    # Remove default sheet if it exists
    if workbook.worksheets:
        workbook.remove(workbook.active)
    
    # Data for sheets - including both detailed and monthly summary sheets
    analyses_to_export = [
        {"name": "Cat_Week_Total", "title": f"OVERALL IDG ANALYSIS ({max_invoice_day} days)", "data": global_idg_pivot, "type": "detailed"},
        {"name": "Cat_Week_EA", "title": f"EA ONLY IDG ANALYSIS ({max_invoice_day} days)", "data": ea_idg_pivot, "type": "detailed"},
        {"name": "Cat_Week_JumboAE", "title": f"JUMBO.AE ONLY IDG ANALYSIS ({max_invoice_day} days)", "data": jumbo_idg_pivot, "type": "detailed"},
        {"name": "Cat_Month_Total", "title": f"MONTHLY OVERALL IDG ANALYSIS ({max_invoice_day} days)", "data": global_idg_pivot, "type": "monthly"},
        {"name": "Cat_Month_EA", "title": f"MONTHLY EA ONLY IDG ANALYSIS ({max_invoice_day} days)", "data": ea_idg_pivot, "type": "monthly"},
        {"name": "Cat_Month_JumboAE", "title": f"MONTHLY JUMBO.AE ONLY IDG ANALYSIS ({max_invoice_day} days)", "data": jumbo_idg_pivot, "type": "monthly"},
    ]

    sheet_idx_counter = 0
    for analysis in analyses_to_export:
        print(f"  📊 Writing {analysis['title']} to sheet: {analysis['name']}...")
        ws = workbook.create_sheet(analysis['name'], sheet_idx_counter)
        
        if analysis['type'] == 'detailed':
            write_pivot_table_to_sheet(analysis['data'].round(2), analysis['title'], ws)
        elif analysis['type'] == 'monthly':
            write_monthly_total_to_sheet(analysis['data'].round(2), analysis['title'], ws)
        
        sheet_idx_counter += 1
    
    # Create additional supporting sheets (Summary Dashboard, Weekly Totals)
    # These will be created after the individual analysis sheets.
    
    # 2. Summary Dashboard Sheet - Enhanced with multiple tables
    print("📊 Creating Enhanced Summary Dashboard...")
    
    # Create Summary Dashboard sheet manually with multiple tables
    summary_ws = workbook.create_sheet('Weekly')
    
    # Define styles for the Summary Dashboard
    header_font = Font(bold=True, size=12, color='FFFFFF')
    header_fill = PatternFill(start_color='1F4E79', end_color='1F4E79', fill_type='solid')
    subheader_font = Font(bold=True, size=10)
    subheader_fill = PatternFill(start_color='D6E4F0', end_color='D6E4F0', fill_type='solid')
    table_border = Border(
        left=Side(style='thin'), right=Side(style='thin'),
        top=Side(style='thin'), bottom=Side(style='thin')
    )
    
    current_row = 1
    
    # ===== TABLE 1: Latest Week Summary by Channel =====
    # Get the latest week data from final_output - Use the week containing max_invoice_day
    latest_week_num, _ = get_week_info(max_invoice_day, first_day_position)
    
    # Get all days that belong to this week
    latest_week_days = []
    for day in final_output['Day'].unique():
        if get_week_info(day, first_day_position)[0] == latest_week_num:
            latest_week_days.append(day)
    
    latest_week_days = sorted(latest_week_days)
    latest_week_data = final_output[final_output['Day'].isin(latest_week_days)]
    
    # Create summary table for latest week - Check if cell is already merged
    latest_week_title = f"Latest Week Summary (Days {min(latest_week_days)}-{max(latest_week_days)})"
    title_cell = summary_ws.cell(row=current_row, column=1)
    
    # Check if cell is already merged
    try:
        # Try to set value - if it's not merged, this will work
        title_cell.value = latest_week_title
        title_cell.font = header_font
        title_cell.fill = header_fill
        
        # Apply same formatting to cells that will be merged
        for col_idx in range(2, 9):
            cell = summary_ws.cell(row=current_row, column=col_idx)
            cell.font = header_font
            cell.fill = header_fill
        
        # Now merge cells AFTER setting values and formatting
        summary_ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=8)
        
    except AttributeError:
        # Cell is already merged, skip the merge operation
        print(f"⚠️ Latest Week Summary title cell is already merged, skipping merge operation")
        pass
    current_row += 2
    
    # Headers for Table 1
    headers_t1 = ['Analysis Type'] + [f'Day {day}' for day in latest_week_days]
    for col, header in enumerate(headers_t1, 1):
        cell = summary_ws.cell(row=current_row, column=col, value=header)
        cell.font = subheader_font
        cell.fill = subheader_fill
        cell.border = table_border
    current_row += 1
    
    # Data for Table 1 - from final_output
    analysis_types = ['Total', 'Jumbo.ae', 'EA']
    for analysis_type in analysis_types:
        summary_ws.cell(row=current_row, column=1, value=analysis_type).border = table_border
        
        for day_idx, day in enumerate(latest_week_days, 2):
            day_data = latest_week_data[latest_week_data['Day'] == day]
            if not day_data.empty:
                # Get the current display value for this analysis type and day
                if analysis_type in final_output.columns.get_level_values(0):
                    value = day_data[(analysis_type, CURRENT_DISPLAY)].iloc[0] if len(day_data) > 0 else 0
                else:
                    value = 0
                cell = summary_ws.cell(row=current_row, column=day_idx, value=value)
                cell.number_format = '#,##0'
                cell.border = table_border
        current_row += 1
    
    # ===== TABLE 2: Channel Performance for Latest Week - SIDE BY SIDE =====
    # Position this table to the right of Table 1 instead of below it
    
    # Calculate the starting column for Table 2 (right side)
    table2_start_col = len(latest_week_days) + 3  # After Table 1 columns + some spacing
    table2_start_row = 1  # Start from the same row as Table 1
    
    # Title for Table 2 - Check if cell is already merged before setting value
    title_text_t2 = f"Channel Performance - Latest Week (Days {min(latest_week_days)}-{max(latest_week_days)})"
    
    # Get the cell - it might already be merged
    title_cell_t2 = summary_ws.cell(row=table2_start_row, column=table2_start_col)
    
    # Check if cell is already merged
    try:
        # Try to set value - if it's not merged, this will work
        title_cell_t2.value = title_text_t2
        title_cell_t2.font = header_font
        title_cell_t2.fill = header_fill
        
        # Apply same formatting to cells that will be merged
        for col_idx in range(table2_start_col + 1, table2_start_col + len(latest_week_days) + 1):
            cell = summary_ws.cell(row=table2_start_row, column=col_idx)
            cell.font = header_font
            cell.fill = header_fill
        
        # Now merge cells AFTER setting values and formatting
        summary_ws.merge_cells(start_row=table2_start_row, start_column=table2_start_col, end_row=table2_start_row, end_column=table2_start_col + len(latest_week_days))
        
    except AttributeError:
        # Cell is already merged, skip the merge operation
        print(f"⚠️ Table 2 title cell is already merged, skipping merge operation")
        pass
    
    # Headers for Table 2
    headers_t2 = ['Channel'] + [f'Day {day}' for day in latest_week_days]
    for col_idx, header in enumerate(headers_t2):
        cell = summary_ws.cell(row=table2_start_row + 2, column=table2_start_col + col_idx, value=header)
        cell.font = subheader_font
        cell.fill = subheader_fill
        cell.border = table_border
    
    # Data for Table 2
    table2_current_row = table2_start_row + 3
    
    # Get available channels from sessions data if pivot exists
    try:
        # Try to get channel data from the pivot table (from styled_pivot data source)
        if 'pivot' in locals() and hasattr(pivot, 'columns'):
            available_channels = [col[1] for col in pivot.columns if col[0] == 'Sessions']  # Include all channels
        else:
            available_channels = ['Email', 'Organic', 'Paid Perf', 'Paid Other']  # Default channels
        
        for channel in available_channels:
            summary_ws.cell(row=table2_current_row, column=table2_start_col, value=channel).border = table_border
            
            for day_idx, day in enumerate(latest_week_days):
                # Try to get session data for this channel and day
                try:
                    if 'pivot' in locals() and day <= len(pivot):
                        # Get sessions data from pivot table
                        sessions_value = pivot.loc[pivot['Day'] == day, ('Sessions', channel)].iloc[0] if len(pivot[pivot['Day'] == day]) > 0 else 0
                    else:
                        sessions_value = 0
                except:
                    sessions_value = 0
                
                cell = summary_ws.cell(row=table2_current_row, column=table2_start_col + 1 + day_idx, value=sessions_value)
                cell.number_format = '#,##0'
                cell.border = table_border
            table2_current_row += 1
            
    except Exception as e:
        # Fallback: create empty table structure
        for channel in ['Email', 'Organic', 'Paid Perf', 'Paid Other']:
            summary_ws.cell(row=table2_current_row, column=table2_start_col, value=channel).border = table_border
            for day_idx in range(len(latest_week_days)):
                cell = summary_ws.cell(row=table2_current_row, column=table2_start_col + 1 + day_idx, value=0)
                cell.number_format = '#,##0'
                cell.border = table_border
            table2_current_row += 1
    
    # Continue with remaining tables below both tables (use max height of both tables)
    current_row = max(current_row, table2_current_row) + 2
    
    current_row += 2
    
    # ===== TABLE 3: Traditional Summary Analysis =====
    # summary_ws.cell(row=current_row, column=1, value="Analysis Summary Overview")
    # title_cell = summary_ws.cell(row=current_row, column=1)
    # title_cell.font = header_font
    # title_cell.fill = header_fill
    # summary_ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=7)
    # current_row += 2
    
    # # Create traditional summary data
    # summary_data = {
    #     'Analysis_Type': ['Overall', 'EA Only', 'Jumbo.ae Only'],
    #     'IDG_Categories': [len(all_idgs_global), len(all_idgs_ea), len(all_idgs_jumbo)],
    #     'Weeks_Analyzed': [max_week_global, max_week_ea, max_week_jumbo]
    # }
    
    # # Add period totals for each analysis
    # for period in periods:
    #     summary_data[f'{period}_Total'] = [
    #         global_idg_pivot.loc['Total', ('Total', period)],
    #         ea_idg_pivot.loc['Total', ('Total', period)],
    #         jumbo_idg_pivot.loc['Total', ('Total', period)]
    #     ]
    
    # # Add growth percentages
    # summary_data[f'Growth_vs_{LAST_YEAR_DISPLAY}_%'] = [
    #     global_idg_pivot.loc['Total', ('Total', 'v/s Last Year %')],
    #     ea_idg_pivot.loc['Total', ('Total', 'v/s Last Year %')],
    #     jumbo_idg_pivot.loc['Total', ('Total', 'v/s Last Year %')]
    # ]
    
    # summary_data[f'Growth_vs_{LAST_MONTH_DISPLAY}_%'] = [
    #     global_idg_pivot.loc['Total', ('Total', 'v/s Last Month %')],
    #     ea_idg_pivot.loc['Total', ('Total', 'v/s Last Month %')],
    #     jumbo_idg_pivot.loc['Total', ('Total', 'v/s Last Month %')]
    # ]
    
    # # Write traditional summary headers
    # summary_headers = list(summary_data.keys())
    # for col, header in enumerate(summary_headers, 1):
    #     cell = summary_ws.cell(row=current_row, column=col, value=header)
    #     cell.font = subheader_font
    #     cell.fill = subheader_fill
    #     cell.border = table_border
    # current_row += 1
    
    # # Write traditional summary data
    # for row_idx in range(len(summary_data['Analysis_Type'])):
    #     for col_idx, key in enumerate(summary_headers, 1):
    #         value = summary_data[key][row_idx]
    #         cell = summary_ws.cell(row=current_row, column=col_idx, value=value)
    #         if isinstance(value, (int, float)) and 'Total' in key:
    #             cell.number_format = '#,##0'
    #         elif isinstance(value, (int, float)) and '%' in key:
    #             cell.number_format = '0"%"'
    #         cell.border = table_border
    #     current_row += 1
    
    # # Auto-adjust column widths
    # for column in summary_ws.columns:
    #     max_length = 0
    #     column_letter = get_column_letter(column[0].column)
    #     for cell in column:
    #         try:
    #             if len(str(cell.value)) > max_length:
    #                 max_length = len(str(cell.value))
    #         except:
    #             pass
    #     adjusted_width = (max_length + 2)
    #     summary_ws.column_dimensions[column_letter].width = adjusted_width
    
    # current_row += 2
    
    # ===== TABLE 4: Pivot Summary Table =====
    # Add a table showing Total row from highest week of each pivot
    
    # Calculate dynamic week information for the title
    # Find the highest week from the global pivot (which should be consistent across all pivots)
    max_week_for_title = max_week_global
    
    # Calculate the day range for this week using actual week calculation logic
    # Find the actual start and end days for the specified week
    week_start_day = None
    week_end_day = None
    
    # Iterate through all possible days to find which belong to max_week_for_title
    for day in range(1, max_invoice_day + 1):
        current_week_num, _ = get_week_info(day, first_day_position)
        if current_week_num == max_week_for_title:
            if week_start_day is None:
                week_start_day = day
            week_end_day = day
    
    # Fallback if no days found for the week (shouldn't happen)
    if week_start_day is None:
        week_start_day = 1
        week_end_day = min(7, max_invoice_day)
    
    # Create the dynamic title - Check if cell is already merged
    table_title = f"Sales Data of Week {max_week_for_title} ({week_start_day}-{week_end_day})"
    
    # Get the cell and check if it's already merged
    title_cell = summary_ws.cell(row=current_row, column=1)
    
    try:
        # Try to set value - if it's not merged, this will work
        title_cell.value = table_title
        title_cell.font = header_font
        title_cell.fill = header_fill
        
        # Apply same formatting to cells that will be merged
        for col_idx in range(2, 9):
            cell = summary_ws.cell(row=current_row, column=col_idx)
            cell.font = header_font
            cell.fill = header_fill
        
        # Now merge cells AFTER setting values and formatting
        summary_ws.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=8)
        
    except AttributeError:
        # Cell is already merged, skip the merge operation
        print(f"⚠️ Sales Data title cell is already merged, skipping merge operation")
        pass
    current_row += 2
    
    # Headers for Table 4 - Dynamic period headers
    headers_t4 = ['Analysis Type', LAST_MONTH_DISPLAY, LAST_YEAR_DISPLAY, 'Target', CURRENT_DISPLAY, 'v/s Target %', 'v/s Last Year %', 'v/s Last Month %']
    for col, header in enumerate(headers_t4, 1):
        cell = summary_ws.cell(row=current_row, column=col, value=header)
        cell.font = subheader_font
        cell.fill = subheader_fill
        cell.border = table_border
    current_row += 1
    
    # Data for Table 4 - Extract Total row from last week of each pivot
    pivot_analyses = [
        ('Overall', global_idg_pivot),
        ('Jumbo.ae', jumbo_idg_pivot),
        ('EA', ea_idg_pivot)
    ]
    
    for analysis_name, pivot_data in pivot_analyses:
        if not pivot_data.empty:
            # Find the highest week number in the pivot
            week_columns = [col for col in pivot_data.columns if col[0] != 'Total']
            if week_columns:
                # Extract week numbers and find the maximum
                week_numbers = []
                for col in week_columns:
                    week_label = col[0]
                    if 'Week' in week_label:
                        try:
                            week_num = int(week_label.split(' ')[1])
                            week_numbers.append(week_num)
                        except:
                            pass
                
                if week_numbers:
                    max_week = max(week_numbers)
                    max_week_label = f"Week {max_week}"
                    
                    # Get Total row data for the highest week
                    try:
                        # Extract values for each period from the Total row
                        last_month_val = pivot_data.loc['Total', (max_week_label, LAST_MONTH_DISPLAY)] if (max_week_label, LAST_MONTH_DISPLAY) in pivot_data.columns else 0
                        last_year_val = pivot_data.loc['Total', (max_week_label, LAST_YEAR_DISPLAY)] if (max_week_label, LAST_YEAR_DISPLAY) in pivot_data.columns else 0
                        current_val = pivot_data.loc['Total', (max_week_label, CURRENT_DISPLAY)] if (max_week_label, CURRENT_DISPLAY) in pivot_data.columns else 0
                        target_val = pivot_data.loc['Total', (max_week_label, 'Target')] if (max_week_label, 'Target') in pivot_data.columns else 0
                        vs_target_pct = pivot_data.loc['Total', (max_week_label, 'v/s Target %')] if (max_week_label, 'v/s Target %') in pivot_data.columns else 0
                        vs_last_year_pct = pivot_data.loc['Total', (max_week_label, 'v/s Last Year %')] if (max_week_label, 'v/s Last Year %') in pivot_data.columns else 0
                        vs_last_month_pct = pivot_data.loc['Total', (max_week_label, 'v/s Last Month %')] if (max_week_label, 'v/s Last Month %') in pivot_data.columns else 0
                        
                        # Write row data
                        row_data = [analysis_name, last_month_val, last_year_val, target_val, current_val, vs_target_pct, vs_last_year_pct, vs_last_month_pct]
                        
                        for col_idx, value in enumerate(row_data, 1):
                            cell = summary_ws.cell(row=current_row, column=col_idx, value=value)
                            
                            # Format based on column type
                            if col_idx == 1:  # Analysis Type column
                                cell.font = Font(bold=True, size=9)
                                cell.fill = row_header_fill
                            elif col_idx in [6, 7, 8]:  # Percentage columns
                                cell.number_format = '0"%"'
                                if isinstance(value, (int, float)) and value > 0:
                                    cell.font = Font(color='008000', size=9)
                                elif isinstance(value, (int, float)) and value < 0:
                                    cell.font = Font(color='FF0000', size=9)
                            else:  # Value columns
                                cell.number_format = '#,##0'
                            
                            cell.border = table_border
                            cell.alignment = Alignment(horizontal='right' if col_idx > 1 else 'left', vertical='center')
                        
                        current_row += 1
                        
                    except Exception as e:
                        print(f"Warning: Could not extract data for {analysis_name}: {e}")
                        # Write empty row if data extraction fails
                        for col_idx in range(1, 9):
                            cell = summary_ws.cell(row=current_row, column=col_idx, value=0 if col_idx > 1 else analysis_name)
                            cell.border = table_border
                        current_row += 1
                else:
                    # No week data found, write empty row
                    for col_idx in range(1, 9):
                        cell = summary_ws.cell(row=current_row, column=col_idx, value=0 if col_idx > 1 else analysis_name)
                        cell.border = table_border
                    current_row += 1
            else:
                # No week columns found, write empty row
                for col_idx in range(1, 9):
                    cell = summary_ws.cell(row=current_row, column=col_idx, value=0 if col_idx > 1 else analysis_name)
                    cell.border = table_border
                current_row += 1
        else:
            # Empty pivot data, write empty row
            for col_idx in range(1, 9):
                cell = summary_ws.cell(row=current_row, column=col_idx, value=0 if col_idx > 1 else analysis_name)
                cell.border = table_border
            current_row += 1
    
    # ===== THREE SIDE-BY-SIDE TABLES: Latest Week Data from Each Pivot =====
    current_row += 3  # Add some spacing
    
    # Get the latest/highest week for each pivot table
    max_week_label_global = get_week_label(max_week_global)
    max_week_label_jumbo = get_week_label(max_week_jumbo)
    max_week_label_ea = get_week_label(max_week_ea)
    
    # Table configurations - adjust spacing for more columns
    tables_config = [
        {
            'title': 'Overall - Latest Week',
            'pivot_data': global_idg_pivot,
            'max_week_label': max_week_label_global,
            'all_idgs': all_idgs_global,
            'start_col': 1
        },
        {
            'title': 'Jumbo.ae - Latest Week', 
            'pivot_data': jumbo_idg_pivot,
            'max_week_label': max_week_label_jumbo,
            'all_idgs': all_idgs_jumbo,
            'start_col': 10  # Start after Overall table (8 columns + spacing)
        },
        {
            'title': 'EA - Latest Week',
            'pivot_data': ea_idg_pivot, 
            'max_week_label': max_week_label_ea,
            'all_idgs': all_idgs_ea,
            'start_col': 19  # Start after Jumbo.ae table (8 columns + spacing)
        }
    ]
    
    # Create three tables side by side
    tables_start_row = current_row
    
    for table_config in tables_config:
        pivot_data = table_config['pivot_data']
        max_week_label = table_config['max_week_label']
        all_idgs = table_config['all_idgs']
        start_col = table_config['start_col']
        title = table_config['title']
        
        # Title for each table - check if cell is already merged before setting value
        title_cell = summary_ws.cell(row=tables_start_row, column=start_col)
        
        try:
            # Try to set value - if it's not merged, this will work
            title_cell.value = title
            title_cell.font = header_font
            title_cell.fill = header_fill
            title_cell.alignment = Alignment(horizontal='center', vertical='center')
            
            # Calculate merge range based on expected number of columns
            expected_columns = 8  # IDG + 7 data columns (typical pivot structure)
            
            # Apply same formatting to cells that will be merged
            for col_idx in range(start_col + 1, start_col + expected_columns):
                cell = summary_ws.cell(row=tables_start_row, column=col_idx)
                cell.font = header_font
                cell.fill = header_fill
                cell.alignment = Alignment(horizontal='center', vertical='center')
            
            # Now merge cells AFTER setting values and formatting
            summary_ws.merge_cells(start_row=tables_start_row, start_column=start_col, 
                                  end_row=tables_start_row, end_column=start_col + expected_columns - 1)
                                  
        except AttributeError:
            # Cell is already merged, skip the merge operation
            print(f"⚠️ Table '{title}' title cell is already merged, skipping merge operation")
            pass
        
        # Headers for the table (IDG, and all columns for latest week)
        headers_row = tables_start_row + 2
        headers = ['IDG']
        
        # Get the complete column structure for the latest week from this pivot
        if not pivot_data.empty and max_week_label in [col[0] for col in pivot_data.columns]:
            # Get all columns for the latest week in the exact order they appear in the pivot
            latest_week_columns = []
            for col in pivot_data.columns:
                if col[0] == max_week_label:
                    latest_week_columns.append(col[1])  # Get the period/metric name
            
            # Remove duplicates while preserving order
            headers.extend(list(dict.fromkeys(latest_week_columns)))
        else:
            # Fallback: use the standard structure
            headers.extend([LAST_MONTH_DISPLAY, LAST_YEAR_DISPLAY, 'Target', CURRENT_DISPLAY, 
                           'v/s Target %', 'v/s Last Year %', 'v/s Last Month %'])
        
        # Write headers with proper coloring
        for col_idx, header in enumerate(headers):
            cell = summary_ws.cell(row=headers_row, column=start_col + col_idx, value=header)
            cell.font = subheader_font
            cell.border = table_border
            
            # Apply specific colors based on header type
            if header == CURRENT_DISPLAY:
                # Highlight current/latest month with gold
                cell.fill = PatternFill(start_color='FFD700', end_color='FFD700', fill_type='solid')
                cell.font = Font(bold=True, size=10, color='000000')  # Black text on gold
            elif 'v/s' in header and '%' in header:
                # Highlight comparison percentages with chocolate
                cell.fill = PatternFill(start_color='D2691E', end_color='D2691E', fill_type='solid')
                cell.font = Font(bold=True, size=10, color='FFFFFF')  # White text on chocolate
            else:
                # Default styling
                cell.fill = subheader_fill
        
        # Write data rows
        data_start_row = headers_row + 1
        current_data_row = data_start_row
        
        try:
            if not pivot_data.empty and all_idgs:
                for idg in all_idgs:
                    # IDG name
                    idg_cell = summary_ws.cell(row=current_data_row, column=start_col, value=idg)
                    idg_cell.border = table_border
                    idg_cell.font = Font(bold=(idg == 'Total'))
                    
                    # Data for each column in latest week
                    for col_idx, column_name in enumerate(headers[1:], 1):
                        try:
                            if (max_week_label, column_name) in pivot_data.columns:
                                value = pivot_data.loc[idg, (max_week_label, column_name)]
                                if pd.isna(value):
                                    value = 0
                            else:
                                value = 0
                        except:
                            value = 0
                        
                        data_cell = summary_ws.cell(row=current_data_row, column=start_col + col_idx, value=value)
                        data_cell.border = table_border
                        
                        # Format based on column type with proper coloring
                        if '%' in column_name:
                            # Percentage columns - apply color formatting based on value
                            data_cell.number_format = '0"%"'  # Show as whole percentage (56% not 0.56%)
                            
                            # Apply percentage coloring logic (keep value as-is, don't divide by 100)
                            if isinstance(value, (int, float)) and value != float('inf') and value != float('-inf') and value != 0:
                                if column_name == 'v/s Target %':
                                    # Target percentage: Green if >= 100, Red if < 100
                                    if value >= 100:
                                        data_cell.font = Font(color='008000', size=9, bold=(idg == 'Total'))  # Green
                                    else:
                                        data_cell.font = Font(color='FF0000', size=9, bold=(idg == 'Total'))  # Red
                                else:
                                    # Other percentages: Green if > 0, Red if < 0
                                    if value > 0:
                                        data_cell.font = Font(color='008000', size=9, bold=(idg == 'Total'))  # Green
                                    elif value < 0:
                                        data_cell.font = Font(color='FF0000', size=9, bold=(idg == 'Total'))  # Red
                                    else:
                                        data_cell.font = Font(size=9, bold=(idg == 'Total'))  # Default for zero
                            else:
                                data_cell.font = Font(size=9, bold=(idg == 'Total'))  # Default for invalid values
                                
                        elif column_name == CURRENT_DISPLAY:
                            # Current/latest month values - NO background color for data cells
                            data_cell.number_format = '#,##0'
                            data_cell.font = Font(size=9, bold=(idg == 'Total'))  # No special background for data
                            
                        else:
                            # Regular value columns
                            data_cell.number_format = '#,##0'
                            data_cell.font = Font(size=9, bold=(idg == 'Total'))
                    
                    current_data_row += 1
            else:
                # Empty data - show "No Data"
                no_data_cell = summary_ws.cell(row=current_data_row, column=start_col, value="No Data")
                no_data_cell.border = table_border
                for col_idx in range(1, len(headers)):
                    cell = summary_ws.cell(row=current_data_row, column=start_col + col_idx, value=0)
                    cell.border = table_border
                    cell.number_format = '#,##0'
                current_data_row += 1
                
        except Exception as e:
            print(f"Error writing {title} table: {e}")
            # Write error row
            error_cell = summary_ws.cell(row=current_data_row, column=start_col, value="Error")
            error_cell.border = table_border
            for col_idx in range(1, len(headers)):
                cell = summary_ws.cell(row=current_data_row, column=start_col + col_idx, value=0)
                cell.border = table_border
                cell.number_format = '#,##0'
    
    # Update current_row to be after all three tables
    max_table_height = max(len(all_idgs_global), len(all_idgs_jumbo), len(all_idgs_ea)) + 1  # +1 for Total row
    current_row = tables_start_row + 3 + max_table_height  # 3 = title + spacing + headers
    
    print("✅ Enhanced Summary Dashboard created with multiple tables!")
    
    # 3. Weekly Totals Summary (Overall)
    print("📊 Creating Weekly Totals Summary...")
    weekly_totals = pd.DataFrame(index=range(1, max_week_global + 1), columns=periods)
    
    for week in range(1, max_week_global + 1):
        week_label = get_week_label(week)
        for period in periods:
            week_period_cols = [(week_label, period)]
            weekly_total = global_idg_pivot[week_period_cols].sum().sum()
            weekly_totals.loc[week, period] = weekly_total
    
    # Add growth percentages using dynamic period names
    weekly_totals = weekly_totals.astype(float)
    weekly_totals['Growth_vs_LastYear_%'] = ((weekly_totals[CURRENT_DISPLAY] - weekly_totals[LAST_YEAR_DISPLAY]) / weekly_totals[LAST_YEAR_DISPLAY] * 100).round(2)
    weekly_totals['Growth_vs_LastMonth_%'] = ((weekly_totals[CURRENT_DISPLAY] - weekly_totals[LAST_MONTH_DISPLAY]) / weekly_totals[LAST_MONTH_DISPLAY] * 100).round(2)
    
    # Add week labels
    weekly_totals.index = [get_week_label(week) for week in range(1, max_week_global + 1)]
    
    # Export weekly totals
    weekly_totals.round(2).to_excel(writer, sheet_name='Weekly_Totals', index=True)

print(f"\n✅ EXCEL EXPORT COMPLETED SUCCESSFULLY!")
print(f"📎 File: {output_file}")
print("\n📊 SHEETS CREATED:")
print("=" * 50)
print("📄 1. Cat_Week_Total - Overall IDG Analysis table")
print("📄 2. Cat_Week_EA - EA Only IDG Analysis table")
print("📄 3. Cat_Week_JumboAE - Jumbo.ae Only IDG Analysis table")
print("📄 4. Cat_Month_Total - Monthly Overall IDG Total Analysis")
print("📄 5. Cat_Month_EA - Monthly EA Only IDG Total Analysis")
print("📄 6. Cat_Month_JumboAE - Monthly Jumbo.ae Only IDG Total Analysis")
print(f"📄 7. Weekly - Overview of all analyses") # Adjusted numbering
print(f"📄 8. Weekly_Totals - Weekly summary with growth")   # Adjusted numbering

print(f"\n🎆 FEATURES APPLIED TO EACH ANALYSIS SHEET:")
print(f"• Dedicated sheet for each analysis: Overall, EA, Jumbo.ae")
print(f"• Professional multi-level header formatting")
print(f"• Color-coded percentages (Green: positive, Red: negative)")
print(f"• Highlighted Total columns and rows")
print(f"• Auto-adjusted column widths for readability")
print(f"• Frozen panes for easy navigation")
print(f"• Consistent number formatting across all sheets")
print(f"• Comprehensive analysis covering all three data types")
print(f"• NEW: Monthly summary sheets with Total column data only")

print(f"\n📊 ANALYSIS COVERAGE:")
print(f"• Overall: {len(all_idgs_global)} IDGs across {max_week_global} weeks")
print(f"• EA Only: {len(all_idgs_ea)} IDGs across {max_week_ea} weeks")
print(f"• Jumbo.ae: {len(all_idgs_jumbo)} IDGs across {max_week_jumbo} weeks")
print(f"• Periods: {', '.join(periods)}")
print(f"• Week calculation based on: {start_day} as first day")

print(f"\n💾 File saved as: {output_file}")
print("=" * 60)

📤 EXPORTING ALL THREE PIVOT TABLES TO SINGLE EXCEL SHEET...
📊 Creating Combined IDG Analysis Sheet...
  📊 Writing OVERALL IDG ANALYSIS (30 days) to sheet: Cat_Week_Total...
  📊 Writing EA ONLY IDG ANALYSIS (30 days) to sheet: Cat_Week_EA...
📊 Creating Combined IDG Analysis Sheet...
  📊 Writing OVERALL IDG ANALYSIS (30 days) to sheet: Cat_Week_Total...
  📊 Writing EA ONLY IDG ANALYSIS (30 days) to sheet: Cat_Week_EA...
  📊 Writing JUMBO.AE ONLY IDG ANALYSIS (30 days) to sheet: Cat_Week_JumboAE...
  📊 Writing MONTHLY OVERALL IDG ANALYSIS (30 days) to sheet: Cat_Month_Total...
  📊 Writing MONTHLY EA ONLY IDG ANALYSIS (30 days) to sheet: Cat_Month_EA...
  📊 Writing MONTHLY JUMBO.AE ONLY IDG ANALYSIS (30 days) to sheet: Cat_Month_JumboAE...
📊 Creating Enhanced Summary Dashboard...
⚠️ Table 2 title cell is already merged, skipping merge operation
✅ Enhanced Summary Dashboard created with multiple tables!
📊 Creating Weekly Totals Summary...
  📊 Writing JUMBO.AE ONLY IDG ANALYSIS (30 days) to 

In [322]:
# Dynamic constants for sessions
LAST_MONTH_SESSION_PATH = sessions_info[0][0]
LAST_MONTH_SESSION_SHEET = sessions_info[0][1]
LAST_MONTH_SESSION_DISPLAY = sessions_info[0][2]

LAST_YEAR_SESSION_PATH = sessions_info[1][0]
LAST_YEAR_SESSION_SHEET = sessions_info[1][1]
LAST_YEAR_SESSION_DISPLAY = sessions_info[1][2]

CURRENT_SESSION_PATH = sessions_info[2][0]
CURRENT_SESSION_SHEET = sessions_info[2][1]
CURRENT_SESSION_DISPLAY = sessions_info[2][2]

# Metrics to include in the pivot table
METRICS_TO_AGGREGATE = ['Sessions', 'Purchases', 'Purchase revenue']

def create_master_sessions_pivot(sessions_info, cg_filter=None):
    """
    Create master sessions pivot table with optional CG column filtering
    
    Parameters:
    sessions_info: List of tuples containing (path, sheet, display_name)
    cg_filter: String indicating filter type:
               - None: No filter (default)
               - "EA_only": Only include "EA" or "Endless Aisle" 
               - "non_EA": Exclude "EA" or "Endless Aisle"
    
    Returns:
    pd.DataFrame: Master pivot table with calculated metrics
    """
    
    print("📊 SESSIONS DATA ANALYSIS - WEEKLY MASTER PIVOT")
    if cg_filter == "EA_only":
        print("🔍 Filter: EA/Endless Aisle ONLY")
    elif cg_filter == "non_EA":
        print("🔍 Filter: NON-EA/Endless Aisle")
    else:
        print("🔍 Filter: NO FILTER")
    print("=" * 60)

    processed_sessions_data = {}
    all_channels = set()
    max_week_overall = 0

    # Read and process sessions data from all three periods
    for i, (path, sheet, display_name) in enumerate(sessions_info):
        print(f"\n📄 Processing {display_name} Sessions Data:")
        print("-" * 40)

        try:
            # Read the sessions data
            sessions_df = pd.read_excel(path, sheet_name=sheet)
            print(f"  Raw Shape: {sessions_df.shape}")
            print(f"  Raw Columns: {list(sessions_df.columns)}")
            
            # Filter out "Gift Card" from Category column if it exists
            if 'Category' in sessions_df.columns:
                sessions_df = sessions_df[sessions_df['Category'] != 'Gift Card ']
                print(f"  Shape after filtering 'Gift Card': {sessions_df.shape}")
            
            # Apply CG filter if specified
            if cg_filter and 'CG' in sessions_df.columns:
                if cg_filter == "EA_only":
                    sessions_df = sessions_df[sessions_df['CG'].isin(['EA', 'Endless Aisle'])]
                    print(f"  Shape after EA filter: {sessions_df.shape}")
                elif cg_filter == "non_EA":
                    sessions_df = sessions_df[~sessions_df['CG'].isin(['EA', 'Endless Aisle'])]
                    print(f"  Shape after non-EA filter: {sessions_df.shape}")
            elif cg_filter and 'CG' not in sessions_df.columns:
                print(f"  Warning: CG column not found, filter '{cg_filter}' cannot be applied")
            
            # Convert the string to datetime format first
            sessions_df['Date'] = pd.to_datetime(sessions_df['Date'], format='%Y%m%d', errors='coerce')

            # Then extract the day
            sessions_df['Day'] = sessions_df['Date'].dt.day
            


            sessions_df = sessions_df.dropna(subset=['Day']) 
            if sessions_df.empty:
                print(f"  No valid 'Day' data after conversion for {display_name}.")
                processed_sessions_data[display_name] = pd.DataFrame()
                continue

            sessions_df['Day'] = sessions_df['Day'].astype(int)

            original_len = len(sessions_df)
            sessions_df = sessions_df[sessions_df['Day'] <= max_invoice_day]
            print(f"📉 {display_name}: Filtered {original_len - len(sessions_df)} rows with Day > {max_invoice_day}")

            # Add WeekNumber column (ensure get_week_number and first_day_position are defined in a previous cell)
            sessions_df['WeekNumber'] = sessions_df['Day'].apply(
                lambda day: get_week_number(day, first_day_position) 
            )
            
            # Check which of the desired metrics are available in the current DataFrame
            available_metrics = [m for m in METRICS_TO_AGGREGATE if m in sessions_df.columns]
            if not available_metrics:
                print(f"  No metrics ({', '.join(METRICS_TO_AGGREGATE)}) found in {display_name}. Skipping.")
                processed_sessions_data[display_name] = pd.DataFrame()
                continue
            
            print(f"  Available metrics for {display_name}: {available_metrics}")

            # Select relevant columns for aggregation
            cols_for_aggregation = ['Channel', 'WeekNumber'] + available_metrics
            temp_df = sessions_df[cols_for_aggregation].copy()

            # Group by Channel and WeekNumber, and sum the available metrics
            aggregated_data_for_period = temp_df.groupby(['Channel', 'WeekNumber'])[available_metrics].sum().fillna(0)
            
            processed_sessions_data[display_name] = aggregated_data_for_period
            if not aggregated_data_for_period.empty:
                all_channels.update(aggregated_data_for_period.index.get_level_values('Channel').unique())
                # Ensure WeekNumber exists in index before calling max()
                if 'WeekNumber' in aggregated_data_for_period.index.names:
                    current_max_week_in_period = aggregated_data_for_period.index.get_level_values('WeekNumber').max()
                    if pd.notna(current_max_week_in_period) and current_max_week_in_period > max_week_overall:
                        max_week_overall = int(current_max_week_in_period)
                else: # Handle case where WeekNumber might not be in index (e.g. if groupby results in empty df for some reason)
                     current_max_week_in_period = 0
            else:
                current_max_week_in_period = 0
            
            print(f"  Processed {display_name} successfully. Max week: {current_max_week_in_period if current_max_week_in_period > 0 else 'N/A'}")

        except Exception as e:
            print(f"❌ Error processing {display_name}: {e}")
            import traceback
            traceback.print_exc()
            processed_sessions_data[display_name] = pd.DataFrame()

    print("\n" + "=" * 60)
    print("📊 CREATING FINAL COMPREHENSIVE SESSIONS MASTER PIVOT TABLE")

    if not processed_sessions_data or not all_channels:
        print("⚠️ No session data successfully processed or no channels found. Cannot create master pivot table.")
        return pd.DataFrame()
    
    sorted_channels = sorted(list(all_channels))
    session_periods_display_names = [LAST_MONTH_SESSION_DISPLAY, LAST_YEAR_SESSION_DISPLAY, CURRENT_SESSION_DISPLAY]
    
    column_tuples = []
    if max_week_overall > 0:
        weeks_for_pivot = list(range(1, int(max_week_overall) + 1))
        
        # Add weekly columns first
        for week_num in weeks_for_pivot:
            week_label = get_week_label(week_num) # Ensure get_week_label is available
            for metric_name in METRICS_TO_AGGREGATE: # Use the full list of desired metrics
                for period_display_name in session_periods_display_names:
                    column_tuples.append((week_label, metric_name, period_display_name))
        
        # Add Total columns at the end (after all weeks)
        for metric_name in METRICS_TO_AGGREGATE:
            for period_display_name in session_periods_display_names:
                column_tuples.append(("Total", metric_name, period_display_name))
    
        if not column_tuples:
             print("⚠️ No weeks or metrics to create columns for. Pivot table will be empty or incomplete.")
             master_sessions_pivot_df = pd.DataFrame(index=sorted_channels)
        else:
            multi_columns = pd.MultiIndex.from_tuples(column_tuples, names=['Week', 'Metric', 'Period'])
            master_sessions_pivot_df = pd.DataFrame(index=sorted_channels, columns=multi_columns)
            master_sessions_pivot_df = master_sessions_pivot_df.fillna(0) # Initialize with 0

            for channel_val in sorted_channels:
                for week_num in weeks_for_pivot:
                    week_label = get_week_label(week_num)
                    for period_display_name in session_periods_display_names:
                        if period_display_name in processed_sessions_data:
                            period_aggregated_data = processed_sessions_data[period_display_name]
                            if not period_aggregated_data.empty and (channel_val, week_num) in period_aggregated_data.index:
                                data_series_for_channel_week = period_aggregated_data.loc[(channel_val, week_num)]
                                for metric_name in METRICS_TO_AGGREGATE:
                                    if metric_name in data_series_for_channel_week.index: # Check if metric was available for this period
                                        value = data_series_for_channel_week[metric_name]
                                        master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
    else:
        print("⚠️ No weeks found in session data across all periods. Cannot create master pivot table.")
        return pd.DataFrame()

    # Calculate CVR and AOV if the DataFrame is not empty and has the required structure
    if not master_sessions_pivot_df.empty and isinstance(master_sessions_pivot_df.columns, pd.MultiIndex) and master_sessions_pivot_df.columns.nlevels == 3:

        # Collect unique (week_label, period_name) combinations that have base metrics
        processed_combinations = set()
        for col_tuple in master_sessions_pivot_df.columns:
            week_label, metric, period_name = col_tuple
            if metric in METRICS_TO_AGGREGATE:
                processed_combinations.add((week_label, period_name))

        for week_label, period_name in processed_combinations:
            sessions_col_tuple = (week_label, 'Sessions', period_name)
            purchases_col_tuple = (week_label, 'Purchases', period_name)
            revenue_col_tuple = (week_label, 'Purchase revenue', period_name)

            # Check if all base metric columns exist for this combination
            if not (sessions_col_tuple in master_sessions_pivot_df.columns and \
                    purchases_col_tuple in master_sessions_pivot_df.columns and \
                    revenue_col_tuple in master_sessions_pivot_df.columns):
                continue

            sessions_s = master_sessions_pivot_df[sessions_col_tuple]
            purchases_s = master_sessions_pivot_df[purchases_col_tuple]
            purchase_revenue_s = master_sessions_pivot_df[revenue_col_tuple]

            # Calculate CVR (Purchases / Sessions)
            cvr = purchases_s / sessions_s
            master_sessions_pivot_df[(week_label, 'CVR', period_name)] = cvr.fillna(0).replace([float('inf'), -float('inf')], 0)

            # Calculate AOV (Purchase Revenue / Purchases)
            aov = purchase_revenue_s / purchases_s
            master_sessions_pivot_df[(week_label, 'AOV', period_name)] = aov.fillna(0).replace([float('inf'), -float('inf')], 0)

        # Calculate Total columns (sum of all weeks for each metric/period)
        print("📊 Calculating Total columns (sum of all weeks)...")
        for period_name in session_periods_display_names:
            for metric_name in METRICS_TO_AGGREGATE:
                # Find all week columns for this metric and period
                week_columns = [(week_label, metric_name, period_name) 
                              for week_label, _, _ in master_sessions_pivot_df.columns 
                              if week_label != "Total" and (week_label, metric_name, period_name) in master_sessions_pivot_df.columns]
                
                if week_columns:
                    # Sum all week columns for this metric/period
                    total_values = master_sessions_pivot_df[week_columns].sum(axis=1)
                    master_sessions_pivot_df[("Total", metric_name, period_name)] = total_values
            
            # Calculate Total CVR and AOV
            total_sessions_col = ("Total", 'Sessions', period_name)
            total_purchases_col = ("Total", 'Purchases', period_name)
            total_revenue_col = ("Total", 'Purchase revenue', period_name)
            
            if (total_sessions_col in master_sessions_pivot_df.columns and 
                total_purchases_col in master_sessions_pivot_df.columns):
                # Calculate Total CVR
                total_sessions = master_sessions_pivot_df[total_sessions_col]
                total_purchases = master_sessions_pivot_df[total_purchases_col]
                total_cvr = total_purchases / total_sessions
                master_sessions_pivot_df[("Total", 'CVR', period_name)] = total_cvr.fillna(0).replace([float('inf'), -float('inf')], 0)
            
            if (total_purchases_col in master_sessions_pivot_df.columns and 
                total_revenue_col in master_sessions_pivot_df.columns):
                # Calculate Total AOV
                total_purchases = master_sessions_pivot_df[total_purchases_col]
                total_revenue = master_sessions_pivot_df[total_revenue_col]
                total_aov = total_revenue / total_purchases
                master_sessions_pivot_df[("Total", 'AOV', period_name)] = total_aov.fillna(0).replace([float('inf'), -float('inf')], 0)

        # Re-sort columns to ensure proper ordering: weeks first, then Total, with proper metric ordering
        all_metrics_ordered = METRICS_TO_AGGREGATE + ['CVR', 'AOV']
        
        # Get unique week labels and sort them (excluding Total)
        unique_week_labels = [col[0] for col in master_sessions_pivot_df.columns if col[0] != "Total"]
        unique_week_labels_sorted = sorted(list(set(unique_week_labels)))

        # Get ordered periods
        period_order_from_sessions_info = [s_info[2] for s_info in sessions_info] 
        actual_periods_in_df = list(master_sessions_pivot_df.columns.get_level_values('Period').unique())
        ordered_periods = [p for p in period_order_from_sessions_info if p in actual_periods_in_df]
        for p_df in actual_periods_in_df:
            if p_df not in ordered_periods:
                ordered_periods.append(p_df)

        new_column_tuples = []
        
        # Add all week columns first
        for week_l in unique_week_labels_sorted:
            for metric_n in all_metrics_ordered:
                for period_dn in ordered_periods: 
                    if (week_l, metric_n, period_dn) in master_sessions_pivot_df.columns:
                        new_column_tuples.append((week_l, metric_n, period_dn))
        
        # Add Total columns at the end
        for metric_n in all_metrics_ordered:
            for period_dn in ordered_periods: 
                if ("Total", metric_n, period_dn) in master_sessions_pivot_df.columns:
                    new_column_tuples.append(("Total", metric_n, period_dn))
        
        if new_column_tuples:
            master_sessions_pivot_df = master_sessions_pivot_df.reindex(columns=pd.MultiIndex.from_tuples(new_column_tuples))
    
    # Add Grand Total row
    if not master_sessions_pivot_df.empty:
        print("📊 Adding Grand Total row...")
        
        # Create grand total row
        grand_total_row = pd.DataFrame(index=['Grand Total'], columns=master_sessions_pivot_df.columns)
        
        # Calculate totals for each column
        for col in master_sessions_pivot_df.columns:
            week_label, metric, period_name = col
            
            if metric in ['Sessions', 'Purchases', 'Purchase revenue']:
                # Sum these metrics
                grand_total_row.loc['Grand Total', col] = master_sessions_pivot_df[col].sum()
            elif metric == 'CVR':
                # Calculate overall CVR = Total Purchases / Total Sessions
                if week_label == "Total":
                    # For Total column, use the Total Sessions and Total Purchases
                    total_sessions_col = ("Total", 'Sessions', period_name)
                    total_purchases_col = ("Total", 'Purchases', period_name)
                else:
                    # For week columns, use that specific week
                    total_sessions_col = (week_label, 'Sessions', period_name)
                    total_purchases_col = (week_label, 'Purchases', period_name)
                
                if total_sessions_col in master_sessions_pivot_df.columns and total_purchases_col in master_sessions_pivot_df.columns:
                    total_sessions = master_sessions_pivot_df[total_sessions_col].sum()
                    total_purchases = master_sessions_pivot_df[total_purchases_col].sum()
                    
                    if total_sessions > 0:
                        grand_total_row.loc['Grand Total', col] = total_purchases / total_sessions
                    else:
                        grand_total_row.loc['Grand Total', col] = 0
                else:
                    grand_total_row.loc['Grand Total', col] = 0
            elif metric == 'AOV':
                # Calculate overall AOV = Total Revenue / Total Purchases
                if week_label == "Total":
                    # For Total column, use the Total Revenue and Total Purchases
                    total_revenue_col = ("Total", 'Purchase revenue', period_name)
                    total_purchases_col = ("Total", 'Purchases', period_name)
                else:
                    # For week columns, use that specific week
                    total_revenue_col = (week_label, 'Purchase revenue', period_name)
                    total_purchases_col = (week_label, 'Purchases', period_name)
                
                if total_revenue_col in master_sessions_pivot_df.columns and total_purchases_col in master_sessions_pivot_df.columns:
                    total_revenue = master_sessions_pivot_df[total_revenue_col].sum()
                    total_purchases = master_sessions_pivot_df[total_purchases_col].sum()
                    
                    if total_purchases > 0:
                        grand_total_row.loc['Grand Total', col] = total_revenue / total_purchases
                    else:
                        grand_total_row.loc['Grand Total', col] = 0
                else:
                    grand_total_row.loc['Grand Total', col] = 0
        
        # Append grand total row to the main dataframe
        master_sessions_pivot_df = pd.concat([master_sessions_pivot_df, grand_total_row.fillna(0)])
        print("✅ Grand Total row added successfully")
    
    return master_sessions_pivot_df.fillna(0)

# Generate all three pivot tables
print("🚀 GENERATING ALL THREE PIVOT TABLES")
print("=" * 80)

# 1. No filter pivot
print("\n1️⃣ CREATING NO FILTER PIVOT TABLE")
master_sessions_pivot_no_filter = create_master_sessions_pivot(sessions_info, cg_filter=None)

# 2. EA only pivot
print("\n2️⃣ CREATING EA ONLY PIVOT TABLE")
master_sessions_pivot_ea_only = create_master_sessions_pivot(sessions_info, cg_filter="EA_only")

# 3. Non-EA pivot
print("\n3️⃣ CREATING NON-EA PIVOT TABLE")
master_sessions_pivot_non_ea = create_master_sessions_pivot(sessions_info, cg_filter="non_EA")

# Display all three pivot tables
print("\n" + "=" * 80)
print("📊 DISPLAYING ALL PIVOT TABLES")
print("=" * 80)

print("\n🔍 NO FILTER PIVOT TABLE:")
print("-" * 40)
display(master_sessions_pivot_no_filter)

print("\n🔍 EA ONLY PIVOT TABLE:")
print("-" * 40)
display(master_sessions_pivot_ea_only)

print("\n🔍 NON-EA PIVOT TABLE:")
print("-" * 40)
display(master_sessions_pivot_non_ea)

🚀 GENERATING ALL THREE PIVOT TABLES

1️⃣ CREATING NO FILTER PIVOT TABLE
📊 SESSIONS DATA ANALYSIS - WEEKLY MASTER PIVOT
🔍 Filter: NO FILTER

📄 Processing May 25 Sessions Data:
----------------------------------------
  Raw Shape: (9555, 16)
  Raw Columns: ['Day', 'Combined', 'Date', 'Transaction ID', 'Session campaign', 'Session Channel Group - 1', 'Channel', 'Session source / medium', 'Sessions', 'Purchases', 'Purchase revenue', 'CG', 'Category', 'Unnamed: 13', 20250501, datetime.datetime(2025, 5, 1, 0, 0)]
  Shape after filtering 'Gift Card': (8774, 16)
📉 May 25: Filtered 273 rows with Day > 30
  Available metrics for May 25: ['Sessions', 'Purchases', 'Purchase revenue']
  Processed May 25 successfully. Max week: 6

📄 Processing June 24 Sessions Data:
----------------------------------------
  Raw Shape: (9555, 16)
  Raw Columns: ['Day', 'Combined', 'Date', 'Transaction ID', 'Session campaign', 'Session Channel Group - 1', 'Channel', 'Session source / medium', 'Sessions', 'Purchases',

  master_sessions_pivot_df = master_sessions_pivot_df.fillna(0) # Initialize with 0
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_session

  Raw Shape: (9555, 16)
  Raw Columns: ['Day', 'Combined', 'Date', 'Transaction ID', 'Session campaign', 'Session Channel Group - 1', 'Channel', 'Session source / medium', 'Sessions', 'Purchases', 'Purchase revenue', 'CG', 'Category', 'Unnamed: 13', 20250501, datetime.datetime(2025, 5, 1, 0, 0)]
  Shape after filtering 'Gift Card': (8774, 16)
  Shape after EA filter: (1577, 16)
📉 May 25: Filtered 44 rows with Day > 30
  Available metrics for May 25: ['Sessions', 'Purchases', 'Purchase revenue']
  Processed May 25 successfully. Max week: 6

📄 Processing June 24 Sessions Data:
----------------------------------------
  Raw Shape: (9317, 11)
  Raw Columns: ['Date', 'Transaction ID', 'Session campaign', 'Session Channel Group - 1', 'Channel', 'Session source / medium', 'Sessions', 'Purchases', 'Purchase revenue', 'CG', 'Category']
  Shape after filtering 'Gift Card': (9302, 11)
  Shape after EA filter: (1560, 11)
📉 June 24: Filtered 0 rows with Day > 30
  Available metrics for June 24: ['S

  master_sessions_pivot_df = master_sessions_pivot_df.fillna(0) # Initialize with 0
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_session

  Raw Shape: (9555, 16)
  Raw Columns: ['Day', 'Combined', 'Date', 'Transaction ID', 'Session campaign', 'Session Channel Group - 1', 'Channel', 'Session source / medium', 'Sessions', 'Purchases', 'Purchase revenue', 'CG', 'Category', 'Unnamed: 13', 20250501, datetime.datetime(2025, 5, 1, 0, 0)]
  Shape after filtering 'Gift Card': (8774, 16)
  Shape after non-EA filter: (7197, 16)
📉 May 25: Filtered 229 rows with Day > 30
  Available metrics for May 25: ['Sessions', 'Purchases', 'Purchase revenue']
  Processed May 25 successfully. Max week: 6

📄 Processing June 24 Sessions Data:
----------------------------------------
  Raw Shape: (9317, 11)
  Raw Columns: ['Date', 'Transaction ID', 'Session campaign', 'Session Channel Group - 1', 'Channel', 'Session source / medium', 'Sessions', 'Purchases', 'Purchase revenue', 'CG', 'Category']
  Shape after filtering 'Gift Card': (9302, 11)
  Shape after non-EA filter: (7742, 11)
📉 June 24: Filtered 0 rows with Day > 30
  Available metrics for Jun

  master_sessions_pivot_df = master_sessions_pivot_df.fillna(0) # Initialize with 0
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_sessions_pivot_df.loc[channel_val, (week_label, metric_name, period_display_name)] = value
  master_session

Unnamed: 0_level_0,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,...,Total,Total,Total,Total,Total,Total,Total,Total,Total,Total
Unnamed: 0_level_1,Sessions,Sessions,Sessions,Purchases,Purchases,Purchases,Purchase revenue,Purchase revenue,Purchase revenue,CVR,...,Purchases,Purchase revenue,Purchase revenue,Purchase revenue,CVR,CVR,CVR,AOV,AOV,AOV
Unnamed: 0_level_2,May 25,June 24,June 25,May 25,June 24,June 25,May 25,June 24,June 25,May 25,...,June 25,May 25,June 24,June 25,May 25,June 24,June 25,May 25,June 24,June 25
Affiliates,1012,1186,717,7,0,1,19642.999999,0.0,1199.0,0.006917,...,2100,6194199.0,1021255.0,5628023.0,0.006751,0.001159,0.006548,3081.690821,2063.142424,2680.011071
Display,689,511,28,0,0,0,0.0,0.0,0.0,0.0,...,0,106470.0,33375.0,0.0,2e-05,0.000528,0.0,1774.5,1112.499999,0.0
Email,170,3816,1513,0,5,5,0.0,13143.849998,4938.000004,0.0,...,255,731413.5,852774.8,399795.0,0.000997,0.000914,0.000863,2031.704167,1579.2125,1567.82353
Organic,13974,13812,17993,61,100,36,195768.489993,262169.019968,87858.000014,0.004365,...,17205,52497650.0,55177980.0,38944640.0,0.003205,0.004589,0.001717,2255.053602,2524.73024,2263.565205
Paid Other,15,0,143,0,0,0,0.0,0.0,0.0,0.0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Paid Perf,6917,12117,8706,29,35,18,52385.850016,94237.799997,34743.000005,0.004193,...,9315,28103660.0,14956870.0,18677160.0,0.004627,0.001613,0.003269,2023.301847,2153.616739,2005.062319
Paid Social,3,121,3,0,0,0,0.0,0.0,0.0,0.0,...,0,68175.0,22335.0,0.0,0.002747,0.000283,0.0,1515.000001,1489.000001,0.0
Grand Total,22780,31563,29103,97,140,60,267797.340008,369550.669963,128738.000023,0.004258,...,28875,87701570.0,72064590.0,63649610.0,0.00283,0.002931,0.002132,2212.172217,2411.800131,2204.315605



🔍 EA ONLY PIVOT TABLE:
----------------------------------------


Unnamed: 0_level_0,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,...,Total,Total,Total,Total,Total,Total,Total,Total,Total,Total
Unnamed: 0_level_1,Sessions,Sessions,Sessions,Purchases,Purchases,Purchases,Purchase revenue,Purchase revenue,Purchase revenue,CVR,...,Purchases,Purchase revenue,Purchase revenue,Purchase revenue,CVR,CVR,CVR,AOV,AOV,AOV
Unnamed: 0_level_2,May 25,June 24,June 25,May 25,June 24,June 25,May 25,June 24,June 25,May 25,...,June 25,May 25,June 24,June 25,May 25,June 24,June 25,May 25,June 24,June 25
Affiliates,0,0,0,0,0,0,0.0,0.0,0.0,0.0,...,75,287160.0,209085.0,137475.0,1.0,1.0,1.0,3190.666667,1991.285715,1833.0
Display,0,0,0,0,0,0,0.0,0.0,0.0,0.0,...,0,25485.0,27435.0,0.0,1.0,1.0,0.0,1699.0,1828.999999,0.0
Email,0,0,0,0,0,0,0.0,0.0,0.0,0.0,...,30,76440.0,106468.5,115470.0,1.0,1.0,1.0,1274.000001,3548.950001,3849.0
Organic,159,176,208,28,76,23,49259.639994,236663.899974,67376.000012,0.176101,...,11325,29580900.0,42698380.0,26379560.0,0.165396,0.16232,0.124649,2164.720264,2628.401191,2329.320821
Paid Perf,4,18,7,4,18,7,8696.000002,53978.999994,14693.000001,1.0,...,2865,7366222.0,5719648.0,5423891.0,1.0,1.0,1.0,2116.730345,2629.723242,1893.155497
Grand Total,163,194,215,32,94,30,57955.639996,290642.899968,82069.000013,0.196319,...,14295,37336210.0,48761010.0,32056390.0,0.200661,0.181339,0.152358,2156.915598,2625.795041,2242.489948



🔍 NON-EA PIVOT TABLE:
----------------------------------------


Unnamed: 0_level_0,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,Week 1,...,Total,Total,Total,Total,Total,Total,Total,Total,Total,Total
Unnamed: 0_level_1,Sessions,Sessions,Sessions,Purchases,Purchases,Purchases,Purchase revenue,Purchase revenue,Purchase revenue,CVR,...,Purchases,Purchase revenue,Purchase revenue,Purchase revenue,CVR,CVR,CVR,AOV,AOV,AOV
Unnamed: 0_level_2,May 25,June 24,June 25,May 25,June 24,June 25,May 25,June 24,June 25,May 25,...,June 25,May 25,June 24,June 25,May 25,June 24,June 25,May 25,June 24,June 25
Affiliates,1012,1186,717,7,0,1,19642.999999,0.0,1199.0,0.006917,...,2025,5907039.0,812170.5,5490548.0,0.006451,0.000913,0.006316,3076.582578,2082.488461,2711.381852
Display,689,511,28,0,0,0,0.0,0.0,0.0,0.0,...,0,80985.0,5940.0,0.0,1.5e-05,0.000264,0.0,1799.666667,395.999999,0.0
Email,170,3816,1513,0,5,5,0.0,13143.849998,4938.000004,0.0,...,225,654973.5,746306.3,284325.0,0.000831,0.000863,0.000762,2183.245,1463.345588,1263.666667
Organic,13815,13636,17785,33,24,13,146508.849999,25505.119994,20482.000002,0.002389,...,5880,22916750.0,12479600.0,12565080.0,0.001339,0.001203,0.000592,2383.436864,2224.527995,2136.918546
Paid Other,15,0,143,0,0,0,0.0,0.0,0.0,0.0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Paid Perf,6913,12099,8699,25,17,11,43689.850014,40258.800003,20050.000004,0.003616,...,6450,20737440.0,9237220.0,13253260.0,0.003471,0.001109,0.002266,1992.069265,1936.524151,2054.769767
Paid Social,3,121,3,0,0,0,0.0,0.0,0.0,0.0,...,0,68175.0,22335.0,0.0,0.002747,0.000283,0.0,1515.000001,1489.000001,0.0
Grand Total,22617,31369,28888,65,46,30,209841.700012,78907.769995,46669.00001,0.002874,...,14580,50365360.0,23303570.0,31593220.0,0.001604,0.001121,0.001084,2254.997025,2060.439788,2166.887469


In [323]:
def write_sessions_summary_to_dashboard(sessions_pivot_df, analysis_name="Sessions JumboAE"):
    """
    Write final/highest week data from sessions pivot to Weekly sheet with proper multi-level headers
    """
    print(f"Writing sessions summary data to Weekly for: {analysis_name}")
    
    try:
        if "Weekly" in wb.sheetnames:
            # Get the existing Summary Dashboard sheet
            summary_ws = wb["Weekly"]
            
            # Find the last row with data
            last_row = summary_ws.max_row
            
            # Find the first empty row for new section
            start_row = last_row + 3  # Leave some spacing
            
            # Define styles consistent with existing dashboard
            section_header_font = Font(bold=True, size=14, color='FFFFFF')
            section_header_fill = PatternFill(start_color='1F4E79', end_color='1F4E79', fill_type='solid')
            
            column_header_font = Font(bold=True, size=10, color='FFFFFF')
            column_header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
            
            subheader_font = Font(bold=True, size=10, color='FFFFFF')
            subheader_fill = PatternFill(start_color='5B9BD5', end_color='5B9BD5', fill_type='solid')
            
            row_header_font = Font(bold=True, size=9)
            row_header_fill = PatternFill(start_color='F2F2F2', end_color='F2F2F2', fill_type='solid')
            
            total_row_font = Font(bold=True, size=10)
            total_row_fill = PatternFill(start_color='FFE699', end_color='FFE699', fill_type='solid')
            
            thin_border = Border(
                left=Side(style='thin'), right=Side(style='thin'),
                top=Side(style='thin'), bottom=Side(style='thin')
            )
            
            # Find the highest week in the sessions pivot
            if not sessions_pivot_df.empty:
                # Extract week columns
                week_columns = [col for col in sessions_pivot_df.columns if col[0] != 'Total']
                
                if week_columns:
                    # Extract week numbers and find the maximum
                    week_numbers = []
                    for col in week_columns:
                        week_label = col[0]
                        if 'Week' in week_label:
                            try:
                                week_num = int(week_label.split(' ')[1])
                                week_numbers.append(week_num)
                            except:
                                pass
                    
                    if week_numbers:
                        max_week = max(week_numbers)
                        max_week_label = f"Week {max_week}"
                        
                        # Get available metrics and periods for this week
                        available_metrics = []
                        available_periods = []
                        
                        for col in sessions_pivot_df.columns:
                            if col[0] == max_week_label:
                                if col[1] not in available_metrics:
                                    available_metrics.append(col[1])
                                if col[2] not in available_periods:
                                    available_periods.append(col[2])
                        
                        # Filter out last month period and identify current/latest period
                        filtered_periods = []
                        latest_period = None
                        
                        for period in available_periods:
                            # Skip last month period - identify by common patterns
                            is_last_month = (
                                hasattr(locals(), 'LAST_MONTH_SESSION_DISPLAY') and LAST_MONTH_SESSION_DISPLAY in period or
                                'May24' in period or 'May 24' in period or 
                                any(month in period.lower() for month in ['may', 'april', 'mar']) and '24' in period
                            )
                            
                            if not is_last_month:
                                filtered_periods.append(period)
                                
                                # Identify latest/current period for highlighting
                                is_current = (
                                    hasattr(locals(), 'CURRENT_SESSION_DISPLAY') and CURRENT_SESSION_DISPLAY in period or
                                    'Jun25' in period or 'June 25' in period or '2025' in period
                                )
                                if is_current:
                                    latest_period = period
                        
                        # Update available_periods to use filtered list
                        available_periods = filtered_periods
                        
                        # Add "v/s Last Year %" as a calculated metric
                        # We'll add this column for each metric that can be compared
                        comparable_metrics = ['Sessions', 'Purchases', 'Purchase revenue']
                        
                        # Calculate total columns needed (existing + calculated percentage columns)
                        additional_pct_columns = len([m for m in available_metrics if m in comparable_metrics])
                        total_columns = 1 + (len(available_metrics) * len(available_periods)) + additional_pct_columns  # 1 for channel column
                        
                        # Create section header
                        summary_ws.cell(row=start_row, column=1, value=f"SESSIONS ANALYSIS - {analysis_name} ({max_week_label})")
                        header_cell = summary_ws.cell(row=start_row, column=1)
                        header_cell.font = section_header_font
                        header_cell.fill = section_header_fill
                        header_cell.alignment = Alignment(horizontal='center', vertical='center')
                        
                        # Merge cells for the main header
                        summary_ws.merge_cells(start_row=start_row, start_column=1, end_row=start_row, end_column=total_columns)
                        
                        # Apply borders to header
                        for col_idx in range(1, total_columns + 1):
                            cell = summary_ws.cell(row=start_row, column=col_idx)
                            cell.border = thin_border
                        
                        current_row = start_row + 1
                        
                        # Row 1: Metric headers (top level) - similar to other pivot tables
                        col_idx = 2  # Start from column 2 (column 1 is for channel names)
                        metric_start_columns = {}
                        
                        for metric in available_metrics:
                            metric_start_columns[metric] = col_idx
                            metric_cols = len(available_periods)
                            
                            # Write metric header
                            summary_ws.cell(row=current_row, column=col_idx, value=metric)
                            metric_cell = summary_ws.cell(row=current_row, column=col_idx)
                            metric_cell.font = column_header_font
                            metric_cell.fill = column_header_fill
                            metric_cell.alignment = Alignment(horizontal='center', vertical='center')
                            metric_cell.border = thin_border
                            
                            # Merge cells for this metric if it has multiple periods
                            if metric_cols > 1:
                                summary_ws.merge_cells(start_row=current_row, start_column=col_idx, 
                                                      end_row=current_row, end_column=col_idx + metric_cols - 1)
                                
                                # Apply styles to all merged cells
                                for i in range(metric_cols):
                                    merged_cell = summary_ws.cell(row=current_row, column=col_idx + i)
                                    merged_cell.border = thin_border
                                    merged_cell.font = column_header_font
                                    merged_cell.fill = column_header_fill
                            
                            col_idx += metric_cols
                        
                        # Add percentage columns for comparable metrics
                        for metric in available_metrics:
                            if metric in comparable_metrics:
                                summary_ws.cell(row=current_row, column=col_idx, value=f"{metric} v/s LY %")
                                pct_header_cell = summary_ws.cell(row=current_row, column=col_idx)
                                pct_header_cell.font = column_header_font
                                pct_header_cell.fill = PatternFill(start_color='8B4513', end_color='8B4513', fill_type='solid')  # Brown color to distinguish
                                pct_header_cell.alignment = Alignment(horizontal='center', vertical='center')
                                pct_header_cell.border = thin_border
                                col_idx += 1
                        
                        # Add "Channel" header for first column
                        summary_ws.cell(row=current_row, column=1, value="Channel")
                        channel_header = summary_ws.cell(row=current_row, column=1)
                        channel_header.font = column_header_font
                        channel_header.fill = column_header_fill
                        channel_header.alignment = Alignment(horizontal='center', vertical='center')
                        channel_header.border = thin_border
                        
                        # Merge channel header across both header rows
                        summary_ws.merge_cells(start_row=current_row, start_column=1, 
                                              end_row=current_row + 1, end_column=1)
                        
                        # Row 2: Period headers (bottom level)
                        current_row += 1
                        col_idx = 2
                        
                        for metric in available_metrics:
                            for period in available_periods:
                                summary_ws.cell(row=current_row, column=col_idx, value=period)
                                period_cell = summary_ws.cell(row=current_row, column=col_idx)
                                period_cell.font = subheader_font
                                
                                # Highlight latest period with different color
                                if period == latest_period:
                                    period_cell.fill = PatternFill(start_color='FFD700', end_color='FFD700', fill_type='solid')  # Gold highlight for latest
                                    period_cell.font = Font(bold=True, size=10, color='000000')  # Black text on gold background
                                else:
                                    period_cell.fill = subheader_fill
                                
                                period_cell.alignment = Alignment(horizontal='center', vertical='center')
                                period_cell.border = thin_border
                                col_idx += 1
                        
                        # Add period headers for percentage columns
                        for metric in available_metrics:
                            if metric in comparable_metrics:
                                summary_ws.cell(row=current_row, column=col_idx, value="Growth %")
                                pct_period_cell = summary_ws.cell(row=current_row, column=col_idx)
                                pct_period_cell.font = subheader_font
                                pct_period_cell.fill = PatternFill(start_color='D2691E', end_color='D2691E', fill_type='solid')  # Chocolate color
                                pct_period_cell.alignment = Alignment(horizontal='center', vertical='center')
                                pct_period_cell.border = thin_border
                                col_idx += 1
                        
                        # Move to data rows
                        current_row += 1
                        
                        # Write data for each channel
                        for channel in sessions_pivot_df.index:
                            # Write channel name
                            summary_ws.cell(row=current_row, column=1, value=str(channel))
                            channel_cell = summary_ws.cell(row=current_row, column=1)
                            
                            # Special formatting for Grand Total
                            if channel == "Grand Total":
                                channel_cell.font = total_row_font
                                channel_cell.fill = total_row_fill
                            else:
                                channel_cell.font = row_header_font
                                channel_cell.fill = row_header_fill
                            
                            channel_cell.alignment = Alignment(horizontal='left', vertical='center')
                            channel_cell.border = thin_border
                            
                            # Write metric values for all periods
                            col_idx = 2
                            for metric in available_metrics:
                                for period in available_periods:
                                    if (max_week_label, metric, period) in sessions_pivot_df.columns:
                                        value = sessions_pivot_df.loc[channel, (max_week_label, metric, period)]
                                    else:
                                        value = 0
                                    
                                    summary_ws.cell(row=current_row, column=col_idx, value=value)
                                    data_cell = summary_ws.cell(row=current_row, column=col_idx)
                                    
                                    # Apply formatting based on channel and metric type
                                    if channel == "Grand Total":
                                        data_cell.font = Font(bold=True, size=9)
                                        data_cell.fill = total_row_fill
                                    else:
                                        data_cell.font = Font(size=9)
                                    
                                    # Format numbers based on metric type
                                    if 'CVR' in metric:
                                        data_cell.number_format = '0"%"'
                                    elif 'AOV' in metric or 'revenue' in metric.lower():
                                        data_cell.number_format = '#,##0'
                                    else:
                                        data_cell.number_format = '#,##0'
                                    
                                    data_cell.alignment = Alignment(horizontal='right', vertical='center')
                                    data_cell.border = thin_border
                                    col_idx += 1
                            
                            # Calculate and write percentage columns for comparable metrics
                            for metric in available_metrics:
                                if metric in comparable_metrics:
                                    # Find current and last year values for this metric
                                    current_value = 0
                                    last_year_value = 0
                                    
                                    # Try to find current and last year values from available periods
                                    for period in available_periods:
                                        if (max_week_label, metric, period) in sessions_pivot_df.columns:
                                            period_value = sessions_pivot_df.loc[channel, (max_week_label, metric, period)]
                                            
                                            # Identify current and last year periods based on the global constants
                                            if hasattr(locals(), 'CURRENT_SESSION_DISPLAY') and CURRENT_SESSION_DISPLAY in period:
                                                current_value = period_value
                                            elif hasattr(locals(), 'LAST_YEAR_SESSION_DISPLAY') and LAST_YEAR_SESSION_DISPLAY in period:
                                                last_year_value = period_value
                                            # Alternative identification if constants not available
                                            elif 'Jun25' in period or 'June 25' in period or '2025' in period:
                                                current_value = period_value
                                            elif 'Jun24' in period or 'June 24' in period or '2024' in period:
                                                last_year_value = period_value
                                    
                                    # Calculate percentage change
                                    if last_year_value != 0:
                                        percentage_change = ((current_value - last_year_value) / last_year_value) * 100
                                    elif current_value > 0:
                                        percentage_change = 100  # 100% growth from 0
                                    else:
                                        percentage_change = 0
                                    
                                    # Write percentage value
                                    summary_ws.cell(row=current_row, column=col_idx, value=percentage_change)
                                    pct_cell = summary_ws.cell(row=current_row, column=col_idx)
                                    
                                    # Apply formatting based on channel type
                                    if channel == "Grand Total":
                                        base_font = Font(bold=True, size=9)
                                        pct_cell.fill = total_row_fill
                                    else:
                                        base_font = Font(size=9)
                                    
                                    # Apply color formatting based on percentage value
                                    if percentage_change > 0:
                                        # Green for positive growth
                                        pct_cell.font = Font(color='008000', bold=channel=="Grand Total", size=9)
                                    elif percentage_change < 0:
                                        # Red for negative growth
                                        pct_cell.font = Font(color='FF0000', bold=channel=="Grand Total", size=9)
                                    else:
                                        # Default color for zero growth
                                        pct_cell.font = base_font
                                    
                                    pct_cell.number_format = '0"%"'
                                    pct_cell.alignment = Alignment(horizontal='right', vertical='center')
                                    pct_cell.border = thin_border
                                    col_idx += 1
                            
                            current_row += 1
                        
                        # Auto-adjust column widths
                        for col_idx in range(1, total_columns + 1):
                            col_letter = get_column_letter(col_idx)
                            max_length = 0
                            for row_idx in range(start_row, current_row):
                                cell = summary_ws.cell(row=row_idx, column=col_idx)
                                if cell.value:
                                    try:
                                        cell_length = len(str(cell.value))
                                        if cell_length > max_length:
                                            max_length = cell_length
                                    except:
                                        pass
                            
                            adjusted_width = min(max_length + 3, 25)
                            summary_ws.column_dimensions[col_letter].width = adjusted_width
                        
                        print(f"✅ Sessions summary data for {analysis_name} written to Weekly successfully!")
                        
                    else:
                        print(f"❌ No valid week numbers found in sessions pivot for {analysis_name}")
                else:
                    print(f"❌ No week columns found in sessions pivot for {analysis_name}")
            else:
                print(f"❌ Sessions pivot data is empty for {analysis_name}")
                
        else:
            print("❌ Weekly sheet not found in workbook")
            
    except Exception as e:
        print(f"❌ Error writing sessions summary to dashboard: {str(e)}")
        import traceback
        traceback.print_exc()

In [324]:
# Export sessions pivot tables to Excel with professional formatting
print("📤 EXPORTING BEAUTIFULLY FORMATTED SESSIONS PIVOT TABLES TO EXCEL...")
print("=" * 80)

output_file = 'IDG_Weekly_Analysis_Combined.xlsx'

try:
    # Load existing workbook
    from openpyxl import load_workbook
    from openpyxl.styles import Font, PatternFill, Border, Side, Alignment, numbers
    from openpyxl.utils import get_column_letter
    wb = load_workbook(output_file)
    
    # Define styles for formatting consistency with previous tables
    section_header_font = Font(bold=True, size=14, color='FFFFFF')
    section_header_fill = PatternFill(start_color='1F4E79', end_color='1F4E79', fill_type='solid')
    
    column_header_font = Font(bold=True, size=10, color='FFFFFF')
    column_header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
    
    subheader_font = Font(bold=True, size=10, color='FFFFFF')
    subheader_fill = PatternFill(start_color='5B9BD5', end_color='5B9BD5', fill_type='solid')
    
    row_header_font = Font(bold=True, size=9)
    row_header_fill = PatternFill(start_color='F2F2F2', end_color='F2F2F2', fill_type='solid')
    
    total_row_font = Font(bold=True, size=10)
    total_row_fill = PatternFill(start_color='FFE699', end_color='FFE699', fill_type='solid')
    
    thin_border = Border(
        left=Side(style='thin'), right=Side(style='thin'),
        top=Side(style='thin'), bottom=Side(style='thin')
    )
    
    def write_sessions_monthly_total_to_sheet(pivot_df, sheet_name):
        """
        Write only the Total column from sessions pivot to monthly sheet with multi-level headers
        """
        print(f"Writing monthly sessions total data to sheet: {sheet_name}")
        
        if sheet_name in wb.sheetnames:
            # Get the existing sheet
            ws = wb[sheet_name]
            
            # Extract only the Total column data
            total_data = pivot_df["Total"]
            
            # Find the first empty row
            last_row = ws.max_row
            start_row = last_row + 3  # Leave two blank rows for spacing
            
            # Determine column spans for Total data
            total_columns = len(total_data.columns) + 1  # +1 for the row headers column
            
            # Write section header with professional formatting
            ws.cell(row=start_row, column=1, value="SESSIONS MONTHLY ANALYSIS")
            header_cell = ws.cell(row=start_row, column=1)
            header_cell.font = section_header_font
            header_cell.fill = section_header_fill
            header_cell.alignment = Alignment(horizontal='center', vertical='center')
            
            # Merge cells for the header
            ws.merge_cells(start_row=start_row, start_column=1, end_row=start_row, end_column=total_columns)
            
            # Apply borders to all merged header cells
            for col_idx in range(1, total_columns + 1):
                cell = ws.cell(row=start_row, column=col_idx)
                cell.border = thin_border
            
            # Track current row after header
            current_row = start_row + 1
            
            # Create multi-level headers
            # First, organize the columns into a hierarchy: Metric -> Period
            header_hierarchy = {}
            for metric_period in total_data.columns:
                metric, period = metric_period
                if metric not in header_hierarchy:
                    header_hierarchy[metric] = []
                header_hierarchy[metric].append(period)
            
            # Row 1: Metric headers (top level)
            col_idx = 2  # Start from column 2 (column 1 is for row labels)
            metric_start_columns = {}  # To track where each metric starts
            
            for metric in header_hierarchy:
                metric_start_columns[metric] = col_idx
                
                # Calculate total columns for this metric
                metric_periods = header_hierarchy[metric]
                metric_cols = len(metric_periods)
                
                # Write metric header and merge cells
                ws.cell(row=current_row, column=col_idx, value=metric)
                metric_cell = ws.cell(row=current_row, column=col_idx)
                metric_cell.font = column_header_font
                metric_cell.fill = column_header_fill
                metric_cell.alignment = Alignment(horizontal='center', vertical='center')
                metric_cell.border = thin_border
                
                # Merge cells if needed
                if metric_cols > 1:
                    ws.merge_cells(start_row=current_row, start_column=col_idx, 
                                  end_row=current_row, end_column=col_idx + metric_cols - 1)
                    
                    # Apply styles to all merged cells
                    for i in range(metric_cols):
                        merged_cell = ws.cell(row=current_row, column=col_idx + i)
                        merged_cell.border = thin_border
                        merged_cell.font = column_header_font
                        merged_cell.fill = column_header_fill
                
                # Move to next position
                col_idx += metric_cols
            
            # Add "Channel" header for first column
            ws.cell(row=current_row, column=1, value="Channel")
            channel_header = ws.cell(row=current_row, column=1)
            channel_header.font = column_header_font
            channel_header.fill = column_header_fill
            channel_header.alignment = Alignment(horizontal='center', vertical='center')
            channel_header.border = thin_border
            ws.merge_cells(start_row=current_row, start_column=1, 
                          end_row=current_row + 1, end_column=1)  # Merge across both header rows
            
            # Row 2: Period headers (bottom level)
            current_row += 1
            col_idx = 2  # Reset column index
            
            for metric_period in total_data.columns:
                period = metric_period[1]  # Second part (period)
                
                # Write period header
                ws.cell(row=current_row, column=col_idx, value=period)
                period_cell = ws.cell(row=current_row, column=col_idx)
                period_cell.font = subheader_font
                period_cell.fill = subheader_fill
                period_cell.alignment = Alignment(horizontal='center', vertical='center')
                period_cell.border = thin_border
                
                col_idx += 1
            
            # Move to next row for data
            current_row += 1
            
            # Write data rows
            for channel in total_data.index:
                # Write channel name
                ws.cell(row=current_row, column=1, value=str(channel))
                row_header_cell = ws.cell(row=current_row, column=1)
                
                # Special formatting for Grand Total
                if channel == "Grand Total":
                    row_header_cell.font = total_row_font
                    row_header_cell.fill = total_row_fill
                else:
                    row_header_cell.font = row_header_font
                    row_header_cell.fill = row_header_fill
                    
                row_header_cell.alignment = Alignment(horizontal='left', vertical='center')
                row_header_cell.border = thin_border
                
                # Write data values
                col_idx = 2
                for metric_period in total_data.columns:
                    value = total_data.loc[channel, metric_period]
                    ws.cell(row=current_row, column=col_idx, value=value)
                    data_cell = ws.cell(row=current_row, column=col_idx)
                    
                    # Apply cell formatting based on content
                    if channel == "Grand Total":
                        data_cell.font = Font(bold=True, size=9)
                        data_cell.fill = total_row_fill
                    else:
                        data_cell.font = Font(size=9)
                    
                    # Format numbers based on metric type
                    metric = metric_period[0]  # First part (metric)
                    period = metric_period[1]  # Second part (period)
                    
                    if 'CVR' in metric:
                        data_cell.number_format = '0"%"'
                    elif 'AOV' in metric:
                        data_cell.number_format = '#,##0'
                    elif 'revenue' in metric.lower():
                        data_cell.number_format = '#,##0'
                    elif '%' in period:  # Growth percentages
                        data_cell.number_format = '0"%"'
                        # Color code growth percentages
                        if isinstance(value, (int, float)) and value != float('inf') and value != float('-inf') and value != 0:
                            if value > 0:
                                data_cell.font = Font(color='008000', size=9, bold=channel=='Grand Total')
                            elif value < 0:
                                data_cell.font = Font(color='FF0000', size=9, bold=channel=='Grand Total')
                    else:
                        data_cell.number_format = '#,##0'
                    
                    # Apply borders and alignment
                    data_cell.alignment = Alignment(horizontal='right', vertical='center')
                    data_cell.border = thin_border
                    
                    col_idx += 1
                
                current_row += 1
            
            # Auto-adjust column widths for better readability
            for col_idx in range(1, total_columns + 1):
                col_letter = get_column_letter(col_idx)
                max_length = 0
                for row_idx in range(start_row, current_row):
                    cell = ws.cell(row=row_idx, column=col_idx)
                    if cell.value:
                        try:
                            cell_length = len(str(cell.value))
                            if cell_length > max_length:
                                max_length = cell_length
                        except:
                            pass
                
                # Set width with padding, limit max width
                adjusted_width = min(max_length + 3, 25)
                ws.column_dimensions[col_letter].width = adjusted_width
            
            print(f"✅ Monthly sessions total data written to {sheet_name} successfully!")
        else:
            print(f"❌ Sheet {sheet_name} not found in workbook")
    
    def write_sessions_pivot_to_sheet(pivot_df, sheet_name):
        print(f"Writing beautifully formatted sessions data to sheet: {sheet_name}")
        
        if sheet_name in wb.sheetnames:
            # Get the existing sheet
            ws = wb[sheet_name]
            
            # Find the first empty row (assuming data starts from row 1)
            last_row = ws.max_row
            start_row = last_row + 3  # Leave two blank rows for spacing
            
            # Determine column spans
            total_columns = len(pivot_df.columns) + 1  # +1 for the row headers column
            
            # Write section header with professional formatting
            ws.cell(row=start_row, column=1, value="TRAFFIC COMPARISION")
            header_cell = ws.cell(row=start_row, column=1)
            header_cell.font = section_header_font
            header_cell.fill = section_header_fill
            header_cell.alignment = Alignment(horizontal='center', vertical='center')
            
            # Merge cells for the header
            ws.merge_cells(start_row=start_row, start_column=1, end_row=start_row, end_column=total_columns)
            
            # Apply borders to all merged header cells
            for col_idx in range(1, total_columns + 1):
                cell = ws.cell(row=start_row, column=col_idx)
                cell.border = thin_border
            
            # Track current row after header
            current_row = start_row + 1
            
            # Create true multi-level headers (3 levels: Week, Metric, Period)
            
            # First, organize the columns into a hierarchy: Week -> Metric -> Period
            header_hierarchy = {}
            for col in pivot_df.columns:
                week, metric, period = col
                if week not in header_hierarchy:
                    header_hierarchy[week] = {}
                if metric not in header_hierarchy[week]:
                    header_hierarchy[week][metric] = []
                header_hierarchy[week][metric].append(period)
            
            # Row 1: Week headers (top level)
            col_idx = 2  # Start from column 2 (column 1 is for row labels)
            week_start_columns = {}  # To track where each week starts
            
            for week in header_hierarchy:
                week_start_columns[week] = col_idx
                
                # Calculate total columns for this week
                week_total_cols = 0
                for metric in header_hierarchy[week]:
                    week_total_cols += len(header_hierarchy[week][metric])
                
                # Write week header and merge cells
                ws.cell(row=current_row, column=col_idx, value=week)
                week_cell = ws.cell(row=current_row, column=col_idx)
                week_cell.font = column_header_font
                week_cell.fill = column_header_fill
                week_cell.alignment = Alignment(horizontal='center', vertical='center')
                week_cell.border = thin_border
                
                # Merge cells if needed
                if week_total_cols > 1:
                    ws.merge_cells(start_row=current_row, start_column=col_idx, 
                                  end_row=current_row, end_column=col_idx + week_total_cols - 1)
                    
                    # Apply styles to all merged cells
                    for i in range(week_total_cols):
                        merged_cell = ws.cell(row=current_row, column=col_idx + i)
                        merged_cell.border = thin_border
                        merged_cell.font = column_header_font
                        merged_cell.fill = column_header_fill
                
                # Move to next position
                col_idx += week_total_cols
            
            # Add "Channel" header for first column
            ws.cell(row=current_row, column=1, value="Channel")
            channel_header = ws.cell(row=current_row, column=1)
            channel_header.font = column_header_font
            channel_header.fill = column_header_fill
            channel_header.alignment = Alignment(horizontal='center', vertical='center')
            channel_header.border = thin_border
            ws.merge_cells(start_row=current_row, start_column=1, 
                          end_row=current_row + 2, end_column=1)  # Merge across all 3 header rows
            
            # Row 2: Metric headers (middle level)
            current_row += 1
            
            for week in header_hierarchy:
                metric_start_col = week_start_columns[week]
                
                for metric in header_hierarchy[week]:
                    # Calculate how many columns this metric spans
                    metric_periods = header_hierarchy[week][metric]
                    metric_cols = len(metric_periods)
                    
                    # Write metric header
                    ws.cell(row=current_row, column=metric_start_col, value=metric)
                    metric_cell = ws.cell(row=current_row, column=metric_start_col)
                    metric_cell.font = column_header_font
                    metric_cell.fill = subheader_fill  # Slightly different shade than the week
                    metric_cell.alignment = Alignment(horizontal='center', vertical='center')
                    metric_cell.border = thin_border
                    
                    # Merge cells if needed
                    if metric_cols > 1:
                        ws.merge_cells(start_row=current_row, start_column=metric_start_col, 
                                      end_row=current_row, end_column=metric_start_col + metric_cols - 1)
                        
                        # Apply styles to all merged cells
                        for i in range(metric_cols):
                            merged_cell = ws.cell(row=current_row, column=metric_start_col + i)
                            merged_cell.border = thin_border
                            merged_cell.font = column_header_font
                            merged_cell.fill = subheader_fill
                    
                    # Move to next position
                    metric_start_col += metric_cols
            
            # Row 3: Period headers (bottom level)
            current_row += 1
            col_idx = 2  # Reset column index
            
            for col in pivot_df.columns:
                period = col[2]  # Third level
                
                # Write period header
                ws.cell(row=current_row, column=col_idx, value=period)
                period_cell = ws.cell(row=current_row, column=col_idx)
                period_cell.font = subheader_font
                period_cell.fill = PatternFill(start_color='7F7F7F', end_color='7F7F7F', fill_type='solid')  # Dark gray that works with white text
                period_cell.alignment = Alignment(horizontal='center', vertical='center')
                period_cell.border = thin_border
                
                col_idx += 1
            
            # Move to next row for data
            current_row += 1
            
            # Write data rows
            for idx in pivot_df.index:
                # Write row header (Channel name)
                ws.cell(row=current_row, column=1, value=str(idx))
                row_header_cell = ws.cell(row=current_row, column=1)
                
                # Special formatting for Grand Total
                if idx == "Grand Total":
                    row_header_cell.font = total_row_font
                    row_header_cell.fill = total_row_fill
                else:
                    row_header_cell.font = row_header_font
                    row_header_cell.fill = row_header_fill
                    
                row_header_cell.alignment = Alignment(horizontal='left', vertical='center')
                row_header_cell.border = thin_border
                
                # Write data values
                col_idx = 2
                for col in pivot_df.columns:
                    value = pivot_df.loc[idx, col]
                    ws.cell(row=current_row, column=col_idx, value=value)
                    data_cell = ws.cell(row=current_row, column=col_idx)
                    
                    # Apply cell formatting based on content
                    if idx == "Grand Total":
                        data_cell.font = Font(bold=True, size=9)
                        data_cell.fill = total_row_fill
                    else:
                        data_cell.font = Font(size=9)
                    
                    # Format numbers based on metric type
                    metric = col[1]  # Second level (metric)
                    if 'CVR' in metric:
                        data_cell.number_format = '0"%"'
                        # No conditional coloring per user request
                    elif 'AOV' in metric:
                        data_cell.number_format = '#,##0'
                    elif 'revenue' in metric.lower():
                        data_cell.number_format = '#,##0'
                    else:
                        data_cell.number_format = '#,##0'
                    
                    # Apply borders and alignment
                    data_cell.alignment = Alignment(horizontal='right', vertical='center')
                    data_cell.border = thin_border
                    
                    col_idx += 1
                
                current_row += 1
            
            # Auto-adjust column widths for better readability
            for col_idx in range(1, total_columns + 1):
                col_letter = get_column_letter(col_idx)
                max_length = 0
                for row_idx in range(start_row, current_row):
                    cell = ws.cell(row=row_idx, column=col_idx)
                    if cell.value:
                        try:
                            cell_length = len(str(cell.value))
                            if cell_length > max_length:
                                max_length = cell_length
                        except:
                            pass
                
                # Set width with padding, limit max width
                adjusted_width = min(max_length + 3, 25)
                ws.column_dimensions[col_letter].width = adjusted_width
            
            print(f"✅ Beautifully formatted sessions data written to {sheet_name} successfully!")
        else:
            print(f"❌ Sheet {sheet_name} not found in workbook")

    # Write each pivot table to its respective detailed sheet
    write_sessions_pivot_to_sheet(master_sessions_pivot_no_filter, "Cat_Week_Total")
    write_sessions_pivot_to_sheet(master_sessions_pivot_ea_only, "Cat_Week_EA")
    write_sessions_pivot_to_sheet(master_sessions_pivot_non_ea, "Cat_Week_JumboAE")
    
    # Write Total column data to respective monthly sheets
    write_sessions_monthly_total_to_sheet(master_sessions_pivot_no_filter, "Cat_Month_Total")
    write_sessions_monthly_total_to_sheet(master_sessions_pivot_ea_only, "Cat_Month_EA")
    write_sessions_monthly_total_to_sheet(master_sessions_pivot_non_ea, "Cat_Month_JumboAE")

    write_sessions_summary_to_dashboard(master_sessions_pivot_non_ea, "Sessions JumboAE")


    # Save the workbook
    wb.save(output_file)
    print(f"\n✅ Successfully appended beautifully formatted sessions data to {output_file}")
    print("📊 Sessions analysis has been added to:")
    print("  • Cat_Week_Total sheet (detailed)")
    print("  • Cat_Week_EA sheet (detailed)")
    print("  • Cat_Week_JumboAE sheet (detailed)")
    print("  • Cat_Month_Total sheet (totals only)")
    print("  • Cat_Month_EA sheet (totals only)")
    print("  • Cat_Month_JumboAE sheet (totals only)")
    
    print("\n🎨 Formatting Features Applied:")
    print("  • Professional color-coded headers and sections")
    print("  • Multi-level column headers with proper alignment")
    print("  • Highlighted Grand Total row")
    print("  • Color-coded growth percentages (green/red)")
    print("  • Consistent borders and cell styling")
    print("  • Auto-adjusted column widths")
    print("  • Proper number formatting for different metrics")
    print("  • Clean visual separation between sections")
    print("  • NEW: Monthly sheets with Total column data only")

except Exception as e:
    print(f"❌ Error while exporting to Excel: {str(e)}")
    import traceback
    traceback.print_exc()  # Print full traceback for better debugging

print("\n" + "=" * 80)

📤 EXPORTING BEAUTIFULLY FORMATTED SESSIONS PIVOT TABLES TO EXCEL...
Writing beautifully formatted sessions data to sheet: Cat_Week_Total
✅ Beautifully formatted sessions data written to Cat_Week_Total successfully!
Writing beautifully formatted sessions data to sheet: Cat_Week_EA
✅ Beautifully formatted sessions data written to Cat_Week_EA successfully!
Writing beautifully formatted sessions data to sheet: Cat_Week_JumboAE
✅ Beautifully formatted sessions data written to Cat_Week_Total successfully!
Writing beautifully formatted sessions data to sheet: Cat_Week_EA
✅ Beautifully formatted sessions data written to Cat_Week_EA successfully!
Writing beautifully formatted sessions data to sheet: Cat_Week_JumboAE
✅ Beautifully formatted sessions data written to Cat_Week_JumboAE successfully!
Writing monthly sessions total data to sheet: Cat_Month_Total
✅ Monthly sessions total data written to Cat_Month_Total successfully!
Writing monthly sessions total data to sheet: Cat_Month_EA
✅ Monthly s

In [325]:
# Optional: Open the comprehensive Excel file automatically (Windows)
import os
import subprocess

# Update to use the correct comprehensive file name
output_file = 'IDG_Weekly_Analysis_Combined.xlsx'

print("\n" + "=" * 70)
print("🎉 COMPLETE IDG WEEKLY ANALYSIS EXPORT FINISHED!")
print("=" * 70)
print("📊 ANALYSIS SUMMARY:")
print(f"• 📈 Overall Analysis: {len(all_idgs_global)} IDGs across {max_week_global} weeks")
print(f"• 🏢 EA Only Analysis: {len(all_idgs_ea)} IDGs across {max_week_ea} weeks")
print(f"• 🛍️ Jumbo.ae Analysis: {len(all_idgs_jumbo)} IDGs across {max_week_jumbo} weeks")
print(f"• 📅 Periods Analyzed: {', '.join(periods)}")
print(f"• 🗺️ Week Calculation: Based on {start_day} as first day of month")
print(f"• 📎 Export File: {output_file}")
print("\n🏆 All pivot tables created with comprehensive analysis!")
print("📊 Ready for business insights and decision making!")
print("=" * 70)


🎉 COMPLETE IDG WEEKLY ANALYSIS EXPORT FINISHED!
📊 ANALYSIS SUMMARY:
• 📈 Overall Analysis: 15 IDGs across 6 weeks
• 🏢 EA Only Analysis: 15 IDGs across 6 weeks
• 🛍️ Jumbo.ae Analysis: 15 IDGs across 6 weeks
• 📅 Periods Analyzed: May 25, June 24, June 25
• 🗺️ Week Calculation: Based on Sunday as first day of month
• 📎 Export File: IDG_Weekly_Analysis_Combined.xlsx

🏆 All pivot tables created with comprehensive analysis!
📊 Ready for business insights and decision making!


In [326]:
import win32com.client
import os
import time

def autofit_excel_conditional_sheets(file_path):
    """
    Opens an Excel file, renames 'Sheet1' to 'DSR', auto-fits column widths for all sheets.
    If the sheet is now 'DSR', it auto-fits columns from 'C' onwards.
    Otherwise, it auto-fits all columns.
    Then saves and closes the file using pywin32.
    """
    if not os.path.exists(file_path):
        print(f"❌ File not found at: {file_path}")
        return

    excel_app = None
    workbook = None

    try:
        excel_app = win32com.client.DispatchEx("Excel.Application")
        excel_app.Visible = False
        excel_app.DisplayAlerts = False

        workbook = excel_app.Workbooks.Open(file_path, ReadOnly=False)

        print(f"📄 Processing auto-fit and rename in: '{os.path.basename(file_path)}'")
        
        for ws in workbook.Sheets:
            if ws.Name == "Sheet1":
                print(f"  - Renaming 'Sheet1' to 'DSR'")
                ws.Name = "DSR"
                print(f"  - Auto-fitting columns C onward for renamed sheet: 'DSR'")
                ws.Range("C:XFD").Columns.AutoFit()
            elif ws.Name == "DSR":
                print(f"  - Auto-fitting columns C onward for sheet: 'DSR'")
                ws.Range("C:XFD").Columns.AutoFit()
            else:
                print(f"  - Auto-fitting ALL columns for sheet: '{ws.Name}'")
                ws.Columns.AutoFit()

        # Save logic with fallback
        try:
            workbook.Save()
            print("✅ Saved successfully.")
        except Exception as save_err:
            print(f"⚠️ Save failed: {save_err}")
            fallback_path = file_path.replace(".xlsx", "_autosaved.xlsx")
            print(f"🔁 Saving as new file: {fallback_path}")
            workbook.SaveAs(fallback_path)

        workbook.Close(SaveChanges=0)

    except Exception as e:
        print(f"❌ Error: {e}")

    finally:
        if excel_app:
            excel_app.Quit()
            excel_app = None


In [327]:
import os
from copy import copy

from openpyxl import Workbook

def copy_raw_sheets_to_destination():
    """
    Copy the 3 raw invoice sheets to the destination file
    """
    dest_path = 'invoice_day_channel_report_compatible.xlsx'
    
    if not os.path.exists(dest_path):
        print(f"Error: Destination file {dest_path} not found!")
        return
    
    print(f"\n📋 Starting to copy raw invoice sheets to {dest_path}")
    
    try:
        # Load destination workbook
        dest_wb = load_workbook(dest_path)
        
        # Copy each raw sheet from sheet_info
        for i, (source_file, source_sheet, display_name) in enumerate(sheet_info):
            if not os.path.exists(source_file):
                print(f"⚠️  Warning: Raw file {source_file} not found, skipping...")
                continue
            
            try:
                # Load source workbook
                source_wb = load_workbook(source_file, data_only=True)
                source_ws = source_wb[source_sheet]
                
                # Create unique sheet name for raw data
                raw_sheet_name = f"Raw_{display_name.replace(' ', '_')}"
                counter = 1
                new_sheet_name = raw_sheet_name
                while new_sheet_name in dest_wb.sheetnames:
                    new_sheet_name = f"{raw_sheet_name}_{counter}"
                    counter += 1
                
                # Create new worksheet in destination
                dest_ws = dest_wb.create_sheet(title=new_sheet_name)
                
                print(f"📊 Copying raw sheet '{source_sheet}' from {display_name} as '{new_sheet_name}'...")
                
                # Get the range of data to copy
                source_max_row = source_ws.max_row
                source_max_col = source_ws.max_column
                
                # Handle merged cells first
                merged_ranges = list(source_ws.merged_cells.ranges)
                for merged_range in merged_ranges:
                    # Create the same merge range in destination
                    dest_ws.merge_cells(str(merged_range))
                    
                    # Copy the value from the top-left cell of the merge range
                    source_value = source_ws.cell(merged_range.min_row, merged_range.min_col).value
                    dest_ws.cell(merged_range.min_row, merged_range.min_col).value = source_value
                    
                    # Copy formatting from the first cell of merge range
                    source_cell = source_ws.cell(merged_range.min_row, merged_range.min_col)
                    dest_cell = dest_ws.cell(merged_range.min_row, merged_range.min_col)
                    
                    if source_cell.has_style:
                        dest_cell.font = copy(source_cell.font)
                        dest_cell.fill = copy(source_cell.fill)
                        dest_cell.border = copy(source_cell.border)
                        dest_cell.alignment = copy(source_cell.alignment)
                        dest_cell.number_format = source_cell.number_format
                
                # Copy all data and formatting
                for row in range(1, source_max_row + 1):
                    for col in range(1, source_max_col + 1):
                        # Skip if this cell is part of a merged range
                        if any(merged_range.min_row <= row <= merged_range.max_row and 
                            merged_range.min_col <= col <= merged_range.max_col 
                            for merged_range in merged_ranges):
                            continue
                        
                        # Get source cell
                        source_cell = source_ws.cell(row=row, column=col)
                        
                        # Get destination cell
                        dest_cell = dest_ws.cell(row=row, column=col)
                        
                        # Copy value
                        dest_cell.value = source_cell.value
                        
                        # Copy formatting if it has any
                        if source_cell.has_style:
                            dest_cell.font = copy(source_cell.font)
                            dest_cell.fill = copy(source_cell.fill)
                            dest_cell.border = copy(source_cell.border)
                            dest_cell.alignment = copy(source_cell.alignment)
                            dest_cell.number_format = source_cell.number_format
                
                # Copy column widths
                for col in range(1, source_max_col + 1):
                    col_letter = get_column_letter(col)
                    if source_ws.column_dimensions[col_letter].width:
                        dest_ws.column_dimensions[col_letter].width = source_ws.column_dimensions[col_letter].width
                
                # Copy row heights
                for row in range(1, source_max_row + 1):
                    if source_ws.row_dimensions[row].height:
                        dest_ws.row_dimensions[row].height = source_ws.row_dimensions[row].height
                
                print(f"✅ Successfully copied {new_sheet_name} ({source_max_row} rows, {source_max_col} columns)")
                
            except Exception as e:
                print(f"❌ Error copying raw sheet from {display_name}: {str(e)}")
                continue
        
        # Save the destination workbook
        dest_wb.save(dest_path)
        print(f"\n✅ Successfully copied all raw invoice sheets to {dest_path}")
        print(f"📋 Final sheets in destination: {dest_wb.sheetnames}")
        
    except Exception as e:
        print(f"❌ Error occurred while copying raw sheets: {str(e)}")
        import traceback
        traceback.print_exc()


def copy_all_sheets_from_combined():
    """
    Copy all sheets from IDG_Weekly_Analysis_Combined.xlsx to invoice_day_channel_report_compatible.xlsx
    Preserving the existing Sheet1 data and appending all other sheets
    """
    source_path = 'IDG_Weekly_Analysis_Combined.xlsx'
    dest_path = 'invoice_day_channel_report_compatible.xlsx'
    
    print(f"Starting to copy sheets from {source_path} to {dest_path}")
    
    # Check if source file exists
    if not os.path.exists(source_path):
        print(f"Error: Source file {source_path} not found!")
        return
       
    if os.path.exists(source_path):
        print(f"Opening {os.path.abspath(source_path)} to ensure it's saved and closed properly...")
        open_save_close_excel_dynamic(os.path.abspath(source_path))

    try:
        # Load both workbooks
        source_wb = load_workbook(source_path, data_only=True)
        
        # Check if destination file exists, if not create it with a basic sheet
        if os.path.exists(dest_path):
            dest_wb = load_workbook(dest_path)
        else:
            dest_wb = Workbook()
            print(f"Created new destination file: {dest_path}")
        
        # Get all sheet names from source workbook
        source_sheet_names = source_wb.sheetnames
        existing_dest_sheet_names = dest_wb.sheetnames
        
        print(f"Source sheets to copy: {source_sheet_names}")
        print(f"Existing destination sheets: {existing_dest_sheet_names}")
        
        # Copy each sheet from source to destination
        for sheet_name in source_sheet_names:
            source_ws = source_wb[sheet_name]
            
            # Create a unique sheet name if it already exists in destination
            new_sheet_name = sheet_name
            counter = 1
            while new_sheet_name in dest_wb.sheetnames:
                new_sheet_name = f"{sheet_name}_{counter}"
                counter += 1
            
            # Create new worksheet in destination
            dest_ws = dest_wb.create_sheet(title=new_sheet_name)
            
            print(f"Copying sheet '{sheet_name}' as '{new_sheet_name}'...")
            
            # Get the range of data to copy
            source_max_row = source_ws.max_row
            source_max_col = source_ws.max_column
            
            # Handle merged cells first
            merged_ranges = source_ws.merged_cells.ranges
            for merged_range in merged_ranges:
                # Create the same merge range in destination
                dest_ws.merge_cells(str(merged_range))
                
                # Copy the value from the top-left cell of the merge range
                source_value = source_ws.cell(merged_range.min_row, merged_range.min_col).value
                dest_ws.cell(merged_range.min_row, merged_range.min_col).value = source_value
                
                # Copy formatting from the first cell of merge range
                source_cell = source_ws.cell(merged_range.min_row, merged_range.min_col)
                dest_cell = dest_ws.cell(merged_range.min_row, merged_range.min_col)
                
                if source_cell.has_style:
                    dest_cell.font = copy(source_cell.font)
                    dest_cell.fill = copy(source_cell.fill)
                    dest_cell.border = copy(source_cell.border)
                    dest_cell.alignment = copy(source_cell.alignment)
                    dest_cell.number_format = source_cell.number_format
            
            # Copy all data and formatting
            for row in range(1, source_max_row + 1):
                for col in range(1, source_max_col + 1):
                    # Skip if this cell is part of a merged range
                    if any(merged_range.min_row <= row <= merged_range.max_row and 
                        merged_range.min_col <= col <= merged_range.max_col 
                        for merged_range in merged_ranges):
                        continue
                    
                    # Get source cell
                    source_cell = source_ws.cell(row=row, column=col)
                    
                    # Get destination cell
                    dest_cell = dest_ws.cell(row=row, column=col)
                    
                    # Copy value
                    dest_cell.value = source_cell.value
                    
                    # Copy formatting if it has any
                    if source_cell.has_style:
                        dest_cell.font = copy(source_cell.font)
                        dest_cell.fill = copy(source_cell.fill)
                        dest_cell.border = copy(source_cell.border)
                        dest_cell.alignment = copy(source_cell.alignment)
                        dest_cell.number_format = source_cell.number_format
            
            # Copy column widths
            for col in range(1, source_max_col + 1):
                col_letter = get_column_letter(col)
                if source_ws.column_dimensions[col_letter].width:
                    dest_ws.column_dimensions[col_letter].width = source_ws.column_dimensions[col_letter].width
            
            # Copy row heights
            for row in range(1, source_max_row + 1):
                if source_ws.row_dimensions[row].height:
                    dest_ws.row_dimensions[row].height = source_ws.row_dimensions[row].height
        
            # 🔒 FREEZE FIRST COLUMN: Make the first column sticky
            try:
                # Freeze panes at B1 to make column A (first column) sticky
                dest_ws.freeze_panes = 'B1'
                print(f"   ✅ Made first column sticky for sheet '{new_sheet_name}'")
            except Exception as freeze_error:
                print(f"   ⚠️  Warning: Could not freeze first column for '{new_sheet_name}': {freeze_error}")
        
        # Save the destination workbook
        dest_wb.save(dest_path)
        print(f"Successfully copied all sheets from {source_path} to {dest_path}")
        print(f"Final sheets in destination: {dest_wb.sheetnames}")


        # NOW COPY THE RAW INVOICE SHEETS
        print(f"\n🔄 Now copying raw invoice sheets...")
        copy_raw_sheets_to_destination()

        autofit_excel_conditional_sheets(os.path.abspath(dest_path))

        if os.path.exists(source_path):
            os.remove(source_path)

        
        # if os.path.exists(dest_path):
        #     os.startfile(dest_path)
        # print(f"Opened {dest_path} in Excel successfully!")
        
    except Exception as e:
        print(f"Error occurred while copying sheets: {str(e)}")
        import traceback
        traceback.print_exc()

# Execute the copy function
copy_all_sheets_from_combined()

Starting to copy sheets from IDG_Weekly_Analysis_Combined.xlsx to invoice_day_channel_report_compatible.xlsx
Opening c:\Users\91843\Documents\VsCode Codes\ReportAutomation\IDG_Weekly_Analysis_Combined.xlsx to ensure it's saved and closed properly...
Waiting for calculation to complete for c:\Users\91843\Documents\VsCode Codes\ReportAutomation\IDG_Weekly_Analysis_Combined.xlsx...
Calculation complete.
Successfully opened, saved, and closed: c:\Users\91843\Documents\VsCode Codes\ReportAutomation\IDG_Weekly_Analysis_Combined.xlsx
Waiting for calculation to complete for c:\Users\91843\Documents\VsCode Codes\ReportAutomation\IDG_Weekly_Analysis_Combined.xlsx...
Calculation complete.
Successfully opened, saved, and closed: c:\Users\91843\Documents\VsCode Codes\ReportAutomation\IDG_Weekly_Analysis_Combined.xlsx
Source sheets to copy: ['Cat_Week_Total', 'Cat_Week_EA', 'Cat_Week_JumboAE', 'Cat_Month_Total', 'Cat_Month_EA', 'Cat_Month_JumboAE', 'Weekly', 'Weekly_Totals']
Existing destination she

# Add Funnel Analysis Report

This section processes funnel CSV files from multiple months and adds a `Monthly_Funnel_Report` sheet to the main Excel output.

In [341]:
import re


def process_funnel_csv_file(file_path):
    """Process a single funnel CSV file and return formatted data"""
    try:
        # Read the file to understand its structure
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        
        # Extract date from line 4 (index 3)
        date_info = None
        if len(lines) > 3:
            date_line = lines[3].strip()
            date_match = re.search(r'(\d{8})-(\d{8})', date_line)
            if date_match:
                date_str = date_match.group(1)
                formatted_date = datetime.strptime(date_str, '%Y%m%d').strftime('%Y-%m-%d')
                date_info = formatted_date
        
        # Try different separators to read the data part starting from row 7
        data_start = 6  # Start from line 7 (index 6)
        
        for sep in [';', ',', '\t']:
            try:
                df = pd.read_csv(file_path, skiprows=data_start, sep=sep, header=0)
                if df.shape[1] > 1 and not df.empty:
                    df = df.dropna(how='all')
                    
                    # Find Step and Active users columns
                    if any('Step' in str(col) for col in df.columns) and any('Active users' in str(col) for col in df.columns):
                        step_col = next((col for col in df.columns if 'Step' in str(col)), None)
                        users_col = next((col for col in df.columns if 'Active users' in str(col)), None)
                        
                        if step_col and users_col:
                            result_df = df[[step_col, users_col]].copy()
                            result_df.columns = ['Step', 'Active users']
                            result_df['Date'] = date_info
                            return result_df
            except:
                continue
        
        return None
        
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

In [342]:
import re
from openpyxl.styles import Font, Alignment, PatternFill


def process_and_add_funnel_report():
    """Process funnel data and add Monthly_Funnel_Report sheet to the main Excel file"""
    try:
        # Import additional required modules
        
        
        # Build funnel folder paths dynamically based on DSR configuration
        if 'config' in globals() and 'dates' in config:
            dates_config = config['dates']
            funnel_folder_paths = []
            
            # Build paths for all three periods: latest, last_month, last_year
            for period in ['latest', 'last_month', 'last_year']:
                if period in dates_config:
                    period_data = dates_config[period]
                    funnel_path = os.path.join(dsr_folder_path, period_data['folder'], 'funnel')
                    funnel_folder_paths.append(funnel_path)
                    
            print(f"🔍 Funnel folder paths: {funnel_folder_paths}")
        else:
            print("⚠️ DSR configuration not found, using default funnel paths")
            return False
            
        # Process all CSV files from all funnel month folders
        all_funnel_data = []
        successful_funnel_files = []
        total_funnel_files = 0
        
        print(f"🔍 Processing funnel CSV files from {len(funnel_folder_paths)} month folders...")
        
        for folder_path in funnel_folder_paths:
            if os.path.exists(folder_path):
                csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
                total_funnel_files += len(csv_files)
                month_name = os.path.basename(os.path.dirname(folder_path))
                
                print(f"\n--- Processing {month_name} funnel folder ({len(csv_files)} files) ---")
                
                for csv_file in csv_files:
                    file_path = os.path.join(folder_path, csv_file)
                    result = process_funnel_csv_file(file_path)
                    
                    if result is not None:
                        # Validate that the date matches the expected month/year from folder
                        file_date = pd.to_datetime(result['Date'].iloc[0])
                        expected_year = int(month_name.split('-')[1])
                        expected_month = month_name.split('-')[0]
                        
                        # Convert month name to number for comparison
                        month_num = {
                            'January': 1, 'February': 2, 'March': 3, 'April': 4,
                            'May': 5, 'June': 6, 'July': 7, 'August': 8,
                            'September': 9, 'October': 10, 'November': 11, 'December': 12
                        }.get(expected_month, 0)
                        
                        # Check if date matches folder
                        if file_date.year == expected_year and file_date.month == month_num:
                            all_funnel_data.append(result)
                            successful_funnel_files.append(f"{month_name}/{csv_file}")
                            print(f"✅ {csv_file} - Date: {result['Date'].iloc[0]}")
                        else:
                            print(f"⚠️ {csv_file} - Date mismatch: {result['Date'].iloc[0]} (expected {expected_month} {expected_year})")
                    else:
                        print(f"❌ Failed to process {csv_file}")
            else:
                print(f"⚠️ Funnel folder not found: {folder_path}")
        
        if not all_funnel_data:
            print("❌ No funnel data was successfully processed")
            return False
            
        # Combine all funnel data
        combined_funnel_df = pd.concat(all_funnel_data, ignore_index=True)
        combined_funnel_df = combined_funnel_df.sort_values(['Date', 'Step']).reset_index(drop=True)
        
        print(f"\n=== FUNNEL PROCESSING COMPLETE ===")
        print(f"Successfully processed: {len(successful_funnel_files)}/{total_funnel_files} files")
        print(f"Date range: {combined_funnel_df['Date'].min()} to {combined_funnel_df['Date'].max()}")
        print(f"Total records: {len(combined_funnel_df)}")
        print(f"Unique dates: {combined_funnel_df['Date'].nunique()}")
        
        # Add funnel sheet to the main Excel file
        excel_file_path = 'invoice_day_channel_report_compatible.xlsx'
        
        if not os.path.exists(excel_file_path):
            print(f"❌ Main Excel file not found: {excel_file_path}")
            return False
            
        # Open the existing Excel file and add the funnel sheet
        from openpyxl import load_workbook
        
        workbook = load_workbook(excel_file_path)
        
        # Create the funnel sheet
        if 'Monthly_Funnel_Report' in workbook.sheetnames:
            del workbook['Monthly_Funnel_Report']
            
        worksheet = workbook.create_sheet(title='Monthly_Funnel_Report')
        
        # Create center alignment style
        center_alignment = Alignment(horizontal='center', vertical='center')
        
        # Extract month-year from dates and organize data by month
        combined_funnel_df['Month_Year'] = pd.to_datetime(combined_funnel_df['Date']).dt.strftime('%Y-%m')
        months = sorted(combined_funnel_df['Month_Year'].unique())
        
        # Create combined summary table at the top
        # Find the maximum number of days available in the LATEST month (not the month with most days)
        # The latest month is the most recent chronologically
        latest_month = max(months)  # Since months are sorted, the last one is the latest
        max_days = combined_funnel_df[combined_funnel_df['Month_Year'] == latest_month]['Date'].nunique()
        
        # Create summary table data structure
        # Get unique steps (assuming all months have same steps)
        all_steps = combined_funnel_df['Step'].unique()
        
        # Initialize variables early to avoid UnboundLocalError
        ordered_period_names = []
        ordered_months = []
        
        # Build period names based on ACTUAL available data months, in the correct order
        # Define the correct period order: Last Month, Last Year, Latest Month
        period_order = ['last_month', 'last_year', 'latest']
        
        if 'config' in globals() and 'dates' in config:
            dates_config = config['dates']
            for period in period_order:
                if period in dates_config:
                    month_name = dates_config[period]['month']
                    year = dates_config[period]['year']
                    period_display_name = f"{month_name[:3]}-{str(year)[-2:]}"
                    
                    # Find corresponding month in our ACTUAL data by matching month and year
                    for month in months:
                        month_date = pd.to_datetime(month)
                        if (month_date.strftime('%B') == month_name and 
                            month_date.year == year):
                            ordered_period_names.append(period_display_name)
                            ordered_months.append(month)
                            print(f"✅ Matched {period_display_name} to data month {month}")
                            break
                    else:
                        print(f"⚠️ No data found for period {period_display_name}")
        
        # If config failed or no matches found, use fallback based on actual data
        if not ordered_period_names or not ordered_months:
            print("⚠️ No DSR config found or no matches, using fallback period names")
            ordered_period_names = [pd.to_datetime(month).strftime('%b-%y') for month in months]
            ordered_months = months.copy()
        
        # Debug information
        print(f"\n🔍 Debug Info:")
        print(f"Available data months: {months}")
        print(f"Latest month: {latest_month}")
        print(f"Max days (from latest month): {max_days}")
        print(f"Ordered period names: {ordered_period_names}")
        print(f"Ordered months: {ordered_months}")
        print(f"All steps: {list(all_steps)}")
        
        # Ensure we have at least some data to work with
        if not ordered_period_names or not ordered_months:
            print("❌ No valid periods or months found!")
            return False
        
        # Create summary data for each month, truncated to max_days
        summary_data = {}
        
        # Debug information  
        print(f"\n🔍 Debug Info:")
        print(f"Available data months: {months}")
        print(f"Ordered period names: {ordered_period_names}")
        print(f"Ordered months: {ordered_months}")
        print(f"Max days: {max_days}")
        print(f"All steps: {list(all_steps)}")
        
        # Ensure we have the required variables
        if not ordered_period_names:
            print("❌ No ordered period names available")
            return False
            
        if not ordered_months:
            print("❌ No ordered months available")
            return False
        
        # Process periods in the correct order: last_month, last_year, latest
        for i in range(len(ordered_period_names)):
            if i < len(ordered_months):
                month = ordered_months[i]
                period_name = ordered_period_names[i]
                
                print(f"\n📊 Processing {period_name} (data month: {month})")
                
                month_data = combined_funnel_df[combined_funnel_df['Month_Year'] == month]
                
                if month_data.empty:
                    print(f"❌ No data found for month {month}")
                    continue
                
                # Get dates for this month, limited to max_days
                month_dates = sorted(month_data['Date'].unique())[:max_days]
                print(f"   Dates available: {len(month_data['Date'].unique())} total, using first {len(month_dates)}")
                
                # Sum active users across all days for each step
                step_totals = {}
                for step in all_steps:
                    step_data = month_data[month_data['Step'] == step]
                    # Only include dates up to max_days
                    step_data_limited = step_data[step_data['Date'].isin(month_dates)]
                    total_users = step_data_limited['Active users'].sum()
                    step_totals[step] = total_users
                    print(f"   {step}: {total_users:,} total active users")
                
                summary_data[period_name] = step_totals
        
        print(f"\n📋 Summary data created for periods: {list(summary_data.keys())}")
        
        # Validate we have summary data
        if not summary_data:
            print("❌ No summary data was created - cannot proceed with table creation")
            return False
        
        # Add combined summary table at the top of the worksheet
        current_row = 1
        
        # Add title for combined summary with truncation info - Check if cell is already merged
        title_text = f"Combined Funnel Summary - Truncated to {max_days} Days"
        title_cell = worksheet.cell(row=current_row, column=1)
        
        try:
            # Try to set value - if it's not merged, this will work
            title_cell.value = title_text
            title_cell.font = Font(bold=True, size=16, color="FFFFFF")
            title_cell.alignment = center_alignment
            title_cell.fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
            
            # Calculate merge range and apply formatting to all cells that will be merged
            total_cols = 1 + (len(ordered_period_names) * 2)  # Step column + 2 columns per period
            
            # Apply same formatting to cells that will be merged
            for col_idx in range(2, total_cols + 1):
                cell = worksheet.cell(row=current_row, column=col_idx)
                cell.font = Font(bold=True, size=16, color="FFFFFF")
                cell.alignment = center_alignment
                cell.fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
            
            # Now merge cells AFTER setting values and formatting
            worksheet.merge_cells(start_row=current_row, start_column=1, end_row=current_row, end_column=total_cols)
            
        except AttributeError:
            # Cell is already merged, skip the merge operation
            print(f"⚠️ Funnel title cell is already merged, skipping merge operation")
            pass
        current_row += 2
        
        # Create table with proper borders and formatting
        from openpyxl.styles import Border, Side
        
        # Define border styles
        thick_border = Border(
            left=Side(style='thick'),
            right=Side(style='thick'),
            top=Side(style='thick'),
            bottom=Side(style='thick')
        )
        
        thin_border = Border(
            left=Side(style='thin'),
            right=Side(style='thin'),
            top=Side(style='thin'),
            bottom=Side(style='thin')
        )
        
        # Create main table headers
        header_row1 = current_row
        header_row2 = current_row + 1
        
        # Step column header (spans 2 rows) - Check if cell is already merged
        step_header = worksheet.cell(row=header_row1, column=1)
        
        try:
            # Try to set value - if it's not merged, this will work
            step_header.value = "Step"
            step_header.font = Font(bold=True, size=12, color="FFFFFF")
            step_header.alignment = center_alignment
            step_header.fill = PatternFill(start_color="5B9BD5", end_color="5B9BD5", fill_type="solid")
            step_header.border = thick_border
            
            # Apply same formatting to the cell that will be merged
            step_header2 = worksheet.cell(row=header_row2, column=1)
            step_header2.font = Font(bold=True, size=12, color="FFFFFF")
            step_header2.alignment = center_alignment
            step_header2.fill = PatternFill(start_color="5B9BD5", end_color="5B9BD5", fill_type="solid")
            step_header2.border = thick_border
            
            # Now merge cells AFTER setting values and formatting
            worksheet.merge_cells(start_row=header_row1, start_column=1, end_row=header_row2, end_column=1)
            
        except AttributeError:
            # Cell is already merged, skip the merge operation
            print(f"⚠️ Funnel Step header cell is already merged, skipping merge operation")
            pass
        
        # Period headers with correct order (using previously calculated ordered_period_names)
        col_idx = 2
        for i, period_name in enumerate(ordered_period_names):
            # Main period header (spans 2 columns) - Check if cell is already merged
            period_header = worksheet.cell(row=header_row1, column=col_idx)
            
            try:
                # Try to set value - if it's not merged, this will work
                period_header.value = period_name
                period_header.font = Font(bold=True, size=12, color="FFFFFF")
                period_header.alignment = center_alignment
                period_header.fill = PatternFill(start_color="70AD47", end_color="70AD47", fill_type="solid")
                period_header.border = thick_border
                
                # Apply same formatting to the cell that will be merged
                period_header2 = worksheet.cell(row=header_row1, column=col_idx+1)
                period_header2.font = Font(bold=True, size=12, color="FFFFFF")
                period_header2.alignment = center_alignment
                period_header2.fill = PatternFill(start_color="70AD47", end_color="70AD47", fill_type="solid")
                period_header2.border = thick_border
                
                # Now merge cells AFTER setting values and formatting
                worksheet.merge_cells(start_row=header_row1, start_column=col_idx, end_row=header_row1, end_column=col_idx+1)
                
            except AttributeError:
                # Cell is already merged, skip the merge operation
                print(f"⚠️ Funnel period header '{period_name}' cell is already merged, skipping merge operation")
                pass
            
            # Sub-headers
            # Active users subheader
            worksheet.cell(row=header_row2, column=col_idx, value="Active Users")
            users_header = worksheet.cell(row=header_row2, column=col_idx)
            users_header.font = Font(bold=True, size=10, color="FFFFFF")
            users_header.alignment = center_alignment
            users_header.fill = PatternFill(start_color="A9D18E", end_color="A9D18E", fill_type="solid")
            users_header.border = thin_border
            
            # Completion rate subheader
            worksheet.cell(row=header_row2, column=col_idx+1, value="Completion Rate")
            rate_header = worksheet.cell(row=header_row2, column=col_idx+1)
            rate_header.font = Font(bold=True, size=10, color="FFFFFF")
            rate_header.alignment = center_alignment
            rate_header.fill = PatternFill(start_color="A9D18E", end_color="A9D18E", fill_type="solid")
            rate_header.border = thin_border
            
            col_idx += 2
        
        current_row = header_row2 + 1
        
        # Add data rows for each step with proper formatting
        for step_idx, step in enumerate(all_steps):
            # Step cell
            worksheet.cell(row=current_row, column=1, value=step)
            step_cell = worksheet.cell(row=current_row, column=1)
            step_cell.alignment = center_alignment
            step_cell.font = Font(bold=True)
            step_cell.fill = PatternFill(start_color="E7E6E6", end_color="E7E6E6", fill_type="solid")
            step_cell.border = thin_border
            
            col_idx = 2
            
            # Use ordered periods instead of original order
            for i, period_name in enumerate(ordered_period_names):
                # Find the corresponding month data
                if period_name in summary_data:
                    active_users = summary_data[period_name].get(step, 0)
                    
                    # Add active users with formatting
                    worksheet.cell(row=current_row, column=col_idx, value=f"{active_users:,}")
                    users_cell = worksheet.cell(row=current_row, column=col_idx)
                    users_cell.alignment = center_alignment
                    users_cell.border = thin_border
                    
                    # Alternate row colors for better readability
                    if step_idx % 2 == 0:
                        users_cell.fill = PatternFill(start_color="F2F2F2", end_color="F2F2F2", fill_type="solid")
                    
                    col_idx += 1
                    
                    # Calculate and add completion rate
                    if step == all_steps[0]:  # First step - no completion rate
                        worksheet.cell(row=current_row, column=col_idx, value="")
                        rate_cell = worksheet.cell(row=current_row, column=col_idx)
                    else:
                        # Get previous step's value for this period
                        prev_step = all_steps[list(all_steps).index(step) - 1]
                        prev_users = summary_data[period_name].get(prev_step, 0)
                        
                        if prev_users > 0:
                            completion_rate = (active_users / prev_users) * 100
                            worksheet.cell(row=current_row, column=col_idx, value=f"{completion_rate:.0f}%")
                        else:
                            worksheet.cell(row=current_row, column=col_idx, value="0%")
                        
                        rate_cell = worksheet.cell(row=current_row, column=col_idx)
                    
                    rate_cell.alignment = center_alignment
                    rate_cell.border = thin_border
                    
                    # Alternate row colors for better readability
                    if step_idx % 2 == 0:
                        rate_cell.fill = PatternFill(start_color="F2F2F2", end_color="F2F2F2", fill_type="solid")
                    
                    col_idx += 1
                else:
                    print(f"⚠️ No data found for period {period_name} in summary_data")
                    # Add empty cells
                    worksheet.cell(row=current_row, column=col_idx, value="")
                    worksheet.cell(row=current_row, column=col_idx+1, value="")
                    col_idx += 2
            
            current_row += 1
        
        # Add outer border to the entire table
        table_end_row = current_row - 1
        table_end_col = col_idx - 1
        
        # Apply thick border to table perimeter
        for row in range(header_row1, table_end_row + 1):
            for col in range(1, table_end_col + 1):
                cell = worksheet.cell(row=row, column=col)
                
                # Apply thick border to outer edges
                border_left = Side(style='thick') if col == 1 else Side(style='thin')
                border_right = Side(style='thick') if col == table_end_col else Side(style='thin')
                border_top = Side(style='thick') if row == header_row1 else Side(style='thin')
                border_bottom = Side(style='thick') if row == table_end_row else Side(style='thin')
                
                cell.border = Border(left=border_left, right=border_right, top=border_top, bottom=border_bottom)
        
        # Add some spacing before the monthly detailed data
        current_row += 3
        
        # Add title for detailed monthly data
        worksheet.cell(row=current_row, column=1, value="Detailed Daily Funnel Data by Month")
        detail_title_cell = worksheet.cell(row=current_row, column=1)
        detail_title_cell.font = Font(bold=True, size=14)
        detail_title_cell.alignment = center_alignment
        current_row += 2
        
        # Set column widths for the summary table
        worksheet.column_dimensions['A'].width = 25  # Step column
        for i in range(2, col_idx):
            col_letter = get_column_letter(i)
            worksheet.column_dimensions[col_letter].width = 15
        
        # Now add the detailed monthly data below the summary table in correct order
        # Define column spacing for each month (each month will take 7 columns)
        cols_per_month = 7  # 5 data columns + 2 spacing columns
        monthly_data_start_row = current_row
        
        # Process months in the correct order: Last Month, Last Year, Latest Month
        for month_idx, month in enumerate(ordered_months):
            # Calculate starting column for this month
            start_col = month_idx * cols_per_month + 1
            
            # Get data for this month
            month_data = combined_funnel_df[combined_funnel_df['Month_Year'] == month].copy()
            dates = sorted(month_data['Date'].unique())
            
            # Add month header
            month_title = f"Month: {pd.to_datetime(month).strftime('%B %Y')}"
            worksheet.cell(row=monthly_data_start_row, column=start_col, value=month_title)
            
            # Make month title bold and larger
            month_cell = worksheet.cell(row=monthly_data_start_row, column=start_col)
            month_cell.font = Font(bold=True, size=16)
            month_cell.alignment = center_alignment
            
            current_row = monthly_data_start_row + 2  # Start from row after header
            
            # Process each day in this month
            for date in dates:
                # Get data for this date
                day_data = month_data[month_data['Date'] == date][['Step', 'Active users']].reset_index(drop=True)
                
                # Calculate completion rate, abandonments, and abandonment rate starting from step 2
                completion_rates = []
                abandonments = []
                abandonment_rates = []
                
                for i, row in day_data.iterrows():
                    if i == 0:  # First step (Session Start) - no completion rate, abandonments, or abandonment rate
                        completion_rates.append("")
                        abandonments.append("")
                        abandonment_rates.append("")
                    else:
                        # Get previous step's active users
                        previous_step_users = day_data.iloc[i-1]['Active users']
                        current_step_users = row['Active users']
                        
                        # Calculate completion rate
                        if previous_step_users > 0:
                            completion_rate = (current_step_users / previous_step_users) * 100
                            completion_rates.append(f"{completion_rate:.1f}%")
                        else:
                            completion_rates.append("0.0%")
                        
                        # Calculate abandonments (previous step users - current step users)
                        abandonment_count = previous_step_users - current_step_users
                        abandonments.append(abandonment_count)
                        
                        # Calculate abandonment rate (abandonments / previous step users * 100)
                        if previous_step_users > 0:
                            abandonment_rate = (abandonment_count / previous_step_users) * 100
                            abandonment_rates.append(f"{abandonment_rate:.1f}%")
                        else:
                            abandonment_rates.append("0.0%")
                
                day_data['Completion rate'] = completion_rates
                day_data['Abandonments'] = abandonments
                day_data['Abandonment rate'] = abandonment_rates
                
                # Add title for the day
                worksheet.cell(row=current_row, column=start_col, value=f"Date: {date}")
                
                # Make date title bold
                title_cell = worksheet.cell(row=current_row, column=start_col)
                title_cell.font = Font(bold=True, size=14)
                title_cell.alignment = center_alignment
                
                current_row += 2  # Skip a row
                
                # Add table headers
                worksheet.cell(row=current_row, column=start_col, value="Step")
                worksheet.cell(row=current_row, column=start_col+1, value="Active users")
                worksheet.cell(row=current_row, column=start_col+2, value="Completion rate")
                worksheet.cell(row=current_row, column=start_col+3, value="Abandonments")
                worksheet.cell(row=current_row, column=start_col+4, value="Abandonment rate")
                
                # Make headers bold and centered
                for col_offset in range(5):
                    header_cell = worksheet.cell(row=current_row, column=start_col+col_offset)
                    header_cell.font = Font(bold=True)
                    header_cell.alignment = center_alignment
                
                current_row += 1
                
                # Add data rows with center alignment
                for index, row in day_data.iterrows():
                    worksheet.cell(row=current_row, column=start_col, value=row['Step'])
                    worksheet.cell(row=current_row, column=start_col+1, value=row['Active users'])
                    worksheet.cell(row=current_row, column=start_col+2, value=row['Completion rate'])
                    worksheet.cell(row=current_row, column=start_col+3, value=row['Abandonments'])
                    worksheet.cell(row=current_row, column=start_col+4, value=row['Abandonment rate'])
                    
                    # Apply center alignment to all data cells
                    for col_offset in range(5):
                        data_cell = worksheet.cell(row=current_row, column=start_col+col_offset)
                        data_cell.alignment = center_alignment
                    
                    current_row += 1
                
                current_row += 2  # Add space between days
        
        # Adjust column widths for monthly detailed data
        for month_idx in range(len(ordered_months)):
            start_col = month_idx * cols_per_month + 1
            # Set column widths for monthly data
            worksheet.column_dimensions[get_column_letter(start_col)].width = 25     # Step
            worksheet.column_dimensions[get_column_letter(start_col+1)].width = 15   # Active users
            worksheet.column_dimensions[get_column_letter(start_col+2)].width = 18   # Completion rate
            worksheet.column_dimensions[get_column_letter(start_col+3)].width = 15   # Abandonments
            worksheet.column_dimensions[get_column_letter(start_col+4)].width = 18   # Abandonment rate
        
        # Save the workbook
        workbook.save(excel_file_path)
        
        print(f"\n✅ FUNNEL SHEET ADDED SUCCESSFULLY!")
        print(f"   - Monthly_Funnel_Report: Added to {excel_file_path}")
        print(f"   - Combined Summary Table: Shows data truncated to {max_days} days")
        print(f"   - Period Order: Last Month → Last Year → Latest Month")
        print(f"   - Data organized horizontally by {len(ordered_months)} months")
        print(f"   - Professional formatting with merged cells and borders")
        
        return True
        
    except Exception as e:
        print(f"❌ Error processing funnel data: {e}")
        import traceback
        traceback.print_exc()
        return False

In [343]:
# Execute funnel processing and add to main Excel file
print(f"\n🎯 Processing funnel analysis and adding to main report...")
funnel_success = process_and_add_funnel_report()

if funnel_success:
    print(f"✅ Funnel analysis added successfully to invoice_day_channel_report_compatible.xlsx")
else:
    print(f"⚠️ Funnel analysis could not be added - continuing with main report")


🎯 Processing funnel analysis and adding to main report...
🔍 Funnel folder paths: ['test/DSR\\June-2025\\funnel', 'test/DSR\\May-2025\\funnel', 'test/DSR\\June-2024\\funnel']
🔍 Processing funnel CSV files from 3 month folders...

--- Processing June-2025 funnel folder (29 files) ---
✅ download - 2025-06-30T140458.786.csv - Date: 2025-06-01
✅ download - 2025-06-30T140525.819.csv - Date: 2025-06-02
✅ download - 2025-06-30T140544.597.csv - Date: 2025-06-03
✅ download - 2025-06-30T140606.635.csv - Date: 2025-06-04
✅ download - 2025-06-30T140625.091.csv - Date: 2025-06-05
✅ download - 2025-06-30T140645.529.csv - Date: 2025-06-06
✅ download - 2025-06-30T140705.972.csv - Date: 2025-06-07
✅ download - 2025-06-30T140723.849.csv - Date: 2025-06-08
✅ download - 2025-06-30T140912.985.csv - Date: 2025-06-09
✅ download - 2025-06-30T140946.554.csv - Date: 2025-06-10
✅ download - 2025-06-30T141011.751.csv - Date: 2025-06-11
✅ download - 2025-06-30T141033.582.csv - Date: 2025-06-12
✅ download - 2025-06