In [2]:
from landing_page_report_v1 import load_data, filter_channels, get_top_pages, summarize_data, generate_markdown

In [3]:
# Input CSV exports and the channels the report is restricted to
mom_file = r'/content/USDOnline-DashboardMk2_RLandingPagesMoM_Table.csv'
yoy_file = r'/content/USDOnline-DashboardMk2_RLandingPagesYoY_Table.csv'
channels_of_interest = ['Organic Search', 'Paid Search', 'Paid Social']

# Read both exports into DataFrames
mom_data, yoy_data = load_data(mom_file, yoy_file)

# Diagnostic: list the column names of both frames first, then their shapes
loaded_frames = (("MoM", mom_data), ("YoY", yoy_data))
for label, frame in loaded_frames:
    print(label + " columns:", frame.columns.tolist())
for label, frame in loaded_frames:
    print(label + " shape:", frame.shape)

# Keep only the rows belonging to the channels of interest (MoM first, then YoY)
mom_filtered, yoy_filtered = (
    filter_channels(frame, channels_of_interest)
    for frame in (mom_data, yoy_data)
)

# Top 3 rows by 'Session' within each (program_category, default_channel) group
mom_top_pages, yoy_top_pages = (
    get_top_pages(filtered, ['program_category', 'default_channel'], 'Session', 3)
    for filtered in (mom_filtered, yoy_filtered)
)

# Combine the MoM and YoY selections into one summary table
summary_data = summarize_data(mom_top_pages, yoy_top_pages)

# Write the summary out as a markdown report
generate_markdown(summary_data, "/content/landing_page_top3_next_month_v5_update.md")

MoM columns: ['program_category', 'default_channel', 'Landing_page', 'Session', 'sessions_mom_difference', 'Pageview', 'pageview_mom_difference', 'Conversions', 'conversions_mom_difference', 'Conversion_Rate', 'conversion_rate_mom_percent_difference']
YoY columns: ['program_category', 'default_channel', 'Landing_page', 'Session', 'sessions_yoy_difference', 'Pageview', 'pageview_yoy_difference', 'Conversions', 'conversions_yoy_difference', 'Conversion_Rate', 'conversion_rate_yoy_percent_difference']
MoM shape: (4352, 11)
YoY shape: (4352, 11)
Markdown file created successfully with platform-independent newlines: /content/landing_page_top3_next_month_v5_update.md


In [4]:
# Modularized Code for Landing Page Report Generation
import pandas as pd

def load_data(mom_file, yoy_file):
    """Read the two CSV exports and return them as a ``(mom, yoy)`` tuple of DataFrames.

    Args:
        mom_file: Path of the Month-over-Month CSV, passed to ``pd.read_csv``.
        yoy_file: Path of the Year-over-Year CSV, passed to ``pd.read_csv``.

    Returns:
        A 2-tuple ``(mom_frame, yoy_frame)``; the MoM file is read first.
    """
    mom_frame, yoy_frame = map(pd.read_csv, (mom_file, yoy_file))
    return (mom_frame, yoy_frame)

def filter_channels(data, channels):
    """Return the rows of ``data`` whose 'default_channel' value is one of ``channels``.

    Args:
        data: DataFrame containing a 'default_channel' column.
        channels: Collection of channel values to keep.

    Returns:
        The matching rows, with their original index labels.
    """
    is_wanted = data['default_channel'].isin(channels)
    return data.loc[is_wanted]

def get_top_pages(data, group_cols, sort_col, top_n):
    """Select the first ``top_n`` rows of every group after sorting by ``sort_col`` descending.

    Args:
        data: DataFrame to rank.
        group_cols: Column name(s) defining the groups.
        sort_col: Column to sort on, largest values first.
        top_n: Maximum number of rows kept per group.

    Returns:
        The selected rows in descending ``sort_col`` order, with a fresh
        0..n-1 index (the old index is discarded).
    """
    ranked = data.sort_values(by=sort_col, ascending=False)
    leaders = ranked.groupby(group_cols).head(top_n)
    return leaders.reset_index(drop=True)

def summarize_data(mom_data, yoy_data):
    """Join the MoM and YoY frames on their page keys and aggregate per page.

    The frames are merged on ('program_category', 'default_channel',
    'Landing_page') with pandas' default join type; non-key columns present in
    both frames get the '_mom' / '_yoy' suffixes. The merged rows are then
    grouped by the same keys: count-like columns and their differences are
    summed, conversion-rate columns are averaged.

    Returns:
        DataFrame with the three key columns followed by the eight MoM and
        eight YoY aggregate columns.
    """
    key_cols = ['program_category', 'default_channel', 'Landing_page']
    joined = mom_data.merge(yoy_data, on=key_cols, suffixes=('_mom', '_yoy'))

    # (columns to sum, columns to average) for each period; insertion order
    # here determines the column order of the result.
    period_columns = (
        (
            ('Session_mom', 'sessions_mom_difference',
             'Pageview_mom', 'pageview_mom_difference',
             'Conversions_mom', 'conversions_mom_difference'),
            ('Conversion_Rate_mom', 'conversion_rate_mom_percent_difference'),
        ),
        (
            ('Session_yoy', 'sessions_yoy_difference',
             'Pageview_yoy', 'pageview_yoy_difference',
             'Conversions_yoy', 'conversions_yoy_difference'),
            ('Conversion_Rate_yoy', 'conversion_rate_yoy_percent_difference'),
        ),
    )
    aggregations = {}
    for summed, averaged in period_columns:
        aggregations.update(dict.fromkeys(summed, 'sum'))
        aggregations.update(dict.fromkeys(averaged, 'mean'))

    return joined.groupby(key_cols).agg(aggregations).reset_index()

# generate_markdown builds its line breaks from os.linesep, so the report uses the host platform's newline convention
import os

def _signed_int(value):
    """Format ``value`` as a thousands-separated integer, prefixed with '+' when it is >= 0."""
    return ("+" if value >= 0 else "") + f"{int(value):,}"


def _signed_pct(value):
    """Format the fraction ``value`` as a percent figure (x100, 2 decimals), '+'-prefixed when >= 0."""
    return ("+" if value >= 0 else "") + f"{round(value * 100, 2):,}"


def _format_page_entry(row, newline):
    """Render one summary row as a markdown bullet, plus a conversions line when conversions > 0."""
    entry = ("* \"" + row['Landing_page'] + "\": " +
             f"{int(row['Session_mom']):,}" + " sessions (YoY: " +
             _signed_int(row['sessions_yoy_difference']) + " | MoM: " +
             _signed_int(row['sessions_mom_difference']) + ")" + newline)

    # Conversion details are only shown for pages that actually converted.
    if row['Conversions_mom'] > 0:
        entry += ("  Conversions: " + f"{int(row['Conversions_mom']):,}" + " (YoY: " +
                  _signed_int(row['conversions_yoy_difference']) + ", " +
                  _signed_pct(row['conversion_rate_yoy_percent_difference']) + "% | MoM: " +
                  _signed_int(row['conversions_mom_difference']) + ", " +
                  _signed_pct(row['conversion_rate_mom_percent_difference']) + "%)" + newline)
    return entry


def generate_markdown(summary_data, output_file):
    """Write a markdown landing-page report for ``summary_data`` to ``output_file``.

    The report has one '###' section per program_category and one '####'
    sub-section per default_channel, each listing pages in descending
    'Session_mom' order. Row selection per channel:

    * 'Organic Search': the first 3 rows, with no further filtering.
    * every other channel: rows with 'Conversions_mom' == 0 are skipped;
      for 'Paid Search' and 'Paid Social', rows with 'Session_mom' < 10 are
      skipped as well.

    A channel heading is written even when all of its rows are filtered out.

    Args:
        summary_data: DataFrame with the key columns 'program_category',
            'default_channel', 'Landing_page' and the '*_mom' / '*_yoy'
            metric columns read below.
        output_file: Destination path (str). The file is overwritten and
            written as UTF-8 with ``os.linesep`` line endings.
    """
    newline = os.linesep
    # Collect the pieces in a list and join once, instead of growing one
    # string inside the nested loops.
    pieces = ["## Landing Page Report" + newline]
    for program in summary_data['program_category'].unique():
        pieces.append(newline + "### " + program + " Landing Page Report" + newline)
        program_data = summary_data[summary_data['program_category'] == program]
        for channel in program_data['default_channel'].unique():
            pieces.append("#### " + channel + newline)
            channel_data = program_data[program_data['default_channel'] == channel]
            channel_data = channel_data.sort_values(by='Session_mom', ascending=False)
            row_count = 0
            for _, row in channel_data.iterrows():
                if channel == 'Organic Search':
                    # Organic Search: only the top 3 pages, no other filtering
                    row_count += 1
                    if row_count > 3:
                        break
                else:
                    # Non-organic channels: skip pages without conversions
                    if row['Conversions_mom'] == 0:
                        continue
                    # Paid channels: also skip low-traffic pages
                    if channel in ['Paid Search', 'Paid Social'] and row['Session_mom'] < 10:
                        continue
                pieces.append(_format_page_entry(row, newline))

    # newline="" stops Python translating the os.linesep sequences a second
    # time; the explicit encoding keeps non-ASCII landing-page text writable
    # regardless of the locale's default encoding.
    with open(output_file, "w", newline="", encoding="utf-8") as file:
        file.write("".join(pieces))
    print("Markdown file created successfully with platform-independent newlines: " + output_file)
