In [None]:
import analytics.charts as ac
import analytics.api as ga
import pandas as pd
from constants import *

In [None]:
# Keyword arguments common to every query: the object returned by
# ac.authenticate_ga plus the start and end dates of the reporting window.
default_params = dict(
    service_system=ac.authenticate_ga(SECRET_NAME, ga.ga4_service_params, port=OAUTH_PORT),
    start_date='2024-10-16',
    end_date='2024-11-16',
)

# Each property gets a copy of the shared defaults plus its own entry from
# PROPERTY_ID_MAP.
catalog_params = dict(default_params, property=PROPERTY_ID_MAP[ANVIL_CATALOG_NAME])
explorer_params = dict(default_params, property=PROPERTY_ID_MAP[ANVIL_EXPLORER_NAME])
# The portal is the only property that also passes a base dimension filter.
portal_params = dict(
    default_params,
    base_dimension_filter=EXCLUDE_PAGES_FILTER,
    property=PROPERTY_ID_MAP[ANVIL_PORTAL_NAME],
)

# Lookup from property name to its query parameters. Insertion order is the
# order in which the properties are iterated later in the notebook.
params_by_property = {
    ANVIL_PORTAL_NAME: portal_params,
    ANVIL_CATALOG_NAME: catalog_params,
    ANVIL_EXPLORER_NAME: explorer_params,
}

In [None]:
def _fetch_link_clicks(query_params, url_dimension, unset_value, url_column):
    """Query click totals per (pagePath, URL) for a single URL dimension.

    Args:
        query_params: Keyword arguments forwarded unchanged to ``ac.get_data_df``.
        url_dimension: Name of the dimension that holds the clicked URL.
        unset_value: Value of that dimension that marks "no URL"; rows carrying
            it are dropped before aggregation.
        url_column: Name given to the URL column in the result.

    Returns:
        The result of ``ac.get_data_df`` with the processor below applied
        (presumably a DataFrame with ``pagePath``, ``url_column``,
        ``eventCount`` and ``totalUsers`` columns -- TODO confirm against
        ``ac.get_data_df``).
    """
    def _sum_per_page_and_url(df):
        # Index level 1 is used as the URL dimension; this assumes get_data_df
        # indexes the frame by the requested dimensions in order -- TODO confirm.
        has_url = df.index.get_level_values(1) != unset_value
        # "eventName" is requested but never filtered on: grouping by page and
        # URL sums the metrics across every event name.
        return (
            df.loc[has_url]
            .groupby(["pagePath", url_dimension])
            .sum()
            .reset_index()
            .rename(columns={url_dimension: url_column})
        )

    return ac.get_data_df(
        ["eventCount", "totalUsers"],
        ["pagePath", url_dimension, "eventName"],
        **query_params,
        df_processor=_sum_per_page_and_url,
        num_keep_dimensions=2,
    )


def _combine_link_clicks(df_builtin_links, df_outbound_links):
    """Merge builtin and custom link-click rows into one report frame.

    Builtin rows are kept as-is. Custom rows are only used when their URL
    contains a ``#fragment``; such a row replaces the builtin row for the same
    page and the same URL with the fragment stripped.

    Args:
        df_builtin_links: Frame with ``pagePath``, ``builtin_url``,
            ``eventCount`` and ``totalUsers`` columns.
        df_outbound_links: Frame with ``pagePath``, ``outbound_url``,
            ``eventCount`` and ``totalUsers`` columns.

    Returns:
        A new DataFrame sorted by click count (descending) with the columns
        ``Page Path``, ``Complete URL``, ``Total Clicks``, ``Total Users`` and
        ``Outbound``. The inputs are not modified.
    """
    # Tag both sources explicitly so the concatenated flag columns never
    # contain NaN and can be used directly as boolean masks.
    builtin_rows = df_builtin_links.assign(
        truncated_url=df_builtin_links["builtin_url"],
        builtin=True,
        is_fragment=False,
    )
    outbound_rows = df_outbound_links.assign(
        truncated_url=df_outbound_links["outbound_url"].str.replace(r"#.*", "", regex=True),
    )
    # .assign returns a new frame, so no column is ever set on a .loc slice.
    fragment_rows = outbound_rows.loc[
        outbound_rows["outbound_url"].str.contains("#")
    ].assign(builtin=False, is_fragment=True)

    df_all_links = pd.concat(
        [builtin_rows, fragment_rows], ignore_index=True
    ).astype({"builtin": bool, "is_fragment": bool})

    # A builtin row is superseded only when the SAME page has a fragment row for
    # the same truncated URL. Matching on the URL alone would let a fragment
    # click on one page delete another page's row, which has no replacement.
    fragment_keys = set(zip(fragment_rows["pagePath"], fragment_rows["truncated_url"]))
    superseded = pd.Series(
        [
            bool(is_builtin) and (page, url) in fragment_keys
            for page, url, is_builtin in zip(
                df_all_links["pagePath"],
                df_all_links["truncated_url"],
                df_all_links["builtin"],
            )
        ],
        index=df_all_links.index,
        dtype=bool,
    )
    df_all_links = df_all_links.loc[~superseded].sort_values("eventCount", ascending=False)

    # Determine whether a link is an outbound link, and pick the URL to report:
    # the custom-event URL for fragment rows, the builtin URL otherwise.
    df_all_links = df_all_links.assign(
        outbound=lambda d: d["truncated_url"].isin(outbound_rows["truncated_url"]),
        complete_url=lambda d: d["builtin_url"].where(~d["is_fragment"], d["outbound_url"]),
    )
    return df_all_links.rename(
        columns={
            "pagePath": "Page Path",
            "complete_url": "Complete URL",
            "eventCount": "Total Clicks",
            "totalUsers": "Total Users",
            "outbound": "Outbound",
        }
    )[["Page Path", "Complete URL", "Total Clicks", "Total Users", "Outbound"]].reset_index(drop=True)


# Maps each property name to its combined link-click report.
results_dict = {}
# Process-wide pandas option, kept from the original cell in case other code in
# the notebook depends on it; the merge above no longer needs fillna downcasting.
pd.set_option('future.no_silent_downcasting', True)
for key, query_params in params_by_property.items():
    # Rows from the builtin link dimension ("" means no link URL).
    df_builtin_links = _fetch_link_clicks(query_params, "linkUrl", "", "builtin_url")
    # Rows from the custom click_url dimension ("(not set)" means no URL).
    df_outbound_links = _fetch_link_clicks(
        query_params, "customEvent:click_url", "(not set)", "outbound_url"
    )
    results_dict[key] = _combine_link_clicks(df_builtin_links, df_outbound_links)

# Excel does not accept column widths above 255 characters.
_MAX_COLUMN_WIDTH = 255

# Write one worksheet per property into a single workbook.
with pd.ExcelWriter("outlinks_summary.xlsx", engine="xlsxwriter") as writer:
    for sheet_name, df_links in results_dict.items():
        df_links.to_excel(writer, sheet_name=sheet_name, index=False)
        sheet = writer.sheets[sheet_name]
        # Size each column to its longest value or its header, whichever is
        # longer, plus one character of padding.
        for idx, col in enumerate(df_links):
            # default=0 keeps the width an integer for an empty frame; the
            # pandas .max() of an empty column is NaN, which would propagate
            # into set_column.
            longest_cell = max(
                (len(text) for text in df_links[col].astype(str)), default=0
            )
            width = min(max(longest_cell, len(str(col))) + 1, _MAX_COLUMN_WIDTH)
            sheet.set_column(idx, idx, width)