In [2]:
import pandas as pd
import os

# --- 1. Load the DataFrame (as specified) ---

# Workbook location, relative to the directory the notebook is run from.
# Adjust the 'data/' prefix if the notebook lives somewhere else.
file_path = "data/WF 3 F1-R12 - Great Britain.xlsx"

try:
    # header=5 tells pandas to take the column names from row index 5
    # (zero-based) of the "Worksheet" sheet.
    df = pd.read_excel(file_path, sheet_name="Worksheet", header=5)
except FileNotFoundError:
    print(f"❌ Error: File not found at '{file_path}'. Please ensure the file exists and the path is correct.")
    # Fall back to an empty frame so the rest of the cell can still execute.
    df = pd.DataFrame()
else:
    print("✅ DataFrame successfully loaded.")


# --- 2. Convert the function to Standalone ---

def check_latam_espn_channels(input_df: pd.DataFrame) -> pd.DataFrame:
    """
    CHECK 1: Channel and Territory Review.

    Finds every market listed under a LATAM region and flags the LATAM rows
    of those markets that have no broadcaster whose name contains "espn".

    A region counts as LATAM when its 'Region' value matches (case
    insensitively) one of: "central...america", "south...america",
    "latin america" or "latam". Market names are compared after stripping
    surrounding whitespace; rows with a missing 'Market' are ignored.

    Args:
        input_df: The BSR DataFrame to check. It is never modified. The
            check needs the 'Region', 'Market' and 'Broadcaster' columns.

    Returns:
        A copy of the input with two columns added:
        'LATAM_ESPN_Check_OK' (bool, False on flagged rows) and
        'LATAM_ESPN_Remark' (str explanation). If a required column is
        missing, every row is marked OK with a "Skipped: ..." remark.
        If ``input_df`` is empty it is returned as-is, without the extra
        columns.
    """
    if input_df.empty:
        print("⚠️ Warning: Input DataFrame is empty. Returning an empty DataFrame.")
        return input_df

    print("ACTION: Running LATAM ESPN Channels check (flagging missing markets).")
    df = input_df.copy()

    # Validate the schema explicitly. `df.get(col, '')` would hand back a
    # plain string for an absent column and the following `.astype` call
    # would raise AttributeError instead of reaching a "skip" branch.
    for required_column in ('Region', 'Market', 'Broadcaster'):
        if required_column not in df.columns:
            print(f"❌ Error: '{required_column}' column not found in DataFrame. Skipping check.")
            df['LATAM_ESPN_Check_OK'] = True
            df['LATAM_ESPN_Remark'] = f"Skipped: '{required_column}' column missing."
            return df

    # --- 1. Define the LATAM Scope ---
    latam_region_mask = df['Region'].astype(str).str.contains(
        r'central.*america|south.*america|latin america|latam', case=False, na=False
    )

    # Normalised market names; NaN markets are tracked separately so that the
    # string "nan" produced by astype(str) is never treated as a market.
    has_market = df['Market'].notna()
    market_names = df['Market'].astype(str).str.strip()
    latam_rows = latam_region_mask & has_market

    # All unique markets (countries) found in the LATAM region.
    all_latam_markets = set(market_names[latam_rows].unique())

    # Markets in the LATAM region that *do* have an ESPN broadcaster listed.
    espn_mask = df['Broadcaster'].astype(str).str.contains(r'espn', case=False, na=False)
    markets_with_espn = set(market_names[latam_rows & espn_mask].unique())

    # --- 2. Identify Missing Markets ---
    missing_espn_markets = all_latam_markets - markets_with_espn

    print(f"Total LATAM Markets found: {len(all_latam_markets)}")
    print(f"Markets with ESPN data: {len(markets_with_espn)}")
    print(f"⚠️ Markets missing ESPN data: {missing_espn_markets}")

    # --- 3. Update the full DF with QC columns ---
    df['LATAM_ESPN_Check_OK'] = True
    df['LATAM_ESPN_Remark'] = "OK"

    # Only LATAM rows are flagged: ESPN presence was evaluated on LATAM rows
    # alone, so a same-named market in another region must not inherit the
    # LATAM verdict.
    flag_mask = latam_rows & market_names.isin(missing_espn_markets)

    df.loc[flag_mask, 'LATAM_ESPN_Check_OK'] = False
    df.loc[flag_mask, 'LATAM_ESPN_Remark'] = "Market has no ESPN data (Expected in LATAM region)."

    return df


# --- 3. Run the Function and Print the Result ---

if df.empty:
    # Loading failed earlier in the cell, so there is nothing to check.
    print("Cannot proceed with the check as the DataFrame could not be loaded.")
else:
    df_processed = check_latam_espn_channels(df)

    print("\n====================================")
    print("✅ Final Processed DataFrame Output")
    print("====================================\n")

    # The last few rows are printed as a quick look at the result,
    # including the QC columns added by the check.
    print(df_processed.tail())

    # Keep only the rows the check marked as not OK.
    flagged_rows = df_processed[~df_processed['LATAM_ESPN_Check_OK']]

    if flagged_rows.empty:
        print("\n--- No LATAM ESPN markets were flagged in this dataset. ---\n")
    else:
        print("\n--- Flagged Rows (LATAM ESPN Missing) ---\n")
        print(flagged_rows)

✅ DataFrame successfully loaded.

--- Initial DataFrame Head ---

                      Region     Market  Market ID Broadcaster  \
0  Central and South America  Argentina       74.0        ESPN   
1  Central and South America  Argentina       74.0    Mediapro   
2  Central and South America  Argentina       74.0    Mediapro   
3  Central and South America  Argentina       74.0    Mediapro   
4  Central and South America  Argentina       74.0    Mediapro   

         TV-Channel  Channel ID Pay/Free TV Date (UTC/GMT)       Date  \
0        ESPN 2 ARG      1210.0         NaN     2025-07-04 2025-07-03   
1  Fox Sports (ARG)      2732.0         NaN     2025-07-04 2025-07-04   
2  Fox Sports (ARG)      2732.0         NaN     2025-07-04 2025-07-04   
3  Fox Sports (ARG)      2732.0         NaN     2025-07-05 2025-07-04   
4  Fox Sports (ARG)      2732.0         NaN     2025-07-05 2025-07-04   

                   Day  ... Spot price in Euro [1 sec.] Fixture analysis  \
0  2025-07-03 00:00:00