## PHD media data to Hellman_Media_Daily.xlsx

In [3]:
import pandas as pd

# Dates of Media data you want to cover and input file name
file_input = "Unilever Data Submission Nov 2024 - Complete (Social,Digital, Instacart, Amazon, Google Ads, CMR DIrectIO, Apple Ads, Walmart, Loblaws).xlsx"
data_range = pd.date_range(start='2023-01-01', end='2024-10-31', freq='D')
merged_Impression = pd.DataFrame(index=data_range)
merged_Spend = pd.DataFrame(index=data_range)

# Media Channels that you want to include.
# Each entry is a sheet name; sheets are processed (and their columns emitted) in this order.
channels = ["Social", "Programmatic", "Instacart", "Amazon Search", "Google Ads", "CMR (TV)", "DirectIO"]

# Per-sheet layout. Every sheet is handled the same way and only the column names differ:
#   filter_col / filter_values : keep rows whose filter_col is one of filter_values
#   group_cols                 : one output column pair (Spend, Impressions) per distinct combination
#   title_prefix / key_sep     : column title is "<title_prefix> | <group keys joined by key_sep>"
#   date_col                   : column holding the day the row belongs to
#   spend_col / impressions_col: metrics summed per day
CHANNEL_SPECS = {
    "Social": {
        "filter_col": "Account name",
        "filter_values": ["Hellmann's"],
        "group_cols": ["Category", "Platform"],
        "title_prefix": "Social | Hellman",
        "key_sep": " | ",
        "date_col": "Date",
        "spend_col": "Amount Spent",
        "impressions_col": "Impressions",
    },
    "Programmatic": {
        # Only one advertiser survives the filter, so its display name ("Hellman")
        # lives in the title prefix instead of being written back into the sheet.
        "filter_col": "Advertiser",
        "filter_values": ["CA_Hellmann's_Unilever"],
        "group_cols": ["Category", "Platform", "Ad Type"],
        "title_prefix": "Programmatic | Hellman",
        "key_sep": " | ",
        "date_col": "Date",
        "spend_col": "Total Cost",
        "impressions_col": "Total Impressions",
    },
    "Instacart": {
        "filter_col": "Brand Name",
        "filter_values": ["Hellmann's"],
        "group_cols": ["Category"],
        "title_prefix": "Instacart | Hellman",
        "key_sep": " | ",
        "date_col": "Time",
        "spend_col": "Spend",
        "impressions_col": "Impressions",
    },
    "Amazon Search": {
        "filter_col": "Brand Name",
        "filter_values": ["Hellmann's"],
        "group_cols": ["Category"],
        "title_prefix": "Amazon Search | Hellman",
        "key_sep": " | ",
        "date_col": "Date",
        "spend_col": "Spend",
        "impressions_col": "Impression",
    },
    "Google Ads": {
        # This sheet spells the brand without an apostrophe.
        "filter_col": "Brand Name",
        "filter_values": ["Hellmanns"],
        "group_cols": ["Category"],
        "title_prefix": "Google Ads | Hellman",
        "key_sep": " | ",
        "date_col": "Day",
        "spend_col": "Cost",
        "impressions_col": "Impressions",
    },
    "CMR (TV)": {
        # The PgmGRP column is written to the "Impressions" output for TV.
        "filter_col": "Concate",
        "filter_values": ["Hellmanns - Nutrition - English", "Hellmanns - Nutrition - French"],
        "group_cols": ["Concate", "Duration (S)"],
        "title_prefix": "TV",
        "key_sep": " - ",
        "date_col": "BroadcastDate",
        "spend_col": "Amount Spent",
        "impressions_col": "PgmGRP",
    },
    "DirectIO": {
        "filter_col": "Brand Name",
        "filter_values": ["Hellmann's"],
        "group_cols": ["Category", "Platform"],
        "title_prefix": "DirectIO | Hellman",
        "key_sep": " | ",
        "date_col": "Date",
        "spend_col": "Amount Spent",
        "impressions_col": "Total Impressions",
    },
}


def build_channel_columns(workbook, sheet_name, spec):
    """Read one media sheet and return its daily Spend and Impressions columns.

    Parameters
    ----------
    workbook : str
        Path of the Excel workbook to read.
    sheet_name : str
        Name of the sheet inside ``workbook``.
    spec : dict
        Layout description for the sheet (see ``CHANNEL_SPECS``).

    Returns
    -------
    (list of pd.Series, list of pd.Series)
        Spend series and Impressions series, one per group, each indexed by the
        values of ``spec["date_col"]`` and named "<title> - Spend" /
        "<title> - Impressions".
    """
    sheet = pd.read_excel(workbook, sheet_name=sheet_name)
    sheet = sheet[sheet[spec["filter_col"]].isin(spec["filter_values"])]

    spend_series = []
    impression_series = []
    for key, group in sheet.groupby(spec["group_cols"]):
        # pandas < 2.0 yields a bare scalar when grouping by a one-element list;
        # normalise to a tuple so a string key is never indexed character-wise.
        keys = key if isinstance(key, tuple) else (key,)
        label = spec["key_sep"].join(format(k) for k in keys)
        title = f"{spec['title_prefix']} | {label}"

        daily = group.groupby(spec["date_col"])[[spec["spend_col"], spec["impressions_col"]]].sum()
        spend_series.append(daily[spec["spend_col"]].rename(f"{title} - Spend"))
        impression_series.append(daily[spec["impressions_col"]].rename(f"{title} - Impressions"))
    return spend_series, impression_series


for channel in channels:
    if channel not in CHANNEL_SPECS:
        raise KeyError(f"No sheet layout defined for channel {channel!r}")
    channel_spend, channel_impressions = build_channel_columns(file_input, channel, CHANNEL_SPECS[channel])

    # Left-join onto the full calendar so days without activity stay in the output.
    # NOTE(review): the join only matches if the sheet's date column parses to
    # midnight timestamps like data_range does; confirm against the workbook.
    for column in channel_impressions:
        merged_Impression = merged_Impression.join(column, how='left')
    for column in channel_spend:
        merged_Spend = merged_Spend.join(column, how='left')

# Days with no rows for a given column are reported as 0, and dates are written as text.
merged_Impression = merged_Impression.fillna(0)
merged_Spend = merged_Spend.fillna(0)
merged_Impression.index = merged_Impression.index.strftime('%Y-%m-%d')
merged_Spend.index = merged_Spend.index.strftime('%Y-%m-%d')

# Transfer to a xlsx file
with pd.ExcelWriter("Hellman_Media_Daily.xlsx") as writer:
    merged_Impression.to_excel(writer, sheet_name='Impression', index=True, index_label='Date')
    merged_Spend.to_excel(writer, sheet_name='Spend', index=True, index_label='Date')
# Report success only after the writer has closed and the file is saved.
print("Mission Completed!")

Mission Completed!


## Hellman_Media_Daily -> Hellman_Media_Weekly

In [4]:
import pandas as pd

# Load daily media data from an Excel file
input_file_path = 'Hellman_Media_Daily.xlsx'
output_file_path = "Hellman_Media_Weekly.xlsx"

# List of sheet names to read from the Excel file; the same names are used for the output sheets
sheet_names = ["Impression", "Spend"]

# Define the start and end dates for the first week.
# Label slicing is inclusive on both ends, so this first bucket covers 8 days
# (2023-01-01 through 2023-01-08); every later bucket covers 7 days.
# NOTE(review): confirm the 8-day first week is intended.
start_date = pd.to_datetime('2023-01-01')
first_week_end_date = pd.to_datetime('2023-01-08')


def aggregate_weekly(daily, first_start, first_end, last_day):
    """Sum a daily frame into weekly rows labelled by week-end date.

    Parameters
    ----------
    daily : pd.DataFrame
        Daily values indexed by a sorted DatetimeIndex.
    first_start, first_end : pd.Timestamp
        Inclusive bounds of the first bucket; it is always emitted.
    last_day : pd.Timestamp
        Last day that may be covered. A following 7-day bucket is emitted only
        if its end date is <= ``last_day``, so a trailing partial week is dropped.

    Returns
    -------
    pd.DataFrame
        One row per week, indexed by the week-end date formatted as
        'YYYY-MM-DD' (index name 'Date'), with the same columns as ``daily``.
    """
    week_ends = [first_end]
    week_sums = [daily.loc[first_start:first_end].sum()]

    # Subsequent weeks start the day after the previous week ended.
    week_start = first_end + pd.Timedelta(days=1)
    week_end = week_start + pd.Timedelta(days=6)
    while week_end <= last_day:
        week_ends.append(week_end)
        week_sums.append(daily.loc[week_start:week_end].sum())
        week_start = week_end + pd.Timedelta(days=1)
        week_end = week_start + pd.Timedelta(days=6)

    # Keep the labels separate from the numeric sums so the columns stay numeric.
    labels = pd.Index([day.strftime('%Y-%m-%d') for day in week_ends], name='Date')
    return pd.DataFrame(week_sums, index=labels)


# Read each sheet into a DataFrame and aggregate it; results are kept in sheet_names order
merged_df = []
for sheet_name in sheet_names:
    df = pd.read_excel(input_file_path, sheet_name=sheet_name, index_col='Date')
    df.index = pd.to_datetime(df.index)
    # Take the last covered day from the file itself, so this cell does not depend
    # on variables left in the kernel by another cell.
    end_date = df.index.max()
    merged_df.append(aggregate_weekly(df, start_date, first_week_end_date, end_date))

with pd.ExcelWriter(output_file_path) as writer:
    for sheet_name, weekly_df in zip(sheet_names, merged_df):
        weekly_df.to_excel(writer, sheet_name=sheet_name, index=True, index_label="Date")
# Report success only after the writer has closed and the file is saved.
print("Mission Completed!")

Mission Completed!
