In [1]:
# Import packages and read-in files

import numpy as np
import pandas as pd
import xpress as xp
from datetime import datetime, timedelta
import os
import math 

# Read in files using the explicitly defined base path
ch_0_conversion_rates = pd.read_csv('channel_0_conversion_rates.csv')
ch_0_schedule = pd.read_csv('channel_0_schedule.csv')
ch_1_conversion_rates = pd.read_csv('channel_1_conversion_rates.csv')
ch_1_schedule = pd.read_csv('channel_1_schedule.csv')
ch_2_conversion_rates = pd.read_csv('channel_2_conversion_rates.csv')
ch_2_schedule = pd.read_csv('channel_2_schedule.csv')
ch_A_schedule = pd.read_csv('channel_A_schedule.csv')
movies_df = pd.read_csv('movie_database.csv')

In [2]:
# To use right xpress and get rid of unnecessary error codes
xp.init('C:/xpressmp/bin/xpauth.xpr')
pd.options.mode.copy_on_write = True
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Formatting of all dataframes

In [3]:
# FORMATING
# Convert 'Date-Time' columns to datetime format
date_cols = ['Date']

for df in [ch_0_conversion_rates, ch_0_schedule, ch_1_conversion_rates, ch_1_schedule,
           ch_2_conversion_rates, ch_2_schedule, ch_A_schedule]:
    df['Date'] = pd.to_datetime(df['Unnamed: 0'])
    df.set_index('Date', inplace=True)
    df.drop('Unnamed: 0', axis=1, inplace = True)
   

# Convert 'Release Date' in movie_database to datetime
movies_df['release_date'] = pd.to_datetime(movies_df['release_date'])

# Fill missing values if necessary
movies_df.fillna(0, inplace=True)

In [4]:
# Put all dataframes into 30-min slots instead of 5-min slots

# Ch A 30 min compressing
from datetime import time

# Resample to 30-minute intervals (use mean of groups)
ch_A_schedule_30min = ch_A_schedule.resample('30T').mean().reset_index()

# Define start and end times for filtering
daily_start_time = time(7, 0)  # 7:00 AM
daily_end_time = time(23, 30)  # 11:55 PM

# Filter rows to include only time slots within the desired range 
ch_A_schedule_30min['time'] = ch_A_schedule_30min['Date'].dt.time
ch_A_schedule_30min = ch_A_schedule_30min[
    (ch_A_schedule_30min['time'] >= daily_start_time) &
    (ch_A_schedule_30min['time'] <= daily_end_time)
].drop(columns=['time'])
ch_A_schedule_30min.reset_index(inplace = True)
ch_A_schedule_30min.drop('index', axis = 1,inplace = True)

# Ch 0 30 min compressing
from datetime import time
ch_0 = pd.DataFrame(ch_0_schedule['ad_slot_price'])


# Resample to 30-minute intervals (use mean of groups)
ch_0_schedule_30min = ch_0.resample('30T').mean().reset_index()

# Define start and end times for filtering
daily_start_time = time(7, 0)  # 7:00 AM
daily_end_time = time(23, 55)  # 11:55 PM

# Filter rows to include only time slots within the desired range 
ch_0_schedule_30min['time'] = ch_0_schedule_30min['Date'].dt.time
ch_0_schedule_30min = ch_0_schedule_30min[
    (ch_0_schedule_30min['time'] >= daily_start_time) &
    (ch_0_schedule_30min['time'] <= daily_end_time)
].drop(columns=['time'])
ch_0_schedule_30min.reset_index(inplace = True)
ch_0_schedule_30min.drop('index', axis = 1,inplace = True)

# Ch 1 30 min compressing
from datetime import time
ch_1 = pd.DataFrame(ch_1_schedule['ad_slot_price'])


# Resample to 30-minute intervals (use mean of groups)
ch_1_schedule_30min = ch_1.resample('30T').mean().reset_index()

# Define start and end times for filtering
daily_start_time = time(7, 0)  # 7:00 AM
daily_end_time = time(23, 55)  # 11:55 PM

# Filter rows to include only time slots within the desired range 
ch_1_schedule_30min['time'] = ch_1_schedule_30min['Date'].dt.time
ch_1_schedule_30min = ch_1_schedule_30min[
    (ch_1_schedule_30min['time'] >= daily_start_time) &
    (ch_1_schedule_30min['time'] <= daily_end_time)
].drop(columns=['time'])
ch_1_schedule_30min.reset_index(inplace = True)
ch_1_schedule_30min.drop('index', axis = 1,inplace = True)

# Ch 2 30 min compressing
from datetime import time
ch_2 = pd.DataFrame(ch_2_schedule['ad_slot_price'])


# Resample to 30-minute intervals (use mean of groups)
ch_2_schedule_30min = ch_2.resample('30T').mean().reset_index()

# Define start and end times for filtering
daily_start_time = time(7, 0)  # 7:00 AM
daily_end_time = time(23, 55)  # 11:55 PM

# Filter rows to include only time slots within the desired range 
ch_2_schedule_30min['time'] = ch_2_schedule_30min['Date'].dt.time
ch_2_schedule_30min = ch_2_schedule_30min[
    (ch_2_schedule_30min['time'] >= daily_start_time) &
    (ch_2_schedule_30min['time'] <= daily_end_time)
].drop(columns=['time'])

ch_2_schedule_30min.reset_index(inplace = True)
ch_2_schedule_30min.drop('index', axis = 1,inplace = True)

# Getting Movie dataframe together

In [5]:
# Slot duration 30 minutes
slot_duration = 30  # minutes
movies_df['slots_needed'] = (movies_df['runtime_with_ads'] / slot_duration).apply(lambda x: int(x)).astype(int)

In [6]:
# Check for duplicate movie titles
duplicate_titles = movies_df[movies_df.duplicated(subset=['title'], keep=False)]
if not duplicate_titles.empty:
    print("Duplicate movie titles found:")
    print(duplicate_titles['title'])
else:
    print("No duplicate movie titles found.")

Duplicate movie titles found:
4                 The Avengers
17                     Titanic
76               The Lion King
105       Beauty and the Beast
149        Alice in Wonderland
                 ...          
5748              Midnight Sun
5761                The Island
5773            The Shaggy Dog
5855    Fun with Dick and Jane
5879        The Perfect Weapon
Name: title, Length: 258, dtype: object


## Choose Number of Days and Movies, then format time slots and datetime layout
Specify how many movies you want to practice code with and how many days you want schedule to be created for 

In [132]:
####
# Specify size of small_movies to practice and debug
movies_small = movies_df.head(550)

# make copy for movie info later since the model takes the movie titles out of movies_small
movies_copy = movies_small.copy(deep=True)

# change num_days based on number of days you want the scheduling function to create
num_days = 3

In [8]:
# create time slots and whatnot

from datetime import datetime, timedelta
# Define the broadcasting start and end dates
broadcast_start_date = datetime.strptime("2024-10-01", "%Y-%m-%d")
broadcast_end_date = broadcast_start_date + timedelta(days=num_days)  # 7 days including start date

# Define daily broadcast start and end times
daily_broadcast_start_time = timedelta(hours=7, minutes=0)
daily_broadcast_end_time = timedelta(hours=23, minutes=30)

# Generate all time slots over the date range
time_slots = []
current_date = broadcast_start_date
while current_date <= broadcast_end_date:
    # Set the start and end times for the current day
    day_start = datetime.combine(current_date.date(), datetime.min.time()) + daily_broadcast_start_time
    day_end = datetime.combine(current_date.date(), datetime.min.time()) + daily_broadcast_end_time
    
    current_time = day_start
    while current_time <= day_end:
        time_slots.append(current_time)
        current_time += timedelta(minutes=slot_duration)
    
    # Move to the next day
    current_date += timedelta(days=1)

# Create mappings between time slots and indices
time_to_index = {t: idx for idx, t in enumerate(time_slots)}
index_to_time = {idx: t for idx, t in enumerate(time_slots)}

# Function to calculate viewership of each movie (will be used in objective function)

In [9]:
# Function to find viewership of each movie based on popularity of each demographic
def get_views(m, t_idx, movies, our_channel):
    # Baseline viewership at time slot t_idx
    baseline = {
        'children': our_channel.iloc[t_idx]['children_baseline_view_count'],
        'adults': our_channel.iloc[t_idx]['adults_baseline_view_count'],
        'retirees': our_channel.iloc[t_idx]['retirees_baseline_view_count']
    }
    
    # Scaled popularity of movie m
    popularity = {
        'children': movies.loc[m, 'children_scaled_popularity'],
        'adults': movies.loc[m, 'adults_scaled_popularity'],
        'retirees': movies.loc[m, 'retirees_scaled_popularity']
    }
    
    # Calculate expected viewership (assuming total population of 1,000,000)
    total_population = 1_000_000
    viewership = sum(
        baseline[demo] * popularity[demo] * total_population
        for demo in ['children', 'adults', 'retirees']
    )
    
    return viewership

# The Model

In [10]:
# Function to create xpress problem, decision vars, and objective function
# to maximize viewership
def model(T, movies, our_channel):
    prob = xp.problem(name="Movie_Scheduling_Problem")
    
    # Constants
    slots_per_day = 34  # Number of slots per day (e.g., from 07:00 to 23:30)
    days = len(time_slots) // slots_per_day  # Number of days in the schedule
    M = len(time_slots)  # Big M for constraints
    T_end = len(time_slots) - 1  # Last time slot index

    
    # Decision Variables
    x = {(m, t): xp.var(vartype=xp.binary, name='x_{0}_{1}'.format(m, t)) 
         for m in movies.index for t in T}
    prob.addVariable(list(x.values()))
    
    y = {m: xp.var(vartype=xp.binary, name='y_{0}'.format(m)) for m in movies.index}
    prob.addVariable(list(y.values()))
 
    
    s = {m: xp.var(vartype=xp.integer, name='s_{0}'.format(m)) for m in movies.index}
    e = {m: xp.var(vartype=xp.integer, name='e_{0}'.format(m)) for m in movies.index}
    prob.addVariable(list(s.values()))
    prob.addVariable(list(e.values()))
    
    u = {(m, t): xp.var(vartype=xp.continuous, name='u_{0}_{1}'.format(m, t)) 
         for m in movies.index for t in T}
    prob.addVariable(list(u.values()))
    
    decision_vars = [x, y, s, e]

    # Constraints
    # 1. Movie Duration Constraint
    for m in movies.index:
        prob.addConstraint(
            xp.Sum(x[m, t] for t in T) == movies.loc[m, 'slots_needed'] * y[m]
        )
    # 2. Time Slot Occupancy Constraint
    for t in T:
        prob.addConstraint(
            xp.Sum(x[m, t] for m in movies.index) == 1
        )
    # 3. Start Time Constraints
    for m in movies.index:
        for t in T:
            prob.addConstraint(
                s[m] <= t * x[m, t] + (1 - x[m, t]) * M
            )
    # 4. End Time Constraints
    for m in movies.index:
        for t in T:
            prob.addConstraint(
                e[m] >= (t + 1) * x[m, t]
            )
    # 5. Movie Duration Relationship Constraint
    for m in movies.index:
        prob.addConstraint(
            e[m] - s[m] == movies.loc[m, 'slots_needed'] * y[m]
        )
    # 6 last movie ends at midnight
    prob.addConstraint(s[m] + movies.loc[m, 'slots_needed'] -1 <= T_end for m in movies.index)
    # 7. View Count Constraints
    for m in movies.index:
        for t_idx, t in enumerate(T):
            expected_viewership = get_views(m, t_idx, movies, our_channel)
            prob.addConstraint(
                u[m, t] == expected_viewership * x[m, t]
            )
    # 9. Objective Function
    prob.setObjective(
        xp.Sum(u[m, t] for m in movies.index for t in T),
        sense=xp.maximize
    )
    
    return prob, decision_vars


In [12]:
# Making schedule layout
def get_time(slot_index):
    return time_slots[slot_index].time()

used_movie_ids = []
def get_sched(prob, movies, decision_vars): 
    
    x = decision_vars[0]
    y = decision_vars[1]
    s = decision_vars[2]
    e = decision_vars[3]
    
    scheduled_movies = []
    used_movie_ids = []

    for m_idx, m_row in movies.iterrows():
        y_value = prob.getSolution(y[m_idx])
        if y_value > 0.5:  # Movie is scheduled
            used_movie_ids.append(m_idx)
            start_slot = int(prob.getSolution(s[m_idx]))
            end_slot = int(prob.getSolution(e[m_idx]))
    
            # Ensure the end slot is valid
            if end_slot < len(time_slots):
                # Get the corresponding day and times
                start_time = time_slots[start_slot]
                end_time = time_slots[end_slot]
                day = start_time.strftime('%Y-%m-%d')
                start_time_formatted = start_time.strftime('%H:%M')
                end_time_formatted = end_time.strftime('%H:%M')
    
                # Append the movie schedule
                scheduled_movies.append({
                    'Time Slot': start_slot,
                    'Day': day,
                    'Start Time': start_time_formatted,
                    'End Time': end_time_formatted,
                    'Movie Index': m_idx,
                    'Movie Title': m_row['title'], 
                    'num_slots': m_row['slots_needed'],
                    'movie_budget': m_row['budget'],
                    'box_office_revenue': m_row['revenue'],
                    'n_ad_breaks': m_row['n_ad_breaks']
                })
    # Convert to a DataFrame for better organization
    schedule_df = pd.DataFrame(scheduled_movies) 
    
    # Sort by day and time slot
    schedule_df.sort_values(by=['Day', 'Time Slot'], inplace=True)
    
    # Reset index for cleaner output
    schedule_df.reset_index(drop=True, inplace=True)

    # Print the schedule
    print("Scheduled Movies in Order:")
    print(schedule_df)

    return scheduled_movies, used_movie_ids, schedule_df

In [13]:
# Putting it all together

def movie_sched(number_days, movies, our_channel):
    used_movie_ids= []
    total_schedule = [] # list of 
    scheduled_dates = {}  # Dictionary to track when movies were scheduled by movie_id
    
    full_list = []
    flat_list = []
    for k in range(number_days):
        if k == 0:
            T = range(k*34, ((k*34)+33)+1)
    
            prob, decision_vars =  model(T, movies, our_channel)
    
            prob.solve()
            
            scheduled_movies, used_movie_ids, schedule_df = get_sched(prob,movies,decision_vars)
            
            # add scheduled movies to a list
            full_list.append(scheduled_movies)
    
        # take used movies out of df so we don't repeat movies
            for i in used_movie_ids:
                movies.drop(i, inplace = True)
            total_schedule.append(scheduled_movies)

        elif k > 0:
            # 34*3 = 102 but index at zero --> 3 days is rows 0-101
            # 34*2 = 68 (start of day 3)
            # Day 1: 0-33  ( +33)
            # Day 2: 34-67 ( +34)
            # Day 3: 68-102 ( +34)
            
            T = range(k*34, (k+1)*34)
    
            prob, decision_vars =  model(T, movies, our_channel)
    
            prob.solve()
            
            scheduled_movies, used_movie_ids, schedule_df = get_sched(prob,movies,decision_vars)
            
            # add scheduled movies to a list
            full_list.append(scheduled_movies)
    
        # take used movies out of df so we don't repeat movies
            for i in used_movie_ids:
                movies.drop(i, inplace = True)
            total_schedule.append(scheduled_movies)
        
    # unpack the list of list of dictionaries so we can make it into a df
    for i in full_list:
        for item in i:
            flat_list.append(item)
            
    # make df of all movies used for all days, ordered by day then by time slot        
    full_df = pd.DataFrame.from_dict(flat_list, orient='columns')
    full_df.sort_values(by=['Day', 'Time Slot'], inplace=True)
    full_df.reset_index(drop=True, inplace=True)
        
    print('full df:', full_df)
    return full_df

In [49]:
# Code to find licensings fee (cost to air each movie)
import numpy as np
import pandas as pd
movie_money_info = pd.DataFrame()

def calculate_license_price(schedule_df: pd.DataFrame) -> pd.Series:
    '''
    Works out the cost required to buy a specific ad slot.  This is based on the time
    of day, and the budget/earnings of the movie being shown before the
    chosen ad slot.

    This function is applied to a schedule dataframe to create a new column
    containing the ad slot prices, returns NaN if the slot is not an ad slot.

    This is also multiplied by the prime time factor, desired profit margin does
    not take into account the effects of prime time factor currently, i.e.
    there'll be a larger profit margin obtained than the one specified for spots
    in prime time.

    Values used in generation of dataset.
    base_fee = 10_000
    profit_margin = 0.2
    budget_factor = 0.002
    box_office_revenue_factor = 0.001

    :param schedule_df: Dataframe containing the populated schedule with movies and
                      : ad breaks.
    :param base_fee: Base fee required for all movies to be licensed to a channel
    :param profit_margin: Percent (in 0-1 scale) of license fee that the channel
                        : wants to make in profit.
    :param budget_factor: What percent (in 0-1 scale) of the movie's budget contributes
                        : to the license fee.
    :param box_office_factor: What percent (in 0-1 scale) of the movie's box office renvenue
                            : contributes to the license fee.
    '''

    # license_fee = (base_fee
    #                + (budget_factor * schedule_df.movie_budget)
    #                + (box_office_factor *schedule_df.box_office_revenue)
    #                ) * (1. + profit_margin)

    license_fee = (10_000
                   + (0.002 * schedule_df.movie_budget)
                   + (0.001 *schedule_df.box_office_revenue)
                   ) * (1. + 0.2)

    ad_slot_cost = (license_fee / schedule_df.n_ad_breaks) #* schedule_df.prime_time_factor
    
    movie_money_info['movie'] = schedule_df['Movie Title'] 
    movie_money_info['license_fee'] = round(license_fee,2)
    movie_money_info['cost_per_ad_slot_cost'] = round(ad_slot_cost,2)
    movie_money_info['num_slots'] = schedule_df['n_ad_breaks']

    # return np.round(ad_slot_cost, 2)
    return movie_money_info

# Find money information based on generated schedule (change num_days and size of movies_small if desired, but make sure you run all cells again)
## df returned from license_fee function is called "movie_money_info" and can be referenced later on in code 

In [15]:
# test for 3 days 
# 
three_days = movie_sched(number_days = num_days, movies = movies_small, our_channel= ch_A_schedule_30min)

FICO Xpress v9.4.2, Hyper, solve started 13:23:36, Nov 24, 2024
Heap usage: 29MB (peak 29MB, 9179KB system)
Maximizing MILP Movie_Scheduling_Problem using up to 12 threads and up to 7528MB memory, with these control settings:
OUTPUTLOG = 1
NLPPOSTSOLVE = 1
XSLP_DELETIONCONTROL = 0
XSLP_OBJSENSE = -1
Original problem has:
     57784 rows        39050 cols       152350 elements     20350 entities
Presolved problem has:
     38534 rows        20350 cols       114400 elements     20350 entities
LP relaxation tightened
Presolve finished in 0 seconds
Heap usage: 43MB (peak 68MB, 9179KB system)

Coefficient range                    original                 solved        
  Coefficients   [min,max] : [ 3.31e-02,  1.71e+05] / [ 7.81e-03,  1.98e+00]
  RHS and bounds [min,max] : [ 1.00e+00,  1.36e+02] / [ 1.00e+00,  1.36e+02]
  Objective      [min,max] : [ 1.00e+00,  1.00e+00] / [ 3.31e-02,  1.71e+05]
Autoscaling applied standard scaling

Symmetric problem: generators: 24, support set: 1295
 Numb

In [50]:
calculate_license_price(schedule_df= three_days)

Unnamed: 0,movie,license_fee,cost_per_ad_slot_cost,num_slots
0,Thor: Love and Thunder,1525113.7,381278.42,4
1,Titanic,3208994.82,458427.83,7
2,Forrest Gump,956865.26,191373.05,5
3,The Hangover,659173.0,164793.25,4
4,The Wolf of Wall Street,722400.0,103200.0,7
5,After,128997.1,32249.28,4
6,Harry Potter and the Order of the Phoenix,1497855.29,299571.06,5
7,GoodFellas,128202.0,25640.4,5
8,The Shawshank Redemption,106009.76,21201.95,5
9,Cruella,772203.88,154440.78,5


In [18]:
# # get specifically prime time ad slots 
# prime_time_df= pd.DataFrame(data = ch_A_schedule_30min.iloc[22:30,:])
# for k in range(84):
#     rest = ch_A_schedule_30min.iloc[22+(34*k):30+(34*k),:]
#     prime_time_df = pd.concat([prime_time_df,rest])
# #print(prime_time_df)

In [46]:
cha3 = ch_A_schedule_30min.head(103)
# cha3.drop(['children_baseline_view_count', 'adults_baseline_view_count','retirees_baseline_view_count'], axis=1, inplace=True)
cha3.head()

Unnamed: 0,Date,children_baseline_view_count,adults_baseline_view_count,retirees_baseline_view_count,prime_time_factor
0,2024-10-01 07:00:00,0.019383,0.022296,0.003611,1.0
1,2024-10-01 07:30:00,0.015485,0.022928,0.004694,1.0
2,2024-10-01 08:00:00,0.009684,0.024245,0.006021,1.0
3,2024-10-01 08:30:00,0.004744,0.026238,0.007612,1.0
4,2024-10-01 09:00:00,0.001831,0.028892,0.009478,1.0


In [51]:
def fill_ch_A(df_to_fill, df_of_movies):
    d = 0
    x = 0
    for i in df_of_movies['num_slots']:
        # val = df_of_movies['Movie Title'].iloc[d]
        df_to_fill.loc[x:x+i, 'movie'] = df_of_movies['Movie Title'].iloc[d]
        df_to_fill.loc[x:x+i, 'ad_cost'] = movie_money_info['cost_per_ad_slot_cost'].iloc[d]
        d += 1
        x += i
    # print(df_to_fill['movie'])
    return df_to_fill
    
fill_ch_A(df_to_fill = cha3, df_of_movies = three_days)

Unnamed: 0,Date,children_baseline_view_count,adults_baseline_view_count,retirees_baseline_view_count,prime_time_factor,movie,ad_cost
0,2024-10-01 07:00:00,0.019383,0.022296,0.003611,1.000000,Thor: Love and Thunder,381278.42
1,2024-10-01 07:30:00,0.015485,0.022928,0.004694,1.000000,Thor: Love and Thunder,381278.42
2,2024-10-01 08:00:00,0.009684,0.024245,0.006021,1.000000,Thor: Love and Thunder,381278.42
3,2024-10-01 08:30:00,0.004744,0.026238,0.007612,1.000000,Thor: Love and Thunder,381278.42
4,2024-10-01 09:00:00,0.001831,0.028892,0.009478,1.000000,Thor: Love and Thunder,381278.42
...,...,...,...,...,...,...,...
98,2024-10-03 22:00:00,0.001094,0.101299,0.012007,1.083333,Saw,34893.50
99,2024-10-03 22:30:00,0.000555,0.095718,0.009817,1.000000,Saw,34893.50
100,2024-10-03 23:00:00,0.000264,0.089721,0.007904,1.000000,Saw,34893.50
101,2024-10-03 23:30:00,0.000118,0.083435,0.006268,1.000000,Saw,34893.50


In [66]:
all_slots = pd.DataFrame()
# cost per ad slot on each channel
all_slots['0'] = ch_0_schedule_30min['ad_slot_price']
all_slots['1'] = ch_1_schedule_30min['ad_slot_price']
all_slots['2'] = ch_2_schedule_30min['ad_slot_price']

# basic stats
# all_slots['avg'] = round(all_slots.mean(axis = 1), 2)
all_slots['max'] = all_slots.max(axis=1)
all_slots['min'] = all_slots.min(axis=1)
all_slots['our price'] = round(cha3['ad_cost'],2)

# identify best and worst channel 
all_slots['max ch #'] = all_slots[['0','1','2']].idxmax(axis=1)
all_slots['min ch #'] = all_slots[['0','1','2']].idxmin(axis=1)

# potential profits/ upcharges
# find out percentage difference of most expensive ad compared to ours
all_slots['times_more_expensive'] = round(all_slots['our price']/all_slots['max'],2)
# all_slots['smallest_profit'] = all_slots['our_sell_price'] - all_slots['max']
# all_slots['greatest_profit'] = all_slots['our_sell_price'] - all_slots['min']
all_slots['cost_per_viewer'] = round(all_slots['our price'] / ((cha3['children_baseline_view_count']+cha3['adults_baseline_view_count']+cha3['retirees_baseline_view_count'])*1_000_000),2)


all_slots.head()

Unnamed: 0,0,1,2,max,min,our price,max ch #,min ch #,times_more_expensive,cost_per_viewer
0,56995.17,25458.66,365283.35,365283.35,25458.66,381278.42,2,1,1.04,8.42
1,56995.17,25458.66,365283.35,365283.35,25458.66,381278.42,2,1,1.04,8.84
2,25479.06,16629.1,365283.35,365283.35,16629.1,381278.42,2,1,1.04,9.54
3,25479.06,16629.1,365283.35,365283.35,16629.1,381278.42,2,1,1.04,9.88
4,25479.06,16629.1,17700.0,25479.06,16629.1,381278.42,0,1,14.96,9.48


In [85]:
cha3['ad_cost'].sum()/((cha3['children_baseline_view_count'].sum() + cha3['adults_baseline_view_count'].sum() + cha3['retirees_baseline_view_count'].sum())*1000000)

1.6894083059535145

In [170]:

p = []
p_flat = []
for m in cha3['movie']:
    v = movies_copy.index[movies_copy['title'] == m].to_list()
    p.append(v)
for i in p:
    for it in i:
        p_flat.append(it)
    
    child_expected_viewcounts = cha3['children_baseline_view_count']*movies_copy.loc[it, 'children_scaled_popularity']
    adults_expected_viewcounts = cha3['adults_baseline_view_count']*movies_copy.loc[it, 'adults_scaled_popularity']
    retirees_expected_viewcounts = cha3['retirees_baseline_view_count']*movies_copy.loc[it, 'retirees_scaled_popularity']

cha3['child_expected_viewcounts'] = child_expected_viewcounts
cha3['adults_expected_viewcounts'] = adults_expected_viewcounts
cha3['retirees_expected_viewcounts'] = retirees_expected_viewcounts
cha3['total_expected_viewers'] = (cha3['child_expected_viewcounts']+cha3['adults_expected_viewcounts']+cha3['retirees_expected_viewcounts']*1000000)
# cha3['cost_per_viewer'] = cha3['ad_cost']/(cha3['child_expected_viewcounts']+cha3['adults_expected_viewcounts']+cha3['retirees_expected_viewcounts']*1000000)
cha3['cost_per_viewer'] = cha3['ad_cost']/cha3['total_expected_viewers']

In [172]:
cha3.iloc[29:35]

Unnamed: 0,Date,children_baseline_view_count,adults_baseline_view_count,retirees_baseline_view_count,prime_time_factor,movie,ad_cost,child_expected_viewcounts,adults_expected_viewcounts,retirees_expected_viewcounts,cost_per_viewer,total_expected_viewers
29,2024-10-01 21:30:00,0.002026,0.10634,0.014459,1.5,The Wolf of Wall Street,103200.0,0.00081,0.10634,0.011567,8.921563,11567.479593
30,2024-10-01 22:00:00,0.001094,0.101299,0.012007,1.083333,After,32249.28,0.000437,0.101299,0.009605,3.357408,9605.409663
31,2024-10-01 22:30:00,0.000555,0.095718,0.009817,1.0,After,32249.28,0.000222,0.095718,0.007853,4.106394,7853.430559
32,2024-10-01 23:00:00,0.000264,0.089721,0.007904,1.0,After,32249.28,0.000106,0.089721,0.006323,5.100376,6322.921664
33,2024-10-01 23:30:00,0.000118,0.083435,0.006268,1.0,After,32249.28,4.7e-05,0.083435,0.005014,6.431711,5014.105994
34,2024-10-02 07:00:00,0.019383,0.022296,0.003611,1.0,Harry Potter and the Order of the Phoenix,299571.06,0.007753,0.022296,0.002889,103.695518,2888.948984
