In [178]:
# Import packages and read-in files

import numpy as np
import pandas as pd
import xpress as xp
from datetime import datetime, timedelta
import os
import math 

# Read in files using the explicitly defined base path
ch_0_conversion_rates = pd.read_csv('channel_0_conversion_rates.csv')
ch_0_schedule = pd.read_csv('channel_0_schedule.csv')
ch_1_conversion_rates = pd.read_csv('channel_1_conversion_rates.csv')
ch_1_schedule = pd.read_csv('channel_1_schedule.csv')
ch_2_conversion_rates = pd.read_csv('channel_2_conversion_rates.csv')
ch_2_schedule = pd.read_csv('channel_2_schedule.csv')
ch_A_schedule = pd.read_csv('channel_A_schedule.csv')
movies_df = pd.read_csv('movie_database.csv')

In [179]:
# To use right xpress and get rid of unnecessary error codes
xp.init('C:/xpressmp/bin/xpauth.xpr')
pd.options.mode.copy_on_write = True

In [180]:
# FORMATING
# Convert 'Date-Time' columns to datetime format
date_cols = ['Date']

for df in [ch_0_conversion_rates, ch_0_schedule, ch_1_conversion_rates, ch_1_schedule,
           ch_2_conversion_rates, ch_2_schedule, ch_A_schedule]:
    df['Date'] = pd.to_datetime(df['Unnamed: 0'])
    df.set_index('Date', inplace=True)
    df.drop('Unnamed: 0', axis=1, inplace = True)
   

# Convert 'Release Date' in movie_database to datetime
movies_df['release_date'] = pd.to_datetime(movies_df['release_date'])

# Fill missing values if necessary
movies_df.fillna(0, inplace=True)


In [181]:
# Slot duration 30 minutes
slot_duration = 30  # minutes
movies_df['slots_needed'] = (movies_df['runtime_with_ads'] / slot_duration).apply(lambda x: int(x)).astype(int)


In [182]:
from datetime import datetime, timedelta

num_days = 2

# Define the broadcasting start and end dates
broadcast_start_date = datetime.strptime("2024-10-01", "%Y-%m-%d")
broadcast_end_date = broadcast_start_date + timedelta(days=num_days)  # 7 days including start date

# Define daily broadcast start and end times
daily_broadcast_start_time = timedelta(hours=7, minutes=0)
daily_broadcast_end_time = timedelta(hours=23, minutes=30)

# Generate all time slots over the date range
time_slots = []
current_date = broadcast_start_date
while current_date <= broadcast_end_date:
    # Set the start and end times for the current day
    day_start = datetime.combine(current_date.date(), datetime.min.time()) + daily_broadcast_start_time
    day_end = datetime.combine(current_date.date(), datetime.min.time()) + daily_broadcast_end_time
    
    current_time = day_start
    while current_time <= day_end:
        time_slots.append(current_time)
        current_time += timedelta(minutes=slot_duration)
    
    # Move to the next day
    current_date += timedelta(days=1)

# Create mappings between time slots and indices
time_to_index = {t: idx for idx, t in enumerate(time_slots)}
index_to_time = {idx: t for idx, t in enumerate(time_slots)}


In [183]:
# Print the first few time slots
print("First 10 Time Slots:")
for ts in time_slots[:10]:
    print(ts.strftime('%Y-%m-%d %H:%M'))

# Print the last few time slots
print("\nLast 10 Time Slots:")
for ts in time_slots[-10:]:
    print(ts.strftime('%Y-%m-%d %H:%M'))


First 10 Time Slots:
2024-10-01 07:00
2024-10-01 07:30
2024-10-01 08:00
2024-10-01 08:30
2024-10-01 09:00
2024-10-01 09:30
2024-10-01 10:00
2024-10-01 10:30
2024-10-01 11:00
2024-10-01 11:30

Last 10 Time Slots:
2024-10-03 19:00
2024-10-03 19:30
2024-10-03 20:00
2024-10-03 20:30
2024-10-03 21:00
2024-10-03 21:30
2024-10-03 22:00
2024-10-03 22:30
2024-10-03 23:00
2024-10-03 23:30


In [184]:
from datetime import time
# Set 'Date' as index
# ch_A_schedule.set_index('Date', inplace=True)

# Resample to 30-minute intervals (use mean of groups)
ch_A_schedule_30min = ch_A_schedule.resample('30T').mean().reset_index()

# Define start and end times for filtering
daily_start_time = time(7, 0)  # 7:00 AM
daily_end_time = time(23, 55)  # 11:55 PM

# Filter rows to include only time slots within the desired range 
ch_A_schedule_30min['time'] = ch_A_schedule_30min['Date'].dt.time
ch_A_schedule_30min = ch_A_schedule_30min[
    (ch_A_schedule_30min['time'] >= daily_start_time) &
    (ch_A_schedule_30min['time'] <= daily_end_time)
].drop(columns=['time'])

# Print the first 5 rows
print("First 5 rows of ch_A_schedule_30min:")
print(ch_A_schedule_30min.head(3))



First 5 rows of ch_A_schedule_30min:
                 Date  children_baseline_view_count  \
0 2024-10-01 07:00:00                      0.019383   
1 2024-10-01 07:30:00                      0.015485   
2 2024-10-01 08:00:00                      0.009684   

   adults_baseline_view_count  retirees_baseline_view_count  prime_time_factor  
0                    0.022296                      0.003611                1.0  
1                    0.022928                      0.004694                1.0  
2                    0.024245                      0.006021                1.0  


  ch_A_schedule_30min = ch_A_schedule.resample('30T').mean().reset_index()


In [185]:
# # Map baseline viewership to time slot indices
# baseline_viewership = {}
# for idx, row in ch_A_schedule_30min.iterrows():
#     time_slot_datetime = row['Date']
#     # Find the corresponding time slot index
#     time_slot_index = time_to_index.get(time_slot_datetime, None)
#     if time_slot_index is not None:
#         baseline_viewership[time_slot_index] = {
#             'children': row['children_baseline_view_count'],
#             'adults': row['adults_baseline_view_count'],
#             'retirees': row['retirees_baseline_view_count'],
#             'prime_time_factor': row.get('prime_time_factor', 1)
#         }

# # Print the first 5 rows
# print("Print timeslot_index 1 of dictionary baseline_viewership:")
# print(baseline_viewership[1])


In [186]:
# Check for duplicate movie titles
duplicate_titles = movies_df[movies_df.duplicated(subset=['title'], keep=False)]
if not duplicate_titles.empty:
    print("Duplicate movie titles found:")
    print(duplicate_titles['title'])
else:
    print("No duplicate movie titles found.")


Duplicate movie titles found:
4                 The Avengers
17                     Titanic
76               The Lion King
105       Beauty and the Beast
149        Alice in Wonderland
                 ...          
5748              Midnight Sun
5761                The Island
5773            The Shaggy Dog
5855    Fun with Dick and Jane
5879        The Perfect Weapon
Name: title, Length: 258, dtype: object


In [187]:
# Small movies DF to practice with code

movies_small = movies_df.head(550)

In [188]:
def get_views(m, t_idx, movies, our_channel):
    # Baseline viewership at time slot t_idx
    baseline = {
        'children': our_channel.iloc[t_idx]['children_baseline_view_count'],
        'adults': our_channel.iloc[t_idx]['adults_baseline_view_count'],
        'retirees': our_channel.iloc[t_idx]['retirees_baseline_view_count']
    }
    
    # Scaled popularity of movie m
    popularity = {
        'children': movies.loc[m, 'children_scaled_popularity'],
        'adults': movies.loc[m, 'adults_scaled_popularity'],
        'retirees': movies.loc[m, 'retirees_scaled_popularity']
    }
    
    # Calculate expected viewership (assuming total population of 1,000,000)
    total_population = 1_000_000
    viewership = sum(
        baseline[demo] * popularity[demo] * total_population
        for demo in ['children', 'adults', 'retirees']
    )
    
    return viewership

In [189]:
def model(T, movies, our_channel):
    prob = xp.problem(name="Movie_Scheduling_Problem")
    
    # Constants
    slots_per_day = 34  # Number of slots per day (e.g., from 07:00 to 23:30)
    days = len(time_slots) // slots_per_day  # Number of days in the schedule
    M = len(time_slots)  # Big M for constraints
    T_end = len(time_slots) - 1  # Last time slot index

    
    # Decision Variables
    x = {(m, t): xp.var(vartype=xp.binary, name='x_{0}_{1}'.format(m, t)) 
         for m in movies.index for t in T}
    prob.addVariable(list(x.values()))
    
    y = {m: xp.var(vartype=xp.binary, name='y_{0}'.format(m)) for m in movies.index}
    prob.addVariable(list(y.values()))
 
    
    s = {m: xp.var(vartype=xp.integer, name='s_{0}'.format(m)) for m in movies.index}
    e = {m: xp.var(vartype=xp.integer, name='e_{0}'.format(m)) for m in movies.index}
    prob.addVariable(list(s.values()))
    prob.addVariable(list(e.values()))
    
    u = {(m, t): xp.var(vartype=xp.continuous, name='u_{0}_{1}'.format(m, t)) 
         for m in movies.index for t in T}
    prob.addVariable(list(u.values()))
    
    decision_vars = [x, y, s, e]

    # Constraints
    
    # 1. Movie Duration Constraint
    for m in movies.index:
        prob.addConstraint(
            xp.Sum(x[m, t] for t in T) == movies.loc[m, 'slots_needed'] * y[m]
        )
    
    # 2. Time Slot Occupancy Constraint
    for t in T:
        prob.addConstraint(
            xp.Sum(x[m, t] for m in movies.index) == 1
        )
    
    # 3. Start Time Constraints
    for m in movies.index:
        for t in T:
            prob.addConstraint(
                s[m] <= t * x[m, t] + (1 - x[m, t]) * M
            )
    
    # 4. End Time Constraints
    for m in movies.index:
        for t in T:
            prob.addConstraint(
                e[m] >= (t + 1) * x[m, t]
            )
    
    # 5. Movie Duration Relationship Constraint
    for m in movies.index:
        prob.addConstraint(
            e[m] - s[m] == movies.loc[m, 'slots_needed'] * y[m]
        )

    # 6 last movie ends at midnight
    prob.addConstraint(s[m] + movies.loc[m, 'slots_needed'] -1 <= T_end for m in movies.index)

    # 7. View Count Constraints
    for m in movies.index:
        for t_idx, t in enumerate(T):
            expected_viewership = get_views(m, t_idx, movies, our_channel)
            prob.addConstraint(
                u[m, t] == expected_viewership * x[m, t]
            )
    
    # 9. Objective Function
    prob.setObjective(
        xp.Sum(u[m, t] for m in movies.index for t in T),
        sense=xp.maximize
    )
    
    return prob, decision_vars


In [205]:
# Making schedule layout
def get_time(slot_index):
    return time_slots[slot_index].time()

used_movie_ids = []
def get_sched(prob, movies, decision_vars): 
    
    x = decision_vars[0]
    y = decision_vars[1]
    s = decision_vars[2]
    e = decision_vars[3]
    
    scheduled_movies = []
    used_movie_ids = []


    for m_idx, m_row in movies.iterrows():
        y_value = prob.getSolution(y[m_idx])
        if y_value > 0.5:  # Movie is scheduled
            used_movie_ids.append(m_idx)
            start_slot = int(prob.getSolution(s[m_idx]))
            end_slot = int(prob.getSolution(e[m_idx]))
    
            # Ensure the end slot is valid
            if end_slot < len(time_slots):
                # Get the corresponding day and times
                start_time = time_slots[start_slot]
                end_time = time_slots[end_slot]
                day = start_time.strftime('%Y-%m-%d')
                start_time_formatted = start_time.strftime('%H:%M')
                end_time_formatted = end_time.strftime('%H:%M')
    
                # Append the movie schedule
                scheduled_movies.append({
                    'Time Slot': start_slot,
                    'Day': day,
                    'Start Time': start_time_formatted,
                    'End Time': end_time_formatted,
                    'Movie Index': m_idx,
                    'Movie Title': m_row['title'], 
                    'movie_budget': m_row['budget']
                })
    # movies_list = scheduled_movies.to_series
    # Convert to a DataFrame for better organization
    schedule_df = pd.DataFrame(scheduled_movies)   #.from_dict(scheduled_movies)
    
    # Sort by day and time slot
    schedule_df.sort_values(by=['Day', 'Time Slot'], inplace=True)
    
    # Reset index for cleaner output
    schedule_df.reset_index(drop=True, inplace=True)

    # Print the schedule
    print("Scheduled Movies in Order:")
    print(schedule_df)

    return scheduled_movies, used_movie_ids, schedule_df

    

In [212]:
# Putting it all together

def movie_sched(number_days, movies, our_channel):
    used_movie_ids= []
    total_schedule = []
    scheduled_dates = {}  # Dictionary to track when movies were scheduled by movie_id
    thang = []
    full_sched = pd.DataFrame(columns = ['Time Slot','Day','Start Time','End Time','Movie Index','Movie Title', 'movie_budget'])
    for k in range(number_days):

        # Before each day's scheduling, check for movies that have passed the 7-day window
        eligible_movies = []
        for m, dates in scheduled_dates.items():
            # Check if the movie was scheduled more than 14 days ago
            last_scheduled_date = pd.to_datetime(dates[-1])  # Get the last scheduled date
            if (pd.to_datetime('today') - last_scheduled_date).days > 14:
                eligible_movies.append(m)  # Movie can be scheduled again

        T = range(k*34, (k*34)+34)

        prob, decision_vars =  model(T, movies, our_channel)

        prob.solve()
        
        scheduled_movies, used_movie_ids, schedule_df = get_sched(prob,movies,decision_vars)

# Apply the 14-day constraint here
        valid_scheduled_movies = []
        valid_used_movie_ids = []
        
        for m, movie in zip(used_movie_ids, scheduled_movies):
            movie_date = movie['Start Time']
            # Check if this movie has been scheduled in the last 14 days
            if m not in scheduled_dates:
                scheduled_dates[m] = []
            
            # Filter out movies scheduled in the past 14 days
            if all((pd.to_datetime(movie_date) - pd.to_datetime(d)).days > 14 for d in scheduled_dates[m]):
                scheduled_dates[m].append(movie_date)  # Add the current movie date to the list
                valid_scheduled_movies.append(movie)
                valid_used_movie_ids.append(m)
                
            thang.append(valid_scheduled_movies)
    
        for i in used_movie_ids:
            movies.drop(i, inplace = True)
        total_schedule.append(scheduled_movies)

    pd.concat([full_sched, schedule_df])
    return full_sched
    print(full_sched)


In [213]:
two_day = movie_sched(number_days=num_days, movies=movies_small, our_channel=ch_A_schedule_30min)

FICO Xpress v9.4.2, Hyper, solve started 22:02:19, Nov 21, 2024
Heap usage: 23MB (peak 23MB, 43MB system)
Maximizing MILP Movie_Scheduling_Problem using up to 12 threads and up to 7528MB memory, with these control settings:
OUTPUTLOG = 1
NLPPOSTSOLVE = 1
XSLP_DELETIONCONTROL = 0
XSLP_OBJSENSE = -1
Original problem has:
     52849 rows        35713 cols       139331 elements     18611 entities
Presolved problem has:
     35244 rows        18611 cols       104624 elements     18611 entities
LP relaxation tightened
Presolve finished in 0 seconds
Heap usage: 36MB (peak 56MB, 43MB system)

Coefficient range                    original                 solved        
  Coefficients   [min,max] : [ 3.31e-02,  1.57e+05] / [ 1.56e-02,  1.97e+00]
  RHS and bounds [min,max] : [ 1.00e+00,  1.02e+02] / [ 1.00e+00,  1.02e+02]
  Objective      [min,max] : [ 1.00e+00,  1.00e+00] / [ 3.31e-02,  1.57e+05]
Autoscaling applied standard scaling

Symmetric problem: generators: 15, support set: 851
 Number of

In [None]:
# import numpy as np
# import pandas as pd

# def calculate_ad_slot_price(schedule_df: pd.DataFrame, base_fee: float,
#                             profit_margin: float, budget_factor: float,
#                             box_office_factor: float) -> pd.Series:
#     '''
#     Works out the cost required to buy a specific ad slot.  This is based on the time
#     of day, and the budget/earnings of the movie being shown before the
#     chosen ad slot.

#     This function is applied to a schedule dataframe to create a new column
#     containing the ad slot prices, returns NaN if the slot is not an ad slot.

#     This is also multiplied by the prime time factor, desired profit margin does
#     not take into account the effects of prime time factor currently, i.e.
#     there'll be a larger profit margin obtained than the one specified for spots
#     in prime time.

#     Values used in generation of dataset.
#     base_fee = 10_000
#     profit_margin = 0.2
#     budget_factor = 0.002
#     box_office_revenue_factor = 0.001

#     :param schedule_df: Dataframe containing the populated schedule with movies and
#                       : ad breaks.
#     :param base_fee: Base fee required for all movies to be licensed to a channel
#     :param profit_margin: Percent (in 0-1 scale) of license fee that the channel
#                         : wants to make in profit.
#     :param budget_factor: What percent (in 0-1 scale) of the movie's budget contributes
#                         : to the license fee.
#     :param box_office_factor: What percent (in 0-1 scale) of the movie's box office renvenue
#                             : contributes to the license fee.
#     '''

#     license_fee = (base_fee
#                    + (budget_factor * schedule_df.movie_budget)
#                    + (box_office_factor * schedule_df.box_office_revenue)
#                    ) * (1. + profit_margin)

#     ad_slot_cost = (license_fee / schedule_df.n_ad_breaks) * schedule_df.prime_time_factor

#     # return np.round(ad_slot_cost, 2)
#     return license_fee

In [None]:
# calculate_ad_slot_price(schedule_df= two_day, base_fee= 10_000, profit_margin= 0.2, budget_factor=0.002,box_office_factor=0.001)