In [79]:
# PATHS TO FILES

import numpy as np
import pandas as pd
import xpress as xp
from datetime import datetime, timedelta
import os

# Read in files using the explicitly defined base path
ch_0_conversion_rates = pd.read_csv('channel_0_conversion_rates.csv')
ch_0_schedule = pd.read_csv('channel_0_schedule.csv')
ch_1_conversion_rates = pd.read_csv('channel_1_conversion_rates.csv')
ch_1_schedule = pd.read_csv('channel_1_schedule.csv')
ch_2_conversion_rates = pd.read_csv('channel_2_conversion_rates.csv')
ch_2_schedule = pd.read_csv('channel_2_schedule.csv')
ch_A_schedule = pd.read_csv('channel_A_schedule.csv')
movies_df = pd.read_csv('movie_database.csv')

In [80]:
xp.init('C:/xpressmp/bin/xpauth.xpr')
pd.options.mode.copy_on_write = True

In [81]:
# FORMATING
# Convert 'Date-Time' columns to datetime format
date_cols = ['Date']

for df in [ch_0_conversion_rates, ch_0_schedule, ch_1_conversion_rates, ch_1_schedule,
           ch_2_conversion_rates, ch_2_schedule, ch_A_schedule]:
    df['Date'] = pd.to_datetime(df['Unnamed: 0'])
    df.set_index('Date', inplace=True)
    df.drop('Unnamed: 0', axis=1, inplace = True)
   

# Convert 'Release Date' in movie_database to datetime
movies_df['release_date'] = pd.to_datetime(movies_df['release_date'])

# Fill missing values if necessary
movies_df.fillna(0, inplace=True)


In [82]:
# Slot duration 30 minutes
slot_duration = 30  # minutes
#movies_df['slots_needed'] = (movies_df['runtime_with_ads'] / slot_duration).apply(lambda x: int(x)).astype(int)
slots_needed = (movies_df['runtime_with_ads'] / slot_duration).apply(lambda x: int(x)).astype(int)
movies_df['slots_needed'] = slots_needed

In [83]:
movies_df.head()

Unnamed: 0,title,vote_average,vote_count,release_date,revenue,runtime,budget,popularity,genres,n_ad_breaks,runtime_with_ads,scaled_popularity,children_scaled_popularity,adults_scaled_popularity,retirees_scaled_popularity,slots_needed
0,Inception,8.364,34495,2010-07-15,825532764,148,160000000,83.952,"['Action', 'Science Fiction', 'Adventure']",5,180.0,0.972027,0.777622,0.972027,0.388811,6
1,Interstellar,8.417,32571,2014-11-05,701729206,169,165000000,140.241,"['Adventure', 'Drama', 'Science Fiction']",6,210.0,1.0,0.6,1.0,0.6,7
2,The Dark Knight,8.512,30619,2008-07-16,1004558444,152,185000000,130.643,"['Drama', 'Action', 'Crime', 'Thriller']",6,180.0,1.0,0.6,1.0,0.8,6
3,Avatar,7.573,29815,2009-12-15,2923706026,162,237000000,79.932,"['Action', 'Adventure', 'Fantasy', 'Science Fi...",6,180.0,0.958199,0.766559,0.958199,0.38328,6
4,The Avengers,7.71,29166,2012-04-25,1518815515,143,220000000,98.082,"['Science Fiction', 'Action', 'Adventure']",5,180.0,1.0,0.8,1.0,0.4,6


In [84]:
# ORIGINAL BROADCAST DATE ONE DAY

# Define the broadcasting date

broadcast_date = datetime.strptime("2024-10-01", "%Y-%m-%d")
#start_date = datetime.strptime("2024-10-01", "%Y-%m-%d")
#test_range = pd.date_range(start_date, periods = k)

broadcast_start = broadcast_date.replace(hour=7, minute=0)
broadcast_end = broadcast_date.replace(hour=23, minute=30)
# print(broadcast_date)
    

# Generate all time slots
 
time_slots = []
current_time = broadcast_start
while current_time <= broadcast_end:
    time_slots.append(current_time)
    current_time += timedelta(minutes=slot_duration)

# Create mappings between time slots and indices
time_to_index = {t: idx for idx, t in enumerate(time_slots)}
index_to_time = {idx: t for idx, t in enumerate(time_slots)}

print(time_slots)

[datetime.datetime(2024, 10, 1, 7, 0), datetime.datetime(2024, 10, 1, 7, 30), datetime.datetime(2024, 10, 1, 8, 0), datetime.datetime(2024, 10, 1, 8, 30), datetime.datetime(2024, 10, 1, 9, 0), datetime.datetime(2024, 10, 1, 9, 30), datetime.datetime(2024, 10, 1, 10, 0), datetime.datetime(2024, 10, 1, 10, 30), datetime.datetime(2024, 10, 1, 11, 0), datetime.datetime(2024, 10, 1, 11, 30), datetime.datetime(2024, 10, 1, 12, 0), datetime.datetime(2024, 10, 1, 12, 30), datetime.datetime(2024, 10, 1, 13, 0), datetime.datetime(2024, 10, 1, 13, 30), datetime.datetime(2024, 10, 1, 14, 0), datetime.datetime(2024, 10, 1, 14, 30), datetime.datetime(2024, 10, 1, 15, 0), datetime.datetime(2024, 10, 1, 15, 30), datetime.datetime(2024, 10, 1, 16, 0), datetime.datetime(2024, 10, 1, 16, 30), datetime.datetime(2024, 10, 1, 17, 0), datetime.datetime(2024, 10, 1, 17, 30), datetime.datetime(2024, 10, 1, 18, 0), datetime.datetime(2024, 10, 1, 18, 30), datetime.datetime(2024, 10, 1, 19, 0), datetime.datetime(

In [85]:
from datetime import datetime, timedelta
num_days = 7
# Define the broadcasting start and end dates
broadcast_start_date = datetime.strptime("2024-10-01", "%Y-%m-%d")
broadcast_end_date = broadcast_start_date + timedelta(days=num_days)  # 2 days including start date

# Define daily broadcast start and end times
daily_broadcast_start_time = timedelta(hours=7, minutes=0)
daily_broadcast_end_time = timedelta(hours=23, minutes=30)

# Generate all time slots over the date range
time_slots = []
current_date = broadcast_start_date
while current_date <= broadcast_end_date:
    # Set the start and end times for the current day
    day_start = datetime.combine(current_date.date(), datetime.min.time()) + daily_broadcast_start_time
    day_end = datetime.combine(current_date.date(), datetime.min.time()) + daily_broadcast_end_time

    current_time = day_start
    while current_time <= day_end:
        time_slots.append(current_time)
        current_time += timedelta(minutes=slot_duration)

    # Move to the next day
    current_date += timedelta(days=1)

# Create mappings between time slots and indices
time_to_index = {t: idx for idx, t in enumerate(time_slots)}
index_to_time = {idx: t for idx, t in enumerate(time_slots)}

In [86]:
# Set 'Date' as index
#ch_A_schedule.set_index('Date', inplace=True)

# Resample to 30-minute intervals (use mean of groups)
ch_A_schedule_30min = ch_A_schedule.resample('30T').mean(numeric_only = True).reset_index()



  ch_A_schedule_30min = ch_A_schedule.resample('30T').mean(numeric_only = True).reset_index()


In [87]:
# # Create a dictionary to hold baseline viewership per time slot
# baseline_viewership = {}

# for idx, row in ch_A_schedule_30min.iterrows():
#     time_slot_time = row['Date'].time()
#     baseline_viewership[time_slot_time] = {
#         'children': row['children_baseline_view_count'],
#         'adults': row['adults_baseline_view_count'],
#         'retirees': row['retirees_baseline_view_count'],
#         'prime_time_factor': row.get('prime_time_factor', 1)
#     }


In [88]:
# Check for duplicate movie titles
duplicate_titles = movies_df[movies_df.duplicated(subset=['title'], keep=False)]
if not duplicate_titles.empty:
    print("Duplicate movie titles found:")
    print(duplicate_titles['title'])
else:
    print("No duplicate movie titles found.")


Duplicate movie titles found:
4                 The Avengers
17                     Titanic
76               The Lion King
105       Beauty and the Beast
149        Alice in Wonderland
                 ...          
5748              Midnight Sun
5761                The Island
5773            The Shaggy Dog
5855    Fun with Dick and Jane
5879        The Perfect Weapon
Name: title, Length: 258, dtype: object


In [89]:
# print("Baseline Viewership Data:")
# for key, value in list(baseline_viewership.items())[:5]:  # Print first 5 entries
#     print(f"Time Slot {key}: {value}")


In [90]:
# Total population (adjust as needed)
total_population = 1_000_000 

In [91]:
# # PRINT EXAMPLE OF BASE VIEWERSHIP. CONTAINS VIEWERSHIP PER MOVIE PER TIME SLOT (sums all groups)
# print("Sample of base_viewership:")
# sample_items = list(base_viewership.items())[:5]
# for key, value in sample_items:
#     print(f"{key}: {value}")


In [92]:
movies_small = movies_df.head(300)
movies_small.set_index('title', inplace = True)
num_movies = len(movies_small)
num_slots = 34

mov = range(num_movies)
sched = range(num_slots)

In [93]:
movies_small.head()

Unnamed: 0_level_0,vote_average,vote_count,release_date,revenue,runtime,budget,popularity,genres,n_ad_breaks,runtime_with_ads,scaled_popularity,children_scaled_popularity,adults_scaled_popularity,retirees_scaled_popularity,slots_needed
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Inception,8.364,34495,2010-07-15,825532764,148,160000000,83.952,"['Action', 'Science Fiction', 'Adventure']",5,180.0,0.972027,0.777622,0.972027,0.388811,6
Interstellar,8.417,32571,2014-11-05,701729206,169,165000000,140.241,"['Adventure', 'Drama', 'Science Fiction']",6,210.0,1.0,0.6,1.0,0.6,7
The Dark Knight,8.512,30619,2008-07-16,1004558444,152,185000000,130.643,"['Drama', 'Action', 'Crime', 'Thriller']",6,180.0,1.0,0.6,1.0,0.8,6
Avatar,7.573,29815,2009-12-15,2923706026,162,237000000,79.932,"['Action', 'Adventure', 'Fantasy', 'Science Fi...",6,180.0,0.958199,0.766559,0.958199,0.38328,6
The Avengers,7.71,29166,2012-04-25,1518815515,143,220000000,98.082,"['Science Fiction', 'Action', 'Adventure']",5,180.0,1.0,0.8,1.0,0.4,6


In [94]:
def model(T, movies):
    prob = xp.problem(name="Movie_Scheduling_Problem")
    # M = range(len(movies_small))
    # Decision Variables
    # if movie m in shown in time slot t
    x = {(m,t): xp.var(vartype=xp.binary, name='x{0}_{1}'.format(m,t)) 
         for t in T for m in movies.index}
    prob.addVariable(x)
    
    y = {(m): xp.var(vartype=xp.binary, name='y{0}'.format(m)) for m in movies.index}
    prob.addVariable(y)

    # start time of movie m 
    s = {(m): xp.var(vartype=xp.integer, name='s{0}'.format(m)) for m in movies.index}
    prob.addVariable(s)

    # end time of movie m 
    e = {(m): xp.var(vartype=xp.integer, name='e{0}'.format(m)) for m in movies.index}
    prob.addVariable(e)
    decision_vars = [x,y,s,e]
    # movie duration
    

    # Constraints
    # for t in T:
        # big M 
    M = len(time_slots)
    T_end = len(time_slots) -1 # last time slot index
    
    # movie duration 
    prob.addConstraint(xp.Sum(x[m,t] for t in T) == movies.loc[m, 'slots_needed']*y[m] for m in movies.index)

    prob.addConstraint(xp.Sum(x[m,t] for m in movies.index) == 1 for t in T)

    # end time
    # prob.addConstraint(e[m] == t*xp.Sum(x) for m in movie.index())

    # end time limit
    prob.addConstraint((t+1)*x[m,t] <= e[m] for m in movies.index for t in T)

    # start time limit
    #prob.addConstraint(s[m] <= (t*x[m,t]) for m in movie.index() fot t in T)

    # latest time a movie can start and still show full movie 
    prob.addConstraint(s[m] <= t*x[m,t] + (1-x[m,t])*M for m in movies.index for t in T)

    # end-start = movie duration
    prob.addConstraint(e[m]-s[m] == movies.loc[m, 'slots_needed']*y[m] for m in movies.index)

    # last movie time?
    prob.addConstraint(s[m] + movies.loc[m, 'slots_needed'] -1 <= T_end for m in movies.index)

    return prob, decision_vars

In [95]:
def get_time(slot_index):
    return time_slots[slot_index].time()

used_movie_ids = []
def get_sched(prob, movies, decision_vars): 
    
    # antonio's schedule code 
    x = decision_vars[0]
    y = decision_vars[1]
    s = decision_vars[2]
    e = decision_vars[3]
    
    scheduled_movies = []
    used_movie_ids = []

    for m in movies_small.index:
    # Retrieve the solution value for y[m_idx]
        y_value = prob.getSolution(y[m])
        
        if y_value > 0.5:  # Movie is scheduled
            used_movie_ids.append(m)
            
            # # Retrieve solution values for s[m_idx] and e[m_idx]
            start_slot = int(prob.getSolution(s[m]))
            end_slot = int(prob.getSolution(e[m]))
            
            # # # # Convert slot indices to actual times
            start_time = get_time(start_slot)
            end_time = get_time(end_slot)
    
            # # Append the scheduled movie details
            scheduled_movies.append({
                'Movie Title': m,
                'Start Slot': start_slot,
                'Start Time': start_time.strftime('%H:%M'),
                'End Slot': end_slot,
                'End Time': end_time.strftime('%H:%M')
            })
    schedule_df = pd.DataFrame.from_dict(scheduled_movies)
    schedule_df.sort_values(['Start Slot'], axis = 0, inplace = True)
    print(schedule_df)
    return scheduled_movies, used_movie_ids
    

In [96]:
days = 1
total_schedule = []
for k in range(num_days): 
    T = range(k*33, (k+1)*33)

    prob, decision_vars =  model(T,movies_small)
    
    prob.solve()
    
    scheduled_movies, used_movie_ids = get_sched(prob,movies_small,decision_vars)
    
    for i in used_movie_ids:
        movies_small.drop(i, inplace = True)
    total_schedule.append(scheduled_movies)

#print(scheduled_movies)
print(schedule_df)

FICO Xpress v9.4.2, Hyper, solve started 12:49:42, Nov 18, 2024
Heap usage: 10MB (peak 10MB, 7582KB system)
Minimizing MILP Movie_Scheduling_Problem using up to 12 threads and up to 7528MB memory, with these control settings:
OUTPUTLOG = 1
NLPPOSTSOLVE = 1
XSLP_DELETIONCONTROL = 0
XSLP_OBJSENSE = 1
Original problem has:
     20733 rows        10800 cols        60900 elements     10800 entities
Presolved problem has:
     20433 rows        10800 cols        60600 elements     10800 entities
LP relaxation tightened
Presolve finished in 0 seconds
Heap usage: 16MB (peak 22MB, 7582KB system)

Coefficient range                    original                 solved        
  Coefficients   [min,max] : [ 1.00e+00,  2.72e+02] / [ 3.91e-03,  1.99e+00]
  RHS and bounds [min,max] : [ 1.00e+00,  2.72e+02] / [ 1.00e+00,  2.72e+02]
  Objective      [min,max] : [      0.0,       0.0] / [      0.0,       0.0]
Autoscaling applied standard scaling

Symmetric problem: generators: 294, support set: 10800
 Num

NameError: name 'schedule_df' is not defined