In [3]:
# PATHS TO FILES

import numpy as np
import pandas as pd
import xpress as xp
from datetime import datetime, timedelta
import os

# Read in files using the explicitly defined base path
ch_0_conversion_rates = pd.read_csv('channel_0_conversion_rates.csv')
ch_0_schedule = pd.read_csv('channel_0_schedule.csv')
ch_1_conversion_rates = pd.read_csv('channel_1_conversion_rates.csv')
ch_1_schedule = pd.read_csv('channel_1_schedule.csv')
ch_2_conversion_rates = pd.read_csv('channel_2_conversion_rates.csv')
ch_2_schedule = pd.read_csv('channel_2_schedule.csv')
ch_A_schedule = pd.read_csv('channel_A_schedule.csv')
movies_df = pd.read_csv('movie_database.csv')

In [4]:
xp.init('C:/xpressmp/bin/xpauth.xpr')
pd.options.mode.copy_on_write = True

In [5]:
# FORMATING
# Convert 'Date-Time' columns to datetime format
date_cols = ['Date']

for df in [ch_0_conversion_rates, ch_0_schedule, ch_1_conversion_rates, ch_1_schedule,
           ch_2_conversion_rates, ch_2_schedule, ch_A_schedule]:
    df['Date'] = pd.to_datetime(df['Unnamed: 0'])
    df.set_index('Date', inplace=True)
    df.drop('Unnamed: 0', axis=1, inplace = True)
   

# Convert 'Release Date' in movie_database to datetime
movies_df['release_date'] = pd.to_datetime(movies_df['release_date'])

# Fill missing values if necessary
movies_df.fillna(0, inplace=True)


In [6]:
# Slot duration 30 minutes
slot_duration = 30  # minutes
movies_df['slots_needed'] = (movies_df['runtime_with_ads'] / slot_duration).apply(lambda x: int(x)).astype(int)


In [7]:
# Define the broadcasting date

k = 2
start_date = datetime.strptime("2024-10-01", "%Y-%m-%d")
test_range = pd.date_range(start_date, periods = k)

broadcast_date = []
for i in test_range:
    broadcast_date.append(i)
for i in broadcast_date:
    broadcast_start = i.replace(hour=7, minute=0)
    broadcast_end = i.replace(hour=23, minute=30)
# print(broadcast_date)
    

# Generate all time slots
 
time_slots = []
current_time = broadcast_start
while current_time <= broadcast_end:
    time_slots.append(current_time)
    current_time += timedelta(minutes=slot_duration)

# Create mappings between time slots and indices
time_to_index = {t: idx for idx, t in enumerate(time_slots)}
index_to_time = {idx: t for idx, t in enumerate(time_slots)}


In [8]:
# Set 'Date' as index
#ch_A_schedule.set_index('Date', inplace=True)

# Resample to 30-minute intervals (use mean of groups)
ch_A_schedule_30min = ch_A_schedule.resample('30T').mean(numeric_only = True).reset_index()



  ch_A_schedule_30min = ch_A_schedule.resample('30T').mean(numeric_only = True).reset_index()


In [14]:
# # Create a dictionary to hold baseline viewership per time slot
# baseline_viewership = {}

# for idx, row in ch_A_schedule_30min.iterrows():
#     time_slot_time = row['Date'].time()
#     baseline_viewership[time_slot_time] = {
#         'children': row['children_baseline_view_count'],
#         'adults': row['adults_baseline_view_count'],
#         'retirees': row['retirees_baseline_view_count'],
#         'prime_time_factor': row.get('prime_time_factor', 1)
#     }


In [10]:
# Check for duplicate movie titles
duplicate_titles = movies_df[movies_df.duplicated(subset=['title'], keep=False)]
if not duplicate_titles.empty:
    print("Duplicate movie titles found:")
    print(duplicate_titles['title'])
else:
    print("No duplicate movie titles found.")


Duplicate movie titles found:
4                 The Avengers
17                     Titanic
76               The Lion King
105       Beauty and the Beast
149        Alice in Wonderland
                 ...          
5748              Midnight Sun
5761                The Island
5773            The Shaggy Dog
5855    Fun with Dick and Jane
5879        The Perfect Weapon
Name: title, Length: 258, dtype: object


In [11]:
# print("Baseline Viewership Data:")
# for key, value in list(baseline_viewership.items())[:5]:  # Print first 5 entries
#     print(f"Time Slot {key}: {value}")


Baseline Viewership Data:
Time Slot 07:00:00: {'children': 0.019382841356626535, 'adults': 0.02229631398193545, 'retirees': 0.0036111486676537498, 'prime_time_factor': 1.0}
Time Slot 07:30:00: {'children': 0.0154847774957037, 'adults': 0.02292807047704942, 'retirees': 0.004694213755967884, 'prime_time_factor': 1.0}
Time Slot 08:00:00: {'children': 0.00968385690271545, 'adults': 0.024244919498669682, 'retirees': 0.0060211763864975, 'prime_time_factor': 1.0}
Time Slot 08:30:00: {'children': 0.00474409740529315, 'adults': 0.026237828711766702, 'retirees': 0.007611797991820133, 'prime_time_factor': 1.0}
Time Slot 09:00:00: {'children': 0.0018312668340905334, 'adults': 0.028892152598647483, 'retirees': 0.009478422773657767, 'prime_time_factor': 1.0}


In [12]:
# Total population (adjust as needed)
total_population = 1_000_000 

In [15]:
# # PRINT EXAMPLE OF BASE VIEWERSHIP. CONTAINS VIEWERSHIP PER MOVIE PER TIME SLOT (sums all groups)
# print("Sample of base_viewership:")
# sample_items = list(base_viewership.items())[:5]
# for key, value in sample_items:
#     print(f"{key}: {value}")


In [16]:
movies_small = movies_df.sample(150)
num_movies = len(movies_small)
num_slots = 34

mov = range(num_movies)
sched = range(num_slots)

In [28]:
def model(T, movies_small):
    prob = xp.problem(name="Movie_Scheduling_Problem")
    M = range(len(movies_small))

    # Decision Variables
    # if movie m in shown in time slot t
    x = {(m,t): xp.var(vartype=xp.binary, name='x{0}_{1}'.format(m,t)) 
         for t in T for m in movies_small.index}
    prob.addVariable(x)

    # if movie m is shown 
    y = {(m): xp.var(vartype=xp.binary, name='y{0}'.format(m)) for m in movies_small}
    prob.addVariable(y)

    # start time of movie m 
    s = {(m): xp.var(vartype=xp.integer, name='s{0}'.format(m)) for m in movies_small}
    prob.addVariable(s)

    # end time of movie m 
    e = {(m): xp.var(vartype=xp.integer, name='e{0}'.format(m)) for m in movies_small}
    prob.addVariable(e)

    # movie duration
    d = {(m): xp.var(vartype=xp.integer, name='d{0}'.format(m)) for m in movies_small}
    prob.addVariable(d)

    # Constraints
    for t in T:
        # big M 
        M = len(time_slots)
        T_end = len(time_slots) -1 # last time slot index
        
        # movie duration 
        prob.addConstraint(xp.Sum(x) == d*y for m in movies_small)

        # end time
        prob.addConstraint(e == t*xp.Sum(x) for m in movies_small)

        # end time limit
        prob.addConstraint((t*x) <= e for m in movies_small)

        # start time limit
        prob.addConstraint(s <= (t*x) for m in movies_small)

        # latest time a movie can start and still show full movie 
        prob.addConstraint(s <= t + (1-x)*M for m in movies_small)

        # end-start = movie duration
        prob.addConstraint(e-s+1 == d*y for m in movies_small)

        # last movie time?
        prob.addConstraint(s+d-1 <= T_end)
        

    
    return prob

SyntaxError: expression cannot contain assignment, perhaps you meant "=="? (3910721129.py, line 34)

In [29]:
days = 2
total_schedule = []
for k in range(3): 
    T = range(k*33, (k+1)*33+1)

    prob =  model(T,movies_small)
    
    prob.solve()
    
    scheduled_movies, used_movie_ids = get_sched(prob,movies_small)

    for i in used_movie_ids:
        movies_small.drop(i, inplace = True)
    total_schedule.append(scheduled_movies)

TypeError: unsupported operand type(s) for *: 'dict' and 'dict'