In [51]:
# PATHS TO FILES
# Attempt 1 at basic code assigning a movie/ads spaces to a schedule

import numpy as np
import pandas as pd
import xpress as xp
from datetime import datetime, timedelta
import os

# Define the base path explicitly to where your files are stored
# This is the correct path on your machine
base_path = 'D:/Documents/EDINBURGH/MMCS/final_project/repository/mmcsfinalproject/data'

# Read in files using the explicitly defined base path
ch_0_conversion_rates = pd.read_csv(os.path.join(base_path, 'channel_0_conversion_rates.csv'))
ch_0_schedule = pd.read_csv(os.path.join(base_path, 'channel_0_schedule.csv'))
ch_1_conversion_rates = pd.read_csv(os.path.join(base_path, 'channel_1_conversion_rates.csv'))
ch_1_schedule = pd.read_csv(os.path.join(base_path, 'channel_1_schedule.csv'))
ch_2_conversion_rates = pd.read_csv(os.path.join(base_path, 'channel_2_conversion_rates.csv'))
ch_2_schedule = pd.read_csv(os.path.join(base_path, 'channel_2_schedule.csv'))
ch_A_schedule = pd.read_csv(os.path.join(base_path, 'channel_A_schedule.csv'))
movies_df = pd.read_csv(os.path.join(base_path, 'movie_database.csv'))

In [52]:
# FORMATING
# Convert 'Date-Time' columns to datetime format
date_cols = ['Date']

for df in [ch_0_conversion_rates, ch_0_schedule, ch_1_conversion_rates, ch_1_schedule,
           ch_2_conversion_rates, ch_2_schedule, ch_A_schedule]:
    df['Date'] = pd.to_datetime(df['Date'])

# Convert 'Release Date' in movie_database to datetime
movies_df['release_date'] = pd.to_datetime(movies_df['release_date'])

# Fill missing values if necessary
movies_df.fillna(0, inplace=True)


In [53]:
# Slot duration 30 minutes
slot_duration = 30  # minutes
movies_df['slots_needed'] = (movies_df['runtime_with_ads'] / slot_duration).apply(lambda x: int(x)).astype(int)


In [54]:
# Define the broadcasting date
broadcast_date = datetime.strptime("2024-10-01", "%Y-%m-%d")
broadcast_start = broadcast_date.replace(hour=7, minute=0)
broadcast_end = broadcast_date.replace(hour=23, minute=30)

# Generate all time slots
time_slots = []
current_time = broadcast_start
while current_time <= broadcast_end:
    time_slots.append(current_time)
    current_time += timedelta(minutes=slot_duration)

# Create mappings between time slots and indices
time_to_index = {t: idx for idx, t in enumerate(time_slots)}
index_to_time = {idx: t for idx, t in enumerate(time_slots)}


In [55]:
# Set 'Date' as index
ch_A_schedule.set_index('Date', inplace=True)

# Resample to 30-minute intervals (use mean of groups)
ch_A_schedule_30min = ch_A_schedule.resample('30T').mean().reset_index()



  ch_A_schedule_30min = ch_A_schedule.resample('30T').mean().reset_index()


In [56]:
# Create a dictionary to hold baseline viewership per time slot
baseline_viewership = {}

for idx, row in ch_A_schedule_30min.iterrows():
    time_slot_time = row['Date'].time()
    baseline_viewership[time_slot_time] = {
        'children': row['children_baseline_view_count'],
        'adults': row['adults_baseline_view_count'],
        'retirees': row['retirees_baseline_view_count'],
        'prime_time_factor': row.get('prime_time_factor', 1)
    }


In [57]:
# Initialize the problem
prob = xp.problem(name="Movie_Scheduling_Problem")


In [58]:
# Check for duplicate movie titles
duplicate_titles = movies_df[movies_df.duplicated(subset=['title'], keep=False)]
if not duplicate_titles.empty:
    print("Duplicate movie titles found:")
    print(duplicate_titles['title'])
else:
    print("No duplicate movie titles found.")


Duplicate movie titles found:
4                 The Avengers
17                     Titanic
76               The Lion King
105       Beauty and the Beast
149        Alice in Wonderland
                 ...          
5748              Midnight Sun
5761                The Island
5773            The Shaggy Dog
5855    Fun with Dick and Jane
5879        The Perfect Weapon
Name: title, Length: 258, dtype: object


In [59]:
print(movies_df)

                      title  vote_average  vote_count release_date  \
0                 Inception         8.364       34495   2010-07-15   
1              Interstellar         8.417       32571   2014-11-05   
2           The Dark Knight         8.512       30619   2008-07-16   
3                    Avatar         7.573       29815   2009-12-15   
4              The Avengers         7.710       29166   2012-04-25   
...                     ...           ...         ...          ...   
5915           The In Crowd         4.480          51   2000-07-19   
5916     Mr. Saturday Night         5.769          51   1992-09-23   
5917  Young Doctors in Love         5.204          51   1982-07-16   
5918       End of the Spear         6.010          51   2005-10-24   
5919      His Kind of Woman         6.657          51   1951-08-15   

         revenue  runtime     budget  popularity  \
0      825532764      148  160000000      83.952   
1      701729206      169  165000000     140.241   
2  

In [60]:
# Create variables x_{mt}
x = {}
for m_idx, m_row in movies_df.iterrows():
    m = m_row['title']
    m_id = f"{m.replace(' ', '_')}_{m_idx}"
    for t in range(len(time_slots)):
        var_name = f"x_{m_id}_{t}"
        x[(m_idx, t)] = xp.var(vartype=xp.binary, name=var_name)
        prob.addVariable(x[(m_idx, t)])

# Create variables y_m
y = {}
for m_idx, m_row in movies_df.iterrows():
    m = m_row['title']
    m_id = f"{m.replace(' ', '_')}_{m_idx}"
    var_name = f"y_{m_id}"
    y[m_idx] = xp.var(vartype=xp.binary, name=var_name)
    prob.addVariable(y[m_idx])

# Create variables s_m
s = {}
for m_idx, m_row in movies_df.iterrows():
    m = m_row['title']
    m_id = f"{m.replace(' ', '_')}_{m_idx}"
    var_name = f"s_{m_id}"
    s[m_idx] = xp.var(vartype=xp.integer, name=var_name, lb=0, ub=len(time_slots)-1)
    prob.addVariable(s[m_idx])

# Create variables e_m
e = {}
for m_idx, m_row in movies_df.iterrows():
    m = m_row['title']
    m_id = f"{m.replace(' ', '_')}_{m_idx}"
    var_name = f"e_{m_id}"
    e[m_idx] = xp.var(vartype=xp.integer, name=var_name, lb=0, ub=len(time_slots)-1)
    prob.addVariable(e[m_idx])


In [61]:
# Map baseline viewership to time slot indices
baseline_viewership = {}
for idx, row in ch_A_schedule_30min.iterrows():
    time_slot_datetime = row['Date']
    # Find the corresponding time slot index
    time_slot_index = time_to_index.get(time_slot_datetime, None)
    if time_slot_index is not None:
        baseline_viewership[time_slot_index] = {
            'children': row['children_baseline_view_count'],
            'adults': row['adults_baseline_view_count'],
            'retirees': row['retirees_baseline_view_count'],
            'prime_time_factor': row.get('prime_time_factor', 1)
        }


In [62]:
print("Baseline Viewership Data:")
for key, value in list(baseline_viewership.items())[:5]:  # Print first 5 entries
    print(f"Time Slot {key}: {value}")


Baseline Viewership Data:
Time Slot 0: {'children': 0.019382841356626535, 'adults': 0.022296313934718853, 'retirees': 0.0036111486676537498, 'prime_time_factor': 1.0}
Time Slot 1: {'children': 0.0154847774957037, 'adults': 0.022928070451193747, 'retirees': 0.004694213755967884, 'prime_time_factor': 1.0}
Time Slot 2: {'children': 0.00968385690271545, 'adults': 0.024244919484627685, 'retirees': 0.0060211763864975, 'prime_time_factor': 1.0}
Time Slot 3: {'children': 0.00474409740529315, 'adults': 0.026237828704203284, 'retirees': 0.007611797991820133, 'prime_time_factor': 1.0}
Time Slot 4: {'children': 0.0018312668340905334, 'adults': 0.02889215259460713, 'retirees': 0.009478422773657767, 'prime_time_factor': 1.0}


In [63]:
# Total population (adjust as needed)
total_population = 1_000_000 

# Compute base viewership
base_viewership = {}
for m_idx, m_row in movies_df.iterrows():
    m = m_row['title']
    m_id = f"{m.replace(' ', '_')}_{m_idx}"
    # Scaled popularity for the movie
    scaled_popularity = {
        'children': m_row['children_scaled_popularity'],
        'adults': m_row['adults_scaled_popularity'],
        'retirees': m_row['retirees_scaled_popularity']
    }
    for t in range(len(time_slots)):
        # Baseline viewership for the time slot
        baseline = baseline_viewership.get(t, {
            'children': 0,
            'adults': 0,
            'retirees': 0,
            'prime_time_factor': 1
        })
        # Calculate base viewership for this movie and time slot
        viewership = 0
        for group in ['children', 'adults', 'retirees']:
            group_viewership = (
                baseline[group] *
                scaled_popularity[group] *
                baseline['prime_time_factor'] *
                total_population
            )
            viewership += group_viewership
        # Store the calculated viewership
        base_viewership[(m_idx, t)] = viewership



In [64]:
# PRINT EXAMPLE OF BASE VIEWERSHIP. CONTAINS VIEWERSHIP PER MOVIE PER TIME SLOT (sums all groups)
print("Sample of base_viewership:")
sample_items = list(base_viewership.items())[:5]
for key, value in sample_items:
    print(f"{key}: {value}")


Sample of base_viewership:
(0, 0): 38149.2062495903
(0, 1): 36153.17829812642
(0, 2): 33438.205834265864
(0, 3): 32152.553718488532
(0, 4): 33193.31238204702


In [67]:
# Create the objective function expression
objective_terms = []
for m_idx, m_row in movies_df.iterrows():
    for t in range(len(time_slots)):
        var = x[(m_idx, t)]
        coeff = base_viewership.get((m_idx, t), 0)
        if coeff != 0:
            objective_terms.append(coeff * var)

# Set the objective function
prob.setObjective(xp.Sum(objective_terms), sense=xp.maximize)


In [68]:
# Constraint 1: Time Slot Occupancy Constraint
for t in range(len(time_slots)):
    prob.addConstraint(
        xp.Sum(x[(m_idx, t)] for m_idx in movies_df.index) == 1
    )


In [69]:
# Constraint 2: Movie Duration Constraint
for m_idx, m_row in movies_df.iterrows():
    d_m = m_row['slots_needed']
    duration = xp.Sum(x[(m_idx, t)] for t in range(len(time_slots)))
    prob.addConstraint(duration == y[m_idx] * d_m)


In [70]:
# Constraint 3: End Time Definition Constraint
for m_idx in movies_df.index:
    for t in range(len(time_slots)):
        end_time_constraint = e[m_idx] >= t * x[(m_idx, t)]
        prob.addConstraint(end_time_constraint)


In [71]:
# Constraint 4: Movie Duration Relationship Constraint
for m_idx, m_row in movies_df.iterrows():
    d_m = m_row['slots_needed']
    duration_relationship = e[m_idx] - s[m_idx] + 1 == y[m_idx] * d_m
    prob.addConstraint(duration_relationship)


In [72]:
# Define Big-M
M = len(time_slots)

# Constraint 5: Start Time Constraint
for m_idx in movies_df.index:
    for t in range(len(time_slots)):
        start_time_constraint = s[m_idx] <= t + M * (1 - x[(m_idx, t)])
        prob.addConstraint(start_time_constraint)


In [73]:
# Constraint 6: Contiguity Constraint
for m_idx, m_row in movies_df.iterrows():
    d_m = m_row['slots_needed']
    for t in range(len(time_slots)):
        # Enforce x_{mt} == 1 if t is within [s_m, s_m + d_m -1]
        # Using Big-M constraints
        
        # x_{mt} >= y_m - (s_m > t + d_m -1)
        # Since we cannot use logical operators, use linear constraints
        
        # If t >= s_m and t < s_m + d_m, then x_{mt} >= y_m
        # Otherwise, x_{mt} >= 0 (which is already ensured since x is binary)
        
        # Implement the following:
        # x_{mt} >= y_m + (t - s_m) / M - (t - s_m - d_m + 1) / M - 1
        # This is a linear approximation and may not strictly enforce contiguity
        # Hence, it's better to use integer variables and enforce exact relations
        
        # Instead, we'll use the relationship defined in Constraint 4 to indirectly enforce contiguity
        pass  # Already handled by Constraints 2 and 4


In [74]:
# Constraint 7: Broadcasting Time Limit Constraint
T_end = len(time_slots) - 1  # Last time slot index

for m_idx, m_row in movies_df.iterrows():
    d_m = m_row['slots_needed']
    broadcasting_limit = s[m_idx] + d_m - 1 <= T_end
    prob.addConstraint(broadcasting_limit)


In [75]:
# Set solver options if needed (optional)
# For example, to set the time limit or verbosity
# xp.control.outputlog = 1  # Enable solver output
# xp.control.maxtime = 600  # Set maximum solving time to 600 seconds

# Solve the problem
prob.solve()

# Retrieve the solver status
status = prob.getProbStatus()
print(f"Solver Status: {status}")

FICO Xpress v9.4.2, Hyper, solve started 22:28:04, Nov 4, 2024
Heap usage: 194MB (peak 194MB, 39MB system)
Maximizing MILP Movie_Scheduling_Problem using up to 8 threads and up to 11GB memory, with these control settings:
OUTPUTLOG = 1
NLPPOSTSOLVE = 1
XSLP_DELETIONCONTROL = 0
XSLP_OBJSENSE = -1
Original problem has:
    420354 rows       219040 cols      1231360 elements    219040 entities
Presolved problem has:
    383073 rows       219037 cols      1168636 elements    219037 entities
LP relaxation tightened
Presolve finished in 126 seconds
Heap usage: 308MB (peak 432MB, 39MB system)

Coefficient range                    original                 solved        
  Coefficients   [min,max] : [ 1.00e+00,  3.40e+01] / [ 3.13e-02,  1.94e+00]
  RHS and bounds [min,max] : [ 1.00e+00,  6.70e+01] / [ 1.25e-01,  3.30e+01]
  Objective      [min,max] : [ 1.72e-02,  2.70e+05] / [ 1.72e-02,  2.70e+05]
Autoscaling applied standard scaling

Symmetric problem: generators: 113, support set: 6253
 Numbe

In [80]:
# Function to convert time slot index to actual time
def get_time(slot_index):
    return time_slots[slot_index].time()

# Initialize a list to store scheduled movies
scheduled_movies = []

for m_idx, m_row in movies_df.iterrows():
    # Retrieve the solution value for y[m_idx]
    y_value = prob.getSolution(y[m_idx])
    
    if y_value > 0.5:  # Movie is scheduled
        # Retrieve solution values for s[m_idx] and e[m_idx]
        start_slot = int(prob.getSolution(s[m_idx]))
        end_slot = int(prob.getSolution(e[m_idx]))
        
        # Convert slot indices to actual times
        start_time = get_time(start_slot)
        end_time = get_time(end_slot)
        
        # Append the scheduled movie details
        scheduled_movies.append({
            'Movie Index': m_idx,
            'Movie Title': m_row['title'],
            'Start Slot': start_slot,
            'Start Time': start_time.strftime('%H:%M'),
            'End Slot': end_slot,
            'End Time': end_time.strftime('%H:%M')
        })

# Convert to DataFrame for better visualization
schedule_df = pd.DataFrame(scheduled_movies)

print("Scheduled Movies:")
print(schedule_df)


Scheduled Movies:
   Movie Index              Movie Title  Start Slot Start Time  End Slot  \
0           10             Forrest Gump          28      21:00        33   
1           22  The Wolf of Wall Street           9      11:30        15   
2          403                    After           5      09:30         8   
3         2522         No Hard Feelings          24      19:00        27   
4         4449                   Strays          16      15:00        19   
5         5099          Haunted Mansion           0      07:00         4   
6         5750                  Bottoms          20      17:00        23   

  End Time  
0    23:30  
1    14:30  
2    11:00  
3    20:30  
4    16:30  
5    09:00  
6    18:30  


In [81]:
# Initialize a list to store time slot assignments
time_slot_assignments = []

for t in range(len(time_slots)):
    for m_idx in movies_df.index:
        if prob.getSolution(x[(m_idx, t)]) > 0.5:
            movie_title = movies_df.loc[m_idx, 'title']
            start_slot = int(prob.getSolution(s[m_idx]))
            start_time = get_time(start_slot)
            end_slot = int(prob.getSolution(e[m_idx]))
            end_time = get_time(end_slot)
            time_slot_assignments.append({
                'Time Slot Index': t,
                'Time': time_slots[t].strftime('%H:%M'),
                'Movie Title': movie_title,
                'Start Time': start_time.strftime('%H:%M'),
                'End Time': end_time.strftime('%H:%M')
            })
            break  # Since only one movie per time slot

# Convert to DataFrame
schedule_mapping_df = pd.DataFrame(time_slot_assignments)

print("\nTime Slot Assignments:")
print(schedule_mapping_df)


KeyboardInterrupt: 