In [23]:
# PATHS TO FILES
# Attempt 1 at basic code assigning a movie/ads spaces to a schedule

import numpy as np
import pandas as pd
import xpress as xp
from datetime import datetime, timedelta
import os

# Define the base path explicitly to where your files are stored
# This is the correct path on your machine
base_path = 'D:/OneDrive/Documents/EDINBURGH/MMCS/final_project/repository/mmcsfinalproject/data'

# Read in files using the explicitly defined base path
ch_0_conversion_rates = pd.read_csv(os.path.join(base_path, 'channel_0_conversion_rates.csv'))
ch_0_schedule = pd.read_csv(os.path.join(base_path, 'channel_0_schedule.csv'))
ch_1_conversion_rates = pd.read_csv(os.path.join(base_path, 'channel_1_conversion_rates.csv'))
ch_1_schedule = pd.read_csv(os.path.join(base_path, 'channel_1_schedule.csv'))
ch_2_conversion_rates = pd.read_csv(os.path.join(base_path, 'channel_2_conversion_rates.csv'))
ch_2_schedule = pd.read_csv(os.path.join(base_path, 'channel_2_schedule.csv'))
ch_A_schedule = pd.read_csv(os.path.join(base_path, 'channel_A_schedule.csv'))
movies_df = pd.read_csv(os.path.join(base_path, 'movie_database.csv'))

In [24]:
# FORMATING
# Convert 'Date-Time' columns to datetime format
date_cols = ['Date']

for df in [ch_0_conversion_rates, ch_0_schedule, ch_1_conversion_rates, ch_1_schedule,
           ch_2_conversion_rates, ch_2_schedule, ch_A_schedule]:
    df['Date'] = pd.to_datetime(df['Date'])

# Convert 'Release Date' in movie_database to datetime
movies_df['release_date'] = pd.to_datetime(movies_df['release_date'])

# Fill missing values if necessary
movies_df.fillna(0, inplace=True)


In [25]:
# Slot duration 30 minutes
slot_duration = 30  # minutes
movies_df['slots_needed'] = (movies_df['runtime_with_ads'] / slot_duration).apply(lambda x: int(x)).astype(int)


In [26]:
from datetime import datetime, timedelta

num_days = 6

# Define the broadcasting start and end dates
broadcast_start_date = datetime.strptime("2024-10-01", "%Y-%m-%d")
broadcast_end_date = broadcast_start_date + timedelta(days=num_days)  # 7 days including start date

# Define daily broadcast start and end times
daily_broadcast_start_time = timedelta(hours=7, minutes=0)
daily_broadcast_end_time = timedelta(hours=23, minutes=30)

# Generate all time slots over the date range
time_slots = []
current_date = broadcast_start_date
while current_date <= broadcast_end_date:
    # Set the start and end times for the current day
    day_start = datetime.combine(current_date.date(), datetime.min.time()) + daily_broadcast_start_time
    day_end = datetime.combine(current_date.date(), datetime.min.time()) + daily_broadcast_end_time
    
    current_time = day_start
    while current_time <= day_end:
        time_slots.append(current_time)
        current_time += timedelta(minutes=slot_duration)
    
    # Move to the next day
    current_date += timedelta(days=1)

# Create mappings between time slots and indices
time_to_index = {t: idx for idx, t in enumerate(time_slots)}
index_to_time = {idx: t for idx, t in enumerate(time_slots)}


In [27]:
# Print the first few time slots
print("First 10 Time Slots:")
for ts in time_slots[:10]:
    print(ts.strftime('%Y-%m-%d %H:%M'))

# Print the last few time slots
print("\nLast 10 Time Slots:")
for ts in time_slots[-10:]:
    print(ts.strftime('%Y-%m-%d %H:%M'))


First 10 Time Slots:
2024-10-01 07:00
2024-10-01 07:30
2024-10-01 08:00
2024-10-01 08:30
2024-10-01 09:00
2024-10-01 09:30
2024-10-01 10:00
2024-10-01 10:30
2024-10-01 11:00
2024-10-01 11:30

Last 10 Time Slots:
2024-10-07 19:00
2024-10-07 19:30
2024-10-07 20:00
2024-10-07 20:30
2024-10-07 21:00
2024-10-07 21:30
2024-10-07 22:00
2024-10-07 22:30
2024-10-07 23:00
2024-10-07 23:30


In [28]:
from datetime import time
# Set 'Date' as index
ch_A_schedule.set_index('Date', inplace=True)

# Resample to 30-minute intervals (use mean of groups)
ch_A_schedule_30min = ch_A_schedule.resample('30T').mean().reset_index()

# Define start and end times for filtering
daily_start_time = time(7, 0)  # 7:00 AM
daily_end_time = time(23, 55)  # 11:55 PM

# Filter rows to include only time slots within the desired range 
ch_A_schedule_30min['time'] = ch_A_schedule_30min['Date'].dt.time
ch_A_schedule_30min = ch_A_schedule_30min[
    (ch_A_schedule_30min['time'] >= daily_start_time) &
    (ch_A_schedule_30min['time'] <= daily_end_time)
].drop(columns=['time'])

# Print the first 5 rows
print("First 5 rows of ch_A_schedule_30min:")
print(ch_A_schedule_30min.head(3))



First 5 rows of ch_A_schedule_30min:
                 Date  children_baseline_view_count  \
0 2024-10-01 07:00:00                      0.019383   
1 2024-10-01 07:30:00                      0.015485   
2 2024-10-01 08:00:00                      0.009684   

   adults_baseline_view_count  retirees_baseline_view_count  prime_time_factor  
0                    0.022296                      0.003611                1.0  
1                    0.022928                      0.004694                1.0  
2                    0.024245                      0.006021                1.0  


  ch_A_schedule_30min = ch_A_schedule.resample('30T').mean().reset_index()


In [29]:
# # Map baseline viewership to time slot indices
# baseline_viewership = {}
# for idx, row in ch_A_schedule_30min.iterrows():
#     time_slot_datetime = row['Date']
#     # Find the corresponding time slot index
#     time_slot_index = time_to_index.get(time_slot_datetime, None)
#     if time_slot_index is not None:
#         baseline_viewership[time_slot_index] = {
#             'children': row['children_baseline_view_count'],
#             'adults': row['adults_baseline_view_count'],
#             'retirees': row['retirees_baseline_view_count'],
#             'prime_time_factor': row.get('prime_time_factor', 1)
#         }

# # Print the first 5 rows
# print("Print timeslot_index 1 of dictionary baseline_viewership:")
# print(baseline_viewership[1])


In [30]:
# Check for duplicate movie titles
duplicate_titles = movies_df[movies_df.duplicated(subset=['title'], keep=False)]
if not duplicate_titles.empty:
    print("Duplicate movie titles found:")
    print(duplicate_titles['title'])
else:
    print("No duplicate movie titles found.")


Duplicate movie titles found:
4                 The Avengers
17                     Titanic
76               The Lion King
105       Beauty and the Beast
149        Alice in Wonderland
                 ...          
5748              Midnight Sun
5761                The Island
5773            The Shaggy Dog
5855    Fun with Dick and Jane
5879        The Perfect Weapon
Name: title, Length: 258, dtype: object


In [31]:
print(movies_df.head(1))

       title  vote_average  vote_count release_date    revenue  runtime  \
0  Inception         8.364       34495   2010-07-15  825532764      148   

      budget  popularity                                      genres  \
0  160000000      83.952  ['Action', 'Science Fiction', 'Adventure']   

   n_ad_breaks  runtime_with_ads  scaled_popularity  \
0            5             180.0           0.972027   

   children_scaled_popularity  adults_scaled_popularity  \
0                    0.777622                  0.972027   

   retirees_scaled_popularity  slots_needed  
0                    0.388811             6  


In [32]:
def get_views(m, t_idx, movies, our_channel):
    # Baseline viewership at time slot t_idx
    baseline = {
        'children': our_channel.iloc[t_idx]['children_baseline_view_count'],
        'adults': our_channel.iloc[t_idx]['adults_baseline_view_count'],
        'retirees': our_channel.iloc[t_idx]['retirees_baseline_view_count']
    }
    
    # Scaled popularity of movie m
    popularity = {
        'children': movies.loc[m, 'children_scaled_popularity'],
        'adults': movies.loc[m, 'adults_scaled_popularity'],
        'retirees': movies.loc[m, 'retirees_scaled_popularity']
    }
    
    # Calculate expected viewership (assuming total population of 1,000,000)
    total_population = 1_000_000
    viewership = sum(
        baseline[demo] * popularity[demo] * total_population
        for demo in ['children', 'adults', 'retirees']
    )
    
    return viewership

In [59]:
def model(T, movies, our_channel, time_slots):
    prob = xp.problem(name="Movie_Scheduling_Problem")
    

    # Constants
    slots_per_day = 34  # Number of slots per day (e.g., from 07:00 to 23:30)
    days = len(time_slots) // slots_per_day  # Number of days in the schedule
    M = len(time_slots)  # Big M for constraints
    T_end = len(time_slots) - 1  # Last time slot index

    
    # Decision Variables
    x = {(m, t): xp.var(vartype=xp.binary, name='x_{0}_{1}'.format(m, t)) 
         for m in movies.index for t in T}
    prob.addVariable(list(x.values()))
    
    y = {m: xp.var(vartype=xp.binary, name='y_{0}'.format(m)) for m in movies.index}
    prob.addVariable(list(y.values()))
    
    s = {m: xp.var(vartype=xp.integer, name='s_{0}'.format(m)) for m in movies.index}
    e = {m: xp.var(vartype=xp.integer, name='e_{0}'.format(m)) for m in movies.index}
    prob.addVariable(list(s.values()))
    prob.addVariable(list(e.values()))
    
    u = {(m, t): xp.var(vartype=xp.continuous, name='u_{0}_{1}'.format(m, t)) 
         for m in movies.index for t in T}
    prob.addVariable(list(u.values()))

    # Integer variable D[m]: day on which movie m starts
    D = {m: xp.var(vartype=xp.integer, lb=0, ub=days - 1, name='D_{0}'.format(m)) for m in movies.index}
    prob.addVariable(list(D.values()))

    
    decision_vars = [x, y, s, e, u, D]
    


    # Constraints
    
    # 1. Movie Duration Constraint
    for m in movies.index:
        prob.addConstraint(
            xp.Sum(x[m, t] for t in T) == movies.loc[m, 'slots_needed'] * y[m]
        )
    
    # 2. Time Slot Occupancy Constraint
    for t in T:
        prob.addConstraint(
            xp.Sum(x[m, t] for m in movies.index) == 1
        )
    
    # 3. Start Time Constraints
    for m in movies.index:
        for t in T:
            prob.addConstraint(
                s[m] <= t * x[m, t] + (1 - x[m, t]) * M
            )
    
    # 4. End Time Constraints
    for m in movies.index:
        for t in T:
            prob.addConstraint(
                e[m] >= (t + 1) * x[m, t]
            )
    
    # 5. Movie Duration Relationship Constraint
    for m in movies.index:
        prob.addConstraint(
            e[m] - s[m] == movies.loc[m, 'slots_needed'] * y[m]
        )
    
    # # 6. Latest Start Time Constraint
    # for m in movies.index:
    #     prob.addConstraint(
    #         s[m] + movies.loc[m, 'slots_needed'] - 1 <= T_end
    #     )
    
    # 6. Start and End Times Within the Same Day
    for m in movies.index:
        # Movie starts no earlier than the day's start time
        prob.addConstraint(
            s[m] >= D[m] * slots_per_day
        )
        # Movie ends no later than the day's end time
        prob.addConstraint(
            s[m] + movies.loc[m, 'slots_needed'] - 1 <= (D[m] + 1) * slots_per_day - 1
        )


    # 7. View Count Constraints
    for m in movies.index:
        for t_idx, t in enumerate(T):
            expected_viewership = get_views(m, t_idx, movies, our_channel)
            prob.addConstraint(
                u[m, t] == expected_viewership * x[m, t]
            )
    
    # # 8. Prevent Movies from Spanning Multiple Days
    # slots_per_day = 34  # Adjust as needed
    # days = len(time_slots) // slots_per_day

    # for m in movies.index:
    #     for day in range(days):
    #         day_start = day * slots_per_day
    #         day_end = (day + 1) * slots_per_day - 1  # Last slot of the day
    #         # Constraint to ensure the movie ends within the day's last slot
    #         prob.addConstraint( 
    #             e[m] <= day_end + (1 - y[m]) * M
    #         )


    # 9. Objective Function
    prob.setObjective(
        xp.Sum(u[m, t] for m in movies.index for t in T),
        sense=xp.maximize
    )
    
    return prob, decision_vars


In [60]:
# Subset the first 100 movies and reset the index
movies_subset = movies_df.head(100).reset_index(drop=True)

# Subset the first 68 time slots
number_of_time_slots = 68
time_slots_subset = time_slots[:number_of_time_slots]

# Define T as the range of indices for the subset of time slots
T_subset = list(range(len(time_slots_subset)))

# Subset ch_A_schedule_30min to match the 68 time slots
ch_A_schedule_30min_subset = ch_A_schedule_30min.head(number_of_time_slots)

# Call the model function with the subsets
prob, decision_vars = model(T_subset, movies_subset, ch_A_schedule_30min_subset, time_slots_subset)

# Set the maximum solving time and enable output log
# seconds = 10
# prob.setControl('maxtime', seconds)
# prob.setControl('outputlog', 1)  # Enable detailed logs


# Solve the problem
prob.solve()

# Retrieve the solver status
status = prob.getProbStatus()
print(f"Solver Status: {status}")

if status == 4:  # Stopped due to time limit
    print("Solver stopped due to time limit.")
elif status == 1:  # Optimal solution found
    print("Optimal solution found.")
else:
    print(f"Solver stopped with status code {status}.")




FICO Xpress v9.4.2, Hyper, solve started 1:52:37, Nov 21, 2024
Heap usage: 10133KB (peak 10133KB, 7226KB system)
Maximizing MILP Movie_Scheduling_Problem using up to 8 threads and up to 11GB memory, with these control settings:
OUTPUTLOG = 1
NLPPOSTSOLVE = 1
XSLP_DELETIONCONTROL = 0
XSLP_OBJSENSE = -1
Original problem has:
     20868 rows        14000 cols        55200 elements      7200 entities
Presolved problem has:
     13436 rows         7000 cols        39940 elements      7000 entities
LP relaxation tightened
Presolve finished in 0 seconds
Heap usage: 14MB (peak 21MB, 7226KB system)

Coefficient range                    original                 solved        
  Coefficients   [min,max] : [ 1.00e+00,  1.71e+05] / [ 1.56e-02,  1.97e+00]
  RHS and bounds [min,max] : [ 1.00e+00,  6.80e+01] / [ 8.13e-01,  6.80e+01]
  Objective      [min,max] : [ 1.00e+00,  1.00e+00] / [ 1.46e+04,  2.93e+05]
Autoscaling applied standard scaling

Symmetric problem: generators: 10, support set: 980
 Num

In [61]:
# Retrieve the solution for scheduled movies
x, y, s, e, u, D = decision_vars  # Unpack decision variables

# Initialize a list to store scheduled movies
scheduled_movies = []

for m_idx, m_row in movies_subset.iterrows():
    y_value = prob.getSolution(y[m_idx])
    if y_value > 0.5:  # Movie is scheduled
        start_slot = int(prob.getSolution(s[m_idx]))
        end_slot = int(prob.getSolution(e[m_idx]))

        # Ensure the end slot is valid
        if end_slot < len(time_slots_subset):
            # Get the corresponding day and times
            start_time = time_slots_subset[start_slot]
            end_time = time_slots_subset[end_slot]
            day = start_time.strftime('%Y-%m-%d')
            start_time_formatted = start_time.strftime('%H:%M')
            end_time_formatted = end_time.strftime('%H:%M')

            # Append the movie schedule
            scheduled_movies.append({
                'Time Slot': start_slot,
                'Day': day,
                'Start Time': start_time_formatted,
                'End Time': end_time_formatted,
                'Movie Index': m_idx,
                'Movie Title': m_row['title']
            })

# Convert to a DataFrame for better organization
schedule_df = pd.DataFrame(scheduled_movies)

# Sort by day and time slot
schedule_df.sort_values(by=['Day', 'Time Slot'], inplace=True)

# Reset index for cleaner output
schedule_df.reset_index(drop=True, inplace=True)

# Print the schedule
print("Scheduled Movies in Order:")
print(schedule_df)



Scheduled Movies in Order:
   Time Slot         Day Start Time End Time  Movie Index  \
0          0  2024-10-01      07:00    10:00           59   
1          6  2024-10-01      10:00    14:00           17   
2         14  2024-10-01      14:00    17:00           10   
3         20  2024-10-01      17:00    20:30           22   
4         27  2024-10-01      20:30    07:00            1   
5         34  2024-10-02      07:00    10:00           38   
6         40  2024-10-02      10:00    12:30            7   
7         45  2024-10-02      12:30    16:00           53   
8         52  2024-10-02      16:00    18:00           93   
9         56  2024-10-02      18:00    21:00           14   

                                Movie Title  
0    Harry Potter and the Half-Blood Prince  
1                                   Titanic  
2                              Forrest Gump  
3                   The Wolf of Wall Street  
4                              Interstellar  
5  Harry Potter and the P

In [62]:
# Initialize a list to store scheduled movies
scheduled_movies = []

for m_idx, m_row in movies_subset.iterrows():
    if prob.getSolution(y[m_idx]) > 0.5:  # Check if the movie is scheduled (y[m_idx] = 1)
        scheduled_movies.append({
            'Movie Index': m_idx,
            'Movie Title': m_row['title']
        })

# Convert to DataFrame for better visualization
scheduled_movies_df = pd.DataFrame(scheduled_movies)

print("Scheduled Movies:")
print(scheduled_movies_df)


Scheduled Movies:
    Movie Index                               Movie Title
0             1                              Interstellar
1             2                           The Dark Knight
2             7                                Fight Club
3            10                              Forrest Gump
4            14                  The Shawshank Redemption
5            17                                   Titanic
6            22                   The Wolf of Wall Street
7            38  Harry Potter and the Prisoner of Azkaban
8            53                             The Godfather
9            59    Harry Potter and the Half-Blood Prince
10           93                              The Hangover
