In [None]:
# Setup
if __name__ == '__main__':
    import os
    # Change the current working directory to the parent of the directory that
    # contains this notebook (dirname is applied twice), so the relative imports
    # and data paths used below resolve from there.
    # NOTE(review): `__vsc_ipynb_file__` is not defined anywhere in this file —
    # presumably it is injected by the VS Code notebook kernel; confirm before
    # running this notebook in another front end.
    os.chdir(os.path.dirname(os.path.dirname(__vsc_ipynb_file__)))

from evaluation import get_actual_demand
from system_state import SystemState
from utils import load_problem_data
import numpy as np
import pandas as pd
import uuid
import json

# Load the problem tables and build the SystemState from the data centres and servers.
demand, datacenters, servers, selling_prices = load_problem_data()
system_state = SystemState(datacenters, servers)

# Seed NumPy's global RNG before deriving the demand so that reruns are repeatable
# (presumably get_actual_demand draws random numbers — TODO confirm).
seed = 1234
np.random.seed(seed)
actual_demand = get_actual_demand(demand)

In [None]:
def save_results_as_actions(path: str, results: list[dict], server_gen: str) -> None:
    """
    Expand purchase results into individual buy/dismiss actions and write them
    to `path` as JSON (indent 4), ordered by time step.

    Each entry of `results` has the format:
    {
        'buy_count': int,
        'datacenter_id': str,
        'buy_time_step': int,
        'dismiss_time_step': int,
        'server_generation': str   # optional, overrides `server_gen` for this entry
    }
    (numeric fields may also be given as integer strings; they are passed through int()).

    For every server counted by 'buy_count' two actions sharing a fresh UUID are emitted:
    [{
        'action': 'buy',
        'server_generation': str,
        'datacenter_id': str,
        'time_step': int,
        'server_id': str
    },
    {
        'action': 'dismiss',
        'server_generation': str,
        'datacenter_id': str,
        'time_step': int,
        'server_id': str
    }]

    Args:
        path: File to (over)write with the JSON list of actions.
        results: Purchase results in the format above.
        server_gen: Server generation used for entries without their own
            'server_generation' key.
    """

    actions = []
    for entry in results:
        buy_count = int(entry['buy_count'])
        datacenter_id = entry['datacenter_id']
        buy_time_step = int(entry['buy_time_step'])
        dismiss_time_step = int(entry['dismiss_time_step'])
        # An entry may name its own generation; otherwise use the caller's default
        generation = entry.get('server_generation', server_gen)

        # One buy/dismiss pair per purchased server.
        # This relies on the builtin `range`: a notebook-level variable named
        # `range` shadows it and makes this loop fail.
        for _ in range(buy_count):
            server_id = str(uuid.uuid4())
            for action, time_step in (('buy', buy_time_step), ('dismiss', dismiss_time_step)):
                actions.append({
                    'action': action,
                    'server_generation': generation,
                    'datacenter_id': datacenter_id,
                    'time_step': time_step,
                    'server_id': server_id
                })

    # sort by time step (stable, so a server's buy stays ahead of its dismiss)
    actions.sort(key=lambda action: action['time_step'])

    # write to json
    with open(path, 'w') as f:
        json.dump(actions, f, indent=4)


def get_sorted_servers(file_path: str):
    """
    Read a CSV of servers and return its (server_generation, latency_sensitivity)
    pairs as a list of tuples, ordered by the 'profitability' column from highest
    to lowest.
    """

    ranked = pd.read_csv(file_path).sort_values(by='profitability', ascending=False)
    generations = ranked['server_generation']
    latencies = ranked['latency_sensitivity']
    return list(zip(generations, latencies))

In [None]:
# Simulate the algorithm with the most profitable server/latency
#
# Greedy plan: walk the (server generation, latency sensitivity) pairs from most
# to least profitable. For each pair, repeatedly
#   1) find the contiguous time-step ranges that still have demand,
#   2) keep the ranges that last at least the break-even time,
#   3) for each range (longest first) buy enough servers to cover the minimum
#      demand across the range and subtract what they serve from that demand,
# until no qualifying range is left. Purchases are collected in `results`.


# NOTE(review): one slot size is used for every server generation — confirm that
# 4 also holds for the CPU generations.
slots_size = 4
remaining_demand = actual_demand.copy()
results = []
sorted_servers = get_sorted_servers('data/test_data/most_profitable_servers_by_artem.csv')

# Time steps a server has to stay in service to break even.
# Taken manually from the profitability spreadsheet in the google drive;
# every generation currently shares the same figures.
BREAK_EVEN_TIME = {
    generation: {
        'low': 12,  # DC1
        'medium': 11,  # DC2
        'high': 10  # DC3 and DC4
    }
    for generation in ('GPU.S3', 'GPU.S2', 'GPU.S1', 'CPU.S4', 'CPU.S3', 'CPU.S2', 'CPU.S1')
}

# Data centres that serve each latency sensitivity, in fill order: servers that
# do not fit into one data centre overflow into the next.
# NOTE(review): taken from the DC annotations on the break-even figures above —
# confirm against the `datacenters` table.
DATACENTERS_BY_LATENCY = {
    'low': ('DC1',),
    'medium': ('DC2',),
    'high': ('DC3', 'DC4')
}

capacity_by_generation = servers.set_index('server_generation')['capacity']


def get_remaining_slots(system_state: SystemState, dc: str) -> int:
    """Return the free slots of data centre `dc` (slots_capacity - used_slots)."""
    dc_rows = system_state.datacenter_capacity.query('datacenter_id == @dc')
    return dc_rows['slots_capacity'].iloc[0] - dc_rows['used_slots'].iloc[0]


for server_generation, latency_sensitivity in sorted_servers:
    print(f"Server generation: {server_generation}, Latency sensitivity: {latency_sensitivity}")

    capacity = capacity_by_generation.loc[server_generation]
    min_range_length = BREAK_EVEN_TIME[server_generation][latency_sensitivity]
    # Demand at or below this level is treated as too low to buy one more server for
    min_useful_demand = (capacity / 2) + 1
    is_generation = remaining_demand['server_generation'] == server_generation

    while True:
        # 1) Find the ranges of time steps between which this server/latency is in demand
        has_demand = remaining_demand[latency_sensitivity] > 0
        relevant_demand = remaining_demand[is_generation & has_demand].sort_values('time_step')
        time_steps_of_demand = relevant_demand['time_step'].to_numpy()

        ranges = []
        if len(time_steps_of_demand) > 0:  # no demand left -> no ranges
            # A jump of more than one time step ends a range and starts the next one
            gap_indices = np.flatnonzero(np.diff(time_steps_of_demand) > 1)
            first_indices = np.concatenate(([0], gap_indices + 1))
            last_indices = np.concatenate((gap_indices, [len(time_steps_of_demand) - 1]))
            ranges = [
                (time_steps_of_demand[first], time_steps_of_demand[last])
                for first, last in zip(first_indices, last_indices)
            ]

        # 2) Merge ranges which have a negligibly small gap in between (arbitrary gap size? < 3 time steps?)
        # TODO

        # 3) Filter all ranges which last for less than the time it takes for the server/latency to break even
        ranges = [
            (first_step, last_step)
            for first_step, last_step in ranges
            if last_step - first_step >= min_range_length
        ]
        print(ranges)

        # 4) Stop once no range of demand is left for this server/latency
        if len(ranges) == 0:
            print("No more ranges of demand to satisfy")
            break

        # 5) For each range (from longest to shortest):
        servers_bought_this_pass = 0
        for first_step, last_step in sorted(ranges, key=lambda r: r[1] - r[0], reverse=True):
            print((first_step, last_step))

            # 5.1) Calculate the minimum demand across that range
            in_range = relevant_demand['time_step'].between(first_step, last_step)
            demand_in_range = relevant_demand[in_range]
            min_demand = demand_in_range[latency_sensitivity].min()

            # 5.2) Calculate the number of servers to buy to meet the minimum demand
            desired_buy_count = int(np.round(min_demand / capacity))
            print(f"{min_demand}/{capacity} = {min_demand / capacity} ~~ {str(desired_buy_count)} servers")

            # 5.3) Validate the number of servers to buy against the available slots in the
            #      appropriate data centres, overflowing leftovers into the next one.
            # NOTE(review): system_state is never updated with the purchases recorded in
            # `results`, so every range sees the same free slots and the plan as a whole
            # may exceed a data centre's capacity. TODO: track slot usage per time step.
            still_wanted = desired_buy_count
            bought_count = 0
            for datacenter_id in DATACENTERS_BY_LATENCY[latency_sensitivity]:
                remaining_slots = get_remaining_slots(system_state, datacenter_id)
                max_buy_count = max(0, int(np.floor(remaining_slots / slots_size)))
                actual_buy_count = min(still_wanted, max_buy_count)
                print(f"Buying from {datacenter_id} ({remaining_slots} slots for max {max_buy_count} servers remaining): {actual_buy_count} out of desired {still_wanted}")

                # 5.4) Store the number of servers to buy, which data centre, the buy time
                #      step, the dismiss time step and the generation they belong to
                if actual_buy_count > 0:
                    results.append({
                        'buy_count': str(actual_buy_count),
                        'datacenter_id': datacenter_id,
                        'buy_time_step': str(first_step),
                        'dismiss_time_step': str(last_step),
                        'server_generation': server_generation
                    })
                    bought_count += actual_buy_count
                    still_wanted -= actual_buy_count

                if still_wanted <= 0:
                    break

            # 5.5) For each demand in the range, subtract the capacity * number of servers
            #      bought (across all data centres, so overflow purchases are not bought again)
            demand_to_subtract = bought_count * capacity
            print(f"Subtracting {demand_to_subtract} from the demand in the range")
            remaining_demand.loc[demand_in_range.index, latency_sensitivity] -= demand_to_subtract
            servers_bought_this_pass += bought_count

        # 6) Clear demand values which are too low to buy at least 1 server for.
        #    Only this generation's values in this latency column are zeroed; rows are
        #    kept so the other generations and latency columns are left untouched.
        too_small = is_generation & (remaining_demand[latency_sensitivity] <= min_useful_demand)
        newly_cleared = (too_small & (remaining_demand[latency_sensitivity] != 0)).any()
        remaining_demand.loc[too_small, latency_sensitivity] = 0

        # A pass that bought nothing and cleared nothing would repeat forever
        if servers_bought_this_pass == 0 and not newly_cleared:
            print("No servers could be bought for the remaining ranges of demand")
            break

        # 7) Repeat steps 1 to 6 with the new demand values until there are no ranges after step 3

# Write the plan as buy/dismiss actions.
# NOTE(review): only one server generation can be passed here (the last one looped
# over), although `results` may span several — each result therefore records its own
# 'server_generation'; confirm save_results_as_actions labels actions from that key.
save_results_as_actions('./result_actions.json', results, server_generation)

In [None]:
# Other system state tests (ignore)
import pandas as pd


# Scratch check of SystemState: build a fresh state, apply three buy actions
# (two CPU.S1 and one GPU.S1, all in DC1 at time step 1; a fourth is commented
# out) and then read `state.fleet`. The commented prints below were used to
# inspect the state by hand.
state = SystemState(datacenters, servers)
state.update_state([
    { 'action': 'buy', 'server_generation': 'CPU.S1', 'datacenter_id': 'DC1', 'time_step': 1, "server_id": "wasd" },
    # { 'action': 'buy', 'server_generation': 'CPU.S1', 'datacenter_id': 'DC1', 'time_step': 1, "server_id": "wasd2" },
    { 'action': 'buy', 'server_generation': 'CPU.S1', 'datacenter_id': 'DC1', 'time_step': 1, "server_id": "wasd3" },
    { 'action': 'buy', 'server_generation': 'GPU.S1', 'datacenter_id': 'DC1', 'time_step': 1, "server_id": "ijkl" }
    ])
fleet = state.fleet
# print(fleet)
# # print(datacenters)
# print(state.datacenter_capacity)
# print(state.datacenter_info)
# print(state.datacenter_capacity.query('datacenter_id == "DC1"')['slots_capacity'].iloc[0] - state.datacenter_capacity.query('datacenter_id == "DC1"')['used_slots'].iloc[0])



def get_min_above_threshold(df, column, threshold):
    """
    Return the smallest value in `df[column]` that is >= `threshold`.

    Args:
        df: DataFrame to search.
        column: Label of the column to inspect. Any label works, including
            names that are not valid Python identifiers (e.g. with spaces).
        threshold: Inclusive lower bound.

    Returns:
        The minimum qualifying value, or None when no value reaches the
        threshold (the minimum is NaN in that case).
    """
    # Boolean mask instead of an f-string query: the query string would have
    # to be parsed and rejects column labels such as "unit price".
    values = df[column]
    result = values[values >= threshold].min()
    return result if not pd.isna(result) else None

# Example usage: the values of 'A' that are >= 2 are 5, 3, 8 and 2, so the minimum is 2
df = pd.DataFrame({'A': [1, 5, 3, 8, 2]})
result = get_min_above_threshold(df, 'A', 2)
print(result)  # Output: 2