In [11]:
# Install OR-Tools once, via %pip so the package lands in the environment of the
# running kernel (a `!pip` shell call may resolve to a different interpreter).
%pip install --upgrade --user ortools

Defaulting to user installation because normal site-packages is not writeable


# Part 1: Mathematical Programming

The `CloudResourceAllocation` class below models the cloud resource allocation dataset, which contains: 
> N = number of jobs,
>
> Q1 = CPU capacity,
>
> Q2 = memory capacity, and
>
> a list of cloud resource **jobs** with these columns: 
> > **ID, CPU demand, memory demand, and payment of a job.**

The goal of the resource allocation problem is to decide which job to accept (and which to decline), so that the CPU and memory capacity is not exceeded by the accepted jobs, and the total charged payment is maximised.

The goal of the resource allocation problem is to decide which job to accept (and which to decline), so that the CPU and memory capacity is not exceeded by the accepted jobs, and the total charged payment is maximised

The **bounding method** is defined below.
> Bounding: compute an upper bound (for a maximisation problem) or a lower bound (for a minimisation problem) on the optimal solution of a branch/sub-problem, based on an optimistic estimate.

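Relax the integrality constraints on the $x_i$ so that each variable can take any continuous value in $[0, 1]$. In other words, a job may be accepted fractionally, in which case it consumes the same fraction of its CPU demand and memory demand and contributes the same fraction of its payment.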


In [12]:
from distutils.command.build_scripts import first_line_re
from tkinter.tix import COLUMN
import pandas as pd
import numpy as np
from ortools.linear_solver import pywraplp
from ortools.init import pywrapinit
from ortools.sat.python import cp_model
# Import deque for the stack structure, copy for deep copy nodes
from collections import deque
import copy

class CloudResourceAllocation:
    '''A cloud resource allocation instance together with its MIP model.

    Input file layout (CSV):
        1st line         : the number of jobs N,
        2nd line         : the CPU capacity Q1 and the memory capacity Q2,
        3rd line onwards : ID, CPU demand, memory demand and payment of a job
                           (one job per line).
    '''

    # main constructor
    def __init__(self, N, Q1, Q2, jobs):
        '''Store the instance data.

        N    -- number of jobs, i.e. len(jobs)
        Q1   -- CPU capacity
        Q2   -- memory capacity
        jobs -- DataFrame with the columns ID, CPUDemand, MemoryDemand, payment
        '''
        self.N = N
        self.Q1 = Q1
        self.Q2 = Q2
        self.jobs = jobs

    @classmethod
    def constructFromFile(cls, filePath):
        '''Read from file and construct an instance of CloudResourceAllocation'''
        # The two header lines are parsed by hand; pandas reads the job table below.
        with open(filePath, 'r') as file:
            first_line = file.readline()
            second_line = file.readline()

        N = int(first_line.split(',')[0])
        capacity_fields = second_line.split(',')
        Q1, Q2 = int(capacity_fields[0]), int(capacity_fields[1])

        jobs = pd.read_csv(filePath, skiprows=range(2), header=None)
        jobs.columns = ['ID', 'CPUDemand', 'MemoryDemand', 'payment']
        return cls(N, Q1, Q2, jobs)

    def define_maths_models(self):
        '''Build the 0/1 integer program for this instance.
            https://developers.google.com/optimization/cp/channeling

        Maximise the total payment sum(c_i * x_i), where job i is charged a payment c_i,
        subject to:
            1. The total CPU demand of the accepted jobs is at most the CPU capacity Q1.
            2. The total memory demand of the accepted jobs is at most the memory capacity Q2.
            3. x_i is binary: x_i = 1 if job i is accepted, 0 otherwise.
            4. i runs over the N jobs.

        Sets self.solver and self.x (a dict mapping job position -> decision variable).
        '''
        self.solver = pywraplp.Solver('SolveAssignmentProblemMIP', pywraplp.Solver.CBC_MIXED_INTEGER_PROGRAMMING)
        if not self.solver:
            return

        # Read the coefficients by POSITION and as plain Python numbers:
        # `self.jobs[col][i]` is a label lookup, which breaks as soon as the frame
        # index is not 0..N-1 (e.g. a sliced sub-problem), and it yields numpy scalars.
        cpu_demands = self.jobs['CPUDemand'].tolist()
        memory_demands = self.jobs['MemoryDemand'].tolist()
        payments = self.jobs['payment'].tolist()

        # One binary decision variable per job; demands and payments are coefficients.
        self.x = {i: self.solver.IntVar(0, 1, 'x[%i]' % i) for i in range(self.N)}

        # define constraints
        self.solver.Add(
            sum(cpu_demands[i] * self.x[i] for i in range(self.N)) <= self.Q1,
            "cpu_capacity_constraint")
        self.solver.Add(
            sum(memory_demands[i] * self.x[i] for i in range(self.N)) <= self.Q2,
            "memory_capacity_constraint")

        # define objective function
        self.solver.Maximize(sum(payments[i] * self.x[i] for i in range(self.N)))

    def solve_assignment_problem(self):
        '''Solve the model and print the objective, resource usage, solution and statistics.

        The model is built on demand if define_maths_models() has not been called yet.
        Returns None; all results are printed.
        '''
        if getattr(self, 'solver', None) is None:
            self.define_maths_models()
        if not getattr(self, 'solver', None):
            print('The solver could not be created.')
            return

        status = self.solver.Solve()
        if status != pywraplp.Solver.OPTIMAL:
            print('The problem does not have an optimal solution.')
            return

        cpu_demands = self.jobs['CPUDemand'].tolist()
        memory_demands = self.jobs['MemoryDemand'].tolist()

        # Accumulate the usage of both capacity constraints while listing each x_i.
        total_cpu_used, total_memory_used = 0, 0
        solution_lines = []
        for j in range(self.N):
            x_value = self.x[j].solution_value()
            solution_lines.append(f"\t{self.x[j].name()} : {x_value} \n")
            total_cpu_used += cpu_demands[j] * x_value
            total_memory_used += memory_demands[j] * x_value
        x_sol_vals_str = "".join(solution_lines)

        print(f'Objective value(Total Payment) ={self.solver.Objective().Value()}')
        print(f"Total CPU used = {total_cpu_used} out of the CPU Capacity:{self.Q1}")
        print(f"Total Memory used = {total_memory_used} out of the Memory Capacity:{self.Q2}")
        print(f"Solution: (The obtained  xi value): \n{x_sol_vals_str}")
        # Statistics.
        print('-'*15)
        print('Statistics')
        print(' Problem solved in %f milliseconds ' % self.solver.wall_time())
        print(' Problem solved in %d iterations ' % self.solver.iterations())
        print(' Problem solved in %d branch-and-bound nodes ' % self.solver.nodes())

    def get_jobs(self):
        '''Return the jobs DataFrame held by this instance.'''
        return self.jobs

    def __str__(self) -> str:
        return f'N: {self.N}, \nCPU Capacity Q1: {self.Q1}, Memory Capacity Q2: {self.Q2}, \n Jobs left:\n{self.jobs}'
    
    
# Dataset locations, relative to the directory this notebook runs in.
smallFilePath = '../cloud_resource_allocation/small.csv'
largeFilePath = '../cloud_resource_allocation/large.csv'

# Parse both instances up front (small first, then large); later cells solve them.
smallDS, largeDS = (
    CloudResourceAllocation.constructFromFile(csv_path)
    for csv_path in (smallFilePath, largeFilePath)
)
# smallDS.get_jobs()['CPUDemand'][0]

In [13]:
# Small instance: banner, build the MIP, solve and report, closing rule plus a blank line.
print('-' * 80, "Small Cloud Resource Allocation dataset:", '-' * 20, sep='\n')
smallDS.define_maths_models()
smallDS.solve_assignment_problem()
print('-' * 80, end='\n\n')

--------------------------------------------------------------------------------
Small Cloud Resource Allocation dataset:
--------------------
Objective value(Total Payment) =5647.0
Total CPU used = 886.0 out of the CPU Capacity:1000
Total Memory used = 1989.0 out of the Memory Capacity:2000
Solution: (The obtained  xi value): 
	x[0] : 1.0 
	x[1] : 1.0 
	x[2] : 1.0 
	x[3] : 0.0 
	x[4] : 1.0 
	x[5] : 1.0 
	x[6] : 1.0 
	x[7] : 1.0 
	x[8] : 1.0 
	x[9] : 1.0 

---------------
Statistics
 Problem solved in 5.000000 milliseconds 
 Problem solved in 0 iterations 
 Problem solved in 0 branch-and-bound nodes 
--------------------------------------------------------------------------------



In [14]:
# Large instance: banner, build the MIP, solve and report, closing rule.
print('-' * 80, "Large Cloud Resource Allocation dataset:", '-' * 20, sep='\n')
largeDS.define_maths_models()
largeDS.solve_assignment_problem()
print('-' * 80)

--------------------------------------------------------------------------------
Large Cloud Resource Allocation dataset:
--------------------
Objective value(Total Payment) =29459.0
Total CPU used = 9997.0 out of the CPU Capacity:10000
Total Memory used = 9859.0 out of the Memory Capacity:10000
Solution: (The obtained  xi value): 
	x[0] : 0.0 
	x[1] : 0.0 
	x[2] : 0.0 
	x[3] : 0.0 
	x[4] : 1.0 
	x[5] : 0.0 
	x[6] : 0.0 
	x[7] : 0.0 
	x[8] : 0.0 
	x[9] : 0.0 
	x[10] : 0.0 
	x[11] : 0.0 
	x[12] : 0.0 
	x[13] : 0.0 
	x[14] : 0.0 
	x[15] : 0.0 
	x[16] : 0.0 
	x[17] : 0.0 
	x[18] : 0.0 
	x[19] : 0.0 
	x[20] : 0.0 
	x[21] : 0.0 
	x[22] : 0.0 
	x[23] : 0.0 
	x[24] : 0.0 
	x[25] : 0.0 
	x[26] : 0.0 
	x[27] : 0.0 
	x[28] : 0.0 
	x[29] : 0.0 
	x[30] : 0.0 
	x[31] : 0.0 
	x[32] : 0.0 
	x[33] : 0.0 
	x[34] : 1.0 
	x[35] : 0.0 
	x[36] : 0.0 
	x[37] : 0.0 
	x[38] : 0.0 
	x[39] : 0.0 
	x[40] : 0.0 
	x[41] : 0.0 
	x[42] : 0.0 
	x[43] : 0.0 
	x[44] : 0.0 
	x[45] : 0.0 
	x[46] : 0.0 
	x[47] : 0.0 
	x[4

In [15]:
# # large = CloudResourceAllocation(largeFilePath)
# print(smallDS.__str__())
# print('-'*70)
# print(largeDS.__str__())

In [16]:
# len(smallDS.get_jobs())
# smallDS.get_jobs().iloc[1:]['CPUDemand'] 
# smallDS.get_jobs().iloc[0:]

## The following code is inspired by the given tutorial on [GitHub](https://github.com/meiyi1986/tutorials/blob/master/notebooks/knapsack-branch-bound.ipynb)

`It is not used in the final submitted solution for Part 1.`

In [17]:
# branch and bound 
import fractions
def bounding(ds: "CloudResourceAllocation"):
    """Return an upper bound on the best total payment achievable for `ds`.

    The bound is the smaller of two fractional (continuous) knapsack relaxations,
    one that keeps only the CPU constraint and one that keeps only the memory
    constraint. Dropping a constraint and allowing fractional jobs can only
    increase the optimum, so each relaxation - and therefore their minimum -
    is a valid upper bound for the two-constraint 0/1 problem.

    (A single greedy fill ordered by payment/CPU + payment/memory is NOT a valid
    bound: it can stop early on one resource and report less than the true
    optimum, which makes branch and bound prune the optimal branch.)

    Parameters:
        ds -- object exposing Q1 (CPU capacity), Q2 (memory capacity) and
              get_jobs(), a DataFrame with the columns CPUDemand, MemoryDemand
              and payment.

    Returns:
        The bound as a number; 0 when there are no jobs.
    """
    jobs = ds.get_jobs()
    # Pull the columns out once, by position, as plain Python numbers.
    payments = jobs['payment'].tolist()
    cpu_demands = jobs['CPUDemand'].tolist()
    memory_demands = jobs['MemoryDemand'].tolist()

    def _single_resource_bound(demands, capacity):
        # Fractional knapsack on one resource: fill by payment per unit of demand.
        remaining = max(capacity, 0)
        total = 0
        # Jobs with a non-positive payment can never raise the optimum.
        profitable = [(pay, need) for pay, need in zip(payments, demands) if pay > 0]
        # Jobs that need nothing of this resource are free, so they rank first.
        profitable.sort(
            key=lambda job: float('inf') if job[1] <= 0 else job[0] / job[1],
            reverse=True)
        for pay, need in profitable:
            if need <= 0:
                total += pay
            elif need <= remaining:
                total += pay
                remaining -= need
            else:
                # First job that does not fit: take the fraction that does, then stop.
                total += pay * remaining / need
                break
        return total

    return min(_single_resource_bound(cpu_demands, ds.Q1),
               _single_resource_bound(memory_demands, ds.Q2))

In [18]:


def cloudResourceAllocation_bb_dfs(ds:CloudResourceAllocation):#(values, weights, capacity):
    """Depth-first branch and bound over accept/decline decisions for each job.

    Jobs are decided in table order. A node records the partial 0/1 solution,
    the payment and resources accumulated so far, and 'expanded_item', the
    position of the next job to decide.

    Parameters:
        ds -- the instance to solve (Q1, Q2 and get_jobs() are read).

    Returns:
        The best node found, a dict with the keys 'solution', 'total payment',
        'total cpu used', 'total memory used' and 'expanded_item'. The root
        (nothing accepted) is returned when no complete solution beats it.
    """
    jobs = ds.get_jobs()
    num_jobs = len(jobs)

    # Initialise the root, where 'expanded_item' indicates the item to be expanded at this node
    root = {
        'solution': [0] * num_jobs,
        'total payment': 0,
        'total cpu used': 0,
        'total memory used': 0,
        'expanded_item': 0
    }

    # Initially, the fringe contains the root node only
    best_solution = root
    fringe = deque([root])

    while fringe:
        # Depth-first-search, Last-In-First-Out of the stack
        node = fringe.pop()
        expanded_item = node['expanded_item']

        # A leaf has nothing left to branch on: record it if it improves, then
        # always move on (previously non-improving leaves fell through to bounding).
        if expanded_item == num_jobs:
            if node['total payment'] > best_solution['total payment']:
                best_solution = node
            continue

        # Obtain the sub-problem: undecided jobs and the capacities still available
        node_sub_jobs = jobs.iloc[expanded_item:]
        node_sub_q1_cpu_capacity = ds.Q1 - node['total cpu used']
        node_sub_q2_mem_capacity = ds.Q2 - node['total memory used']

        # Bounding on the sub-problem, and then add the value of the current solution
        bound = node['total payment'] + bounding(
            CloudResourceAllocation(
                len(node_sub_jobs),
                node_sub_q1_cpu_capacity,
                node_sub_q2_mem_capacity,
                node_sub_jobs)
        )
        # Prune the branch: it cannot beat the incumbent
        if bound <= best_solution['total payment']:
            continue

        # Child 1: unselect the expanded item
        child1 = copy.deepcopy(node)
        child1['solution'][expanded_item] = 0
        child1['expanded_item'] = expanded_item + 1
        fringe.append(child1)

        # Child 2: select the expanded item if the capacity is enough.
        # Pushed last so that the "accept" branch is explored first.
        job = jobs.iloc[expanded_item]
        new_cpu_demand = node['total cpu used'] + job['CPUDemand']
        new_mem_demand = node['total memory used'] + job['MemoryDemand']

        if new_cpu_demand <= ds.Q1 and new_mem_demand <= ds.Q2:
            child2 = copy.deepcopy(node)
            child2['solution'][expanded_item] = 1
            child2['total payment'] = node['total payment'] + job['payment']
            child2['total cpu used'] = new_cpu_demand
            child2['total memory used'] = new_mem_demand
            child2['expanded_item'] = expanded_item + 1
            fringe.append(child2)
    return best_solution


In [19]:
def printResult(ds:CloudResourceAllocation):
    """Run the branch-and-bound search once, print every field of the best node, and return it.

    The CPU and memory usage lines are suffixed with the corresponding capacity
    (ds.Q1 / ds.Q2). The search is executed a single time and its result is
    reused for both printing and the return value.
    """
    best_solution = cloudResourceAllocation_bb_dfs(ds)
    for k, v in best_solution.items():
        suffix = ''
        # Resource usage entries are reported against their capacity
        if 'cpu' in k:
            suffix = f" out of {ds.Q1}"
        if 'mem' in k:
            suffix = f" out of {ds.Q2}"

        print(f"{k}: {v} {suffix}")
    print('-'*50)
    print('-'*50)
    return best_solution

In [20]:
# printResult(smallDS)
# printResult(largeDS)
# print("Incorrect, hence they are deprecated, the above that use Google-OR tool is the correct way to do it")

# Part 2: Greedy Heuristic
