# Summary of Results and Algorithm:


## Read input

In [2]:
# Imports
import pandas as pd
import numpy as np
# Read the trace file (space-separated, path relative to the working directory)
# into a dataframe. Later cells read the Time, TaskID, NrmlTaskCores and
# NrmlTaskMem columns from it.
df = pd.read_csv('./google-cluster-data-1.csv',sep=' ')

In [3]:
# Report how many rows the dataframe holds
print("length of df: ", df.shape[0])

length of df:  3535029


## Get CPU and memory information

In [4]:
# Show the amount of CPU for each task (the NrmlTaskCores column) as an array
df['NrmlTaskCores'].values

array([0.      , 0.      , 0.021875, ..., 0.      , 0.      , 0.      ])

In [5]:
# Pull the CPU amount, memory requirement and task ID columns out as arrays
cpus, mems, task_ids = (
    df[column].values for column in ('NrmlTaskCores', 'NrmlTaskMem', 'TaskID')
)

In [6]:
# Calculate and save execution times for two cases: mod 2 and mod 10.
# A task's execution time is (TaskID mod k + 1) * 300, i.e. between 1 and k
# slots of 300 time units. The arithmetic runs on the whole TaskID array at
# once instead of looping over every row in Python; .tolist() keeps the
# results as plain lists for the cells that consume them.
temp1 = df['TaskID'].values
ExecutionTime1 = ((temp1 % 2 + 1) * 300).tolist()

temp2 = df['TaskID'].values
ExecutionTime2 = ((temp2 % 10 + 1) * 300).tolist()

In [7]:
# Count the 300-unit subtasks for both settings (mod 2 and mod 10) and print the totals
totalSubtask1 = sum(duration / 300 for duration in ExecutionTime1)
print("Total Subtasks for setting 1:", int(totalSubtask1))

totalSubtask2 = sum(duration / 300 for duration in ExecutionTime2)
print("Total Subtasks for setting 2:", int(totalSubtask2))

Total Subtasks for setting 1: 5298274
Total Subtasks for setting 2: 19472018


In [8]:
# Show the first rows of the dataframe to check the parsed columns
df.head()

Unnamed: 0,Time,ParentID,TaskID,JobType,NrmlTaskCores,NrmlTaskMem,Unnamed: 6
0,90000,757745334,1488529826,0,0.0,0.03113,
1,90000,975992247,1488529821,0,0.0,0.0,
2,90000,1468458091,1488529832,1,0.021875,0.002353,
3,90000,1460281235,1488529840,0,0.0,0.0,
4,90000,1164728954,1488529835,0,0.003125,0.001638,


In [9]:
# Save calculated execution times to columns in the dataframe
# (executionTime1 = mod 2 case, executionTime2 = mod 10 case); the scheduling
# cells select one of these columns by name.
df['executionTime1'] = ExecutionTime1
df['executionTime2'] = ExecutionTime2

### Setting 1

In [13]:
# Calculates energy consumption for one vm
def getEnergyConsumption(VMCPUUsage, totalCPUCount, threshold):
    """Return the static plus dynamic energy figure for a single VM.

    The usage rate is VMCPUUsage divided by one hundredth of totalCPUCount.
    A static part of 5 is charged whenever that rate is positive. The dynamic
    part grows linearly (rate * 100) below `threshold`; from the threshold
    upwards it is threshold * 100 plus 200 times the squared excess.
    """
    usageRate = VMCPUUsage / (totalCPUCount / 100.0)

    # Static part: only charged while the rate is positive
    staticPart = 5 if usageRate > 0 else 0

    # Dynamic part: linear below the threshold, quadratic penalty at or above it
    if usageRate < threshold:
        dynamicPart = usageRate * 100
    else:
        excess = usageRate - threshold
        dynamicPart = threshold * 100 + ((excess ** 2) * 200)

    return staticPart + dynamicPart

In [14]:
# Calculates cost for one vm
def getCost(time, power):
    """Return the cost of `power` at timestamp `time`.

    The price is looked up from a fixed schedule: each entry of `mapping` is
    the timestamp at which the price at the same position in `costChart`
    starts to apply, and that price stays in force until the next boundary.

    Parameters
    ----------
    time : number
        Timestamp to price, on the same scale as the `mapping` boundaries.
    power : number
        Power value to be priced.

    Returns
    -------
    number
        The price in force at `time` multiplied by `power`. Timestamps before
        the first boundary are charged the first price; timestamps at or after
        the last boundary are charged the last boundary's price.
    """
    costChart = [0.5, 0.5, 0.6, 0.6, 0.6, 0.7, 0.7, 0.6, 0.6, 0.8, 0.8, 0.8, 0.8]
    mapping = [90000,91875,93750,95625,97500,99375,101250,103125,105000,106875,108750,110625]

    # Find the last boundary that is <= time. The scan must stop at the first
    # boundary beyond `time`; carrying on would keep replacing the price with
    # that of a later slot.
    slot = 0
    for i, start in enumerate(mapping):
        if time < start:
            break
        slot = i
    return costChart[slot] * power
    

In [21]:
# Schedule all tasks in a way that minimizes the power consumption
def GreedyPower(input_data, nextQueue, threshhold, executionTime,cpuTotal, memTotal):
    """Greedily place one time step's tasks on 100 VMs, lowest energy first.

    Every call starts from 100 fresh VMs, each with `cpuTotal` CPU and
    `memTotal` memory available. For each task (row) the VMs that can still
    fit its CPU and memory are scored with getEnergyConsumption and the task
    goes to the lowest-scoring one (first one wins on ties).

    Parameters
    ----------
    input_data : pandas.DataFrame
        Tasks to schedule; must have 'NrmlTaskCores', 'NrmlTaskMem', 'TaskID'
        and the column named by `executionTime`.
    nextQueue : list
        Ignored; kept only so existing calls keep working. The carry-over
        list is always rebuilt from scratch.
    threshhold : number
        Threshold forwarded to getEnergyConsumption.
    executionTime : str
        Name of the column holding each task's remaining execution time.
    cpuTotal, memTotal : number
        Capacity of every VM.

    Returns
    -------
    (rejected, nextQueue) : tuple of lists
        `rejected` holds the TaskID of every task that fit on no VM in this
        call. `nextQueue` holds a copy of every row whose execution time
        exceeded 300, with 300 subtracted from it.
    """
    # Local to this call: the caller extends its own running list with this
    # one, so sharing a single list would duplicate every entry.
    rejected = []
    carryOver = []
    VMs = [[cpuTotal, memTotal] for _ in range(100)]

    for _, row in input_data.iterrows():
        cpu, mem = row['NrmlTaskCores'], row['NrmlTaskMem']
        lowestPower = float('inf')
        lowestPowerIndex = -1

        # Score every VM that still has room for this task
        for VM_id, (freeCPU, freeMem) in enumerate(VMs):
            if freeCPU >= cpu and freeMem >= mem:
                # NOTE(review): the value passed here is the VM's remaining CPU,
                # while getEnergyConsumption names its parameter VMCPUUsage.
                # Confirm whether the used amount was intended instead.
                energy = getEnergyConsumption(freeCPU, cpuTotal, threshhold)
                if energy < lowestPower:
                    lowestPower = energy
                    lowestPowerIndex = VM_id

        if lowestPowerIndex != -1:
            # Reserve only what the task asks for on the chosen VM
            VMs[lowestPowerIndex][0] -= cpu
            VMs[lowestPowerIndex][1] -= mem
        else:
            rejected.append(row['TaskID'])

        # Tasks with more than one 300-unit slot left are queued for the next
        # step (this includes rejected ones, which are therefore retried).
        # Work on a copy so the caller's dataframe is never touched.
        if row[executionTime] > 300:
            remaining = row.copy()
            remaining[executionTime] -= 300
            carryOver.append(remaining)

    return rejected, carryOver

In [None]:
def GreedyCost(input_data, nextQueue, threshhold, executionTime,cpuTotal, memTotal):
    """Greedily place one time step's tasks on 100 VMs, lowest cost first.

    Every call starts from 100 fresh VMs, each with `cpuTotal` CPU and
    `memTotal` memory available. For each task (row) the VMs that can still
    fit its CPU and memory are scored with getCost applied to the task's
    'Time' and the VM's getEnergyConsumption value; the task goes to the
    lowest-scoring VM (first one wins on ties).

    Parameters
    ----------
    input_data : pandas.DataFrame
        Tasks to schedule; must have 'NrmlTaskCores', 'NrmlTaskMem', 'TaskID',
        'Time' and the column named by `executionTime`.
    nextQueue : list
        Ignored; kept only so existing calls keep working. The carry-over
        list is always rebuilt from scratch.
    threshhold : number
        Threshold forwarded to getEnergyConsumption.
    executionTime : str
        Name of the column holding each task's remaining execution time.
    cpuTotal, memTotal : number
        Capacity of every VM.

    Returns
    -------
    (rejected, nextQueue) : tuple of lists
        `rejected` holds the TaskID of every task that fit on no VM in this
        call. `nextQueue` holds a copy of every row whose execution time
        exceeded 300, with 300 subtracted from it.
    """
    # Local to this call: the caller extends its own running list with this
    # one, so sharing a single list would duplicate every entry.
    rejected = []
    carryOver = []
    VMs = [[cpuTotal, memTotal] for _ in range(100)]

    for _, row in input_data.iterrows():
        cpu, mem = row['NrmlTaskCores'], row['NrmlTaskMem']
        lowestCost = float('inf')
        lowestCostIndex = -1

        # Score every VM that still has room for this task
        for VM_id, (freeCPU, freeMem) in enumerate(VMs):
            if freeCPU >= cpu and freeMem >= mem:
                # NOTE(review): the value passed here is the VM's remaining CPU,
                # while getEnergyConsumption names its parameter VMCPUUsage.
                # Confirm whether the used amount was intended instead.
                energy = getEnergyConsumption(freeCPU, cpuTotal, threshhold)
                # The task's timestamp is the same for every VM, so only the
                # energy term differs between candidates.
                cost = getCost(row['Time'], energy)
                if cost < lowestCost:
                    lowestCost = cost
                    lowestCostIndex = VM_id

        if lowestCostIndex != -1:
            # Reserve only what the task asks for on the chosen VM
            VMs[lowestCostIndex][0] -= cpu
            VMs[lowestCostIndex][1] -= mem
        else:
            rejected.append(row['TaskID'])

        # Tasks with more than one 300-unit slot left are queued for the next
        # step (this includes rejected ones, which are therefore retried).
        # Work on a copy so the caller's dataframe is never touched.
        if row[executionTime] > 300:
            remaining = row.copy()
            remaining[executionTime] -= 300
            carryOver.append(remaining)

    return rejected, carryOver

In [None]:
# Run greedy power for CPU = 7, Mem = 11
# NOTE(review): this VMs list is never passed to GreedyPower, so the array saved
# at the end of the cell is still this initial state — confirm that is what
# should be persisted.
VMs = [[7, 11] for i in range(100)]
setty = sorted(set(df['Time'].values))
rejected = []
nextQueue = []
print(len(setty))
for i in setty:
    # Tasks arriving at this timestamp plus the ones carried over from the
    # previous step. DataFrame.append no longer exists in pandas 2.x;
    # pd.concat gives the same result and also works on older versions.
    taskQueue = df.loc[df['Time'] == i]
    if len(nextQueue) > 0:
        taskQueue = pd.concat([taskQueue, pd.DataFrame(nextQueue)])
    print(i)
    returnObj = GreedyPower(taskQueue, nextQueue, 0.5, 'executionTime1', 7, 11)
    rejected += returnObj[0]
    nextQueue = returnObj[1]

print("rejected tasks", rejected)
if len(rejected) > 0:
    np.save("taskReject_1_i", rejected)
np.save("VMs_1_i", VMs)

76
90000
90300
90600
90900
91200
91500
91800
92100
92400
92700
93000
93300


In [None]:
# Run greedy cost for CPU = 7, Mem = 11
# NOTE(review): this VMs list is never passed to GreedyCost, so the array saved
# at the end of the cell is still this initial state — confirm that is what
# should be persisted.
VMs = [[7, 11] for i in range(100)]
setty = sorted(set(df['Time'].values))
rejected = []
nextQueue = []
print(len(setty))
for i in setty:
    # Tasks arriving at this timestamp plus the ones carried over from the
    # previous step. DataFrame.append no longer exists in pandas 2.x;
    # pd.concat gives the same result and also works on older versions.
    taskQueue = df.loc[df['Time'] == i]
    if len(nextQueue) > 0:
        taskQueue = pd.concat([taskQueue, pd.DataFrame(nextQueue)])
    print(i)
    returnObj = GreedyCost(taskQueue, nextQueue, 0.5, 'executionTime1', 7, 11)
    rejected += returnObj[0]
    nextQueue = returnObj[1]

print("rejected tasks", rejected)
if len(rejected) > 0:
    np.save("taskReject_1_ii", rejected)
np.save("VMs_1_ii", VMs)

### Setting 2

In [189]:
# Run greedy power for CPU = 25 and Mem = 40
# NOTE(review): this VMs list is never passed to GreedyPower, so the array saved
# at the end of the cell is still this initial state — confirm that is what
# should be persisted.
VMs = [[25, 40] for i in range(100)]
setty = sorted(set(df['Time'].values))
rejected = []
nextQueue = []
for i in setty:
    # Tasks arriving at this timestamp plus the ones carried over from the
    # previous step. DataFrame.append no longer exists in pandas 2.x;
    # pd.concat gives the same result and also works on older versions.
    taskQueue = df.loc[df['Time'] == i]
    if len(nextQueue) > 0:
        taskQueue = pd.concat([taskQueue, pd.DataFrame(nextQueue)])

    returnObj = GreedyPower(taskQueue, nextQueue, 0.9, 'executionTime2', 25, 40)
    rejected += returnObj[0]
    nextQueue = returnObj[1]

if len(rejected) > 0:
    np.save("taskReject_2_i", rejected)
np.save("VMs_2_i", VMs)

In [191]:
# Print the energy and cost totals for setting 2.
# NOTE(review): energyUsage2 and totalCost2 are not assigned anywhere in the
# visible part of this notebook — presumably they came from a cell that is no
# longer here; confirm they are defined before this cell runs.
print("Energy Used:", energyUsage2)
print("Cost Incurred:", totalCost2)

Energy Used: 6614.281146138474
Cost Incurred: 4736.899788792278


In [192]:
# Run greedy cost for CPU = 25 and Mem = 40
# NOTE(review): this VMs list is never passed to GreedyCost, so the array saved
# at the end of the cell is still this initial state — confirm that is what
# should be persisted.
VMs = [[25, 40] for i in range(100)]
setty = sorted(set(df['Time'].values))
rejected = []
nextQueue = []
for i in setty:
    # Tasks arriving at this timestamp plus the ones carried over from the
    # previous step. DataFrame.append no longer exists in pandas 2.x;
    # pd.concat gives the same result and also works on older versions.
    taskQueue = df.loc[df['Time'] == i]
    if len(nextQueue) > 0:
        taskQueue = pd.concat([taskQueue, pd.DataFrame(nextQueue)])

    returnObj = GreedyCost(taskQueue, nextQueue, 0.9, 'executionTime2', 25, 40)
    rejected += returnObj[0]
    nextQueue = returnObj[1]

if len(rejected) > 0:
    np.save("taskReject_2_ii", rejected)
np.save("VMs_2_ii", VMs)