# In [2]:
import simpy
import random
from sklearn.cluster import SpectralClustering
import numpy as np

class VirtualMachine:
    """A simulated virtual machine that processes tasks inside a data center.

    The VM keeps simple counters (processed tasks, accumulated transfer time,
    uptime / observed time) and a ``data_access_log`` of tasks whose response
    time came close to, but did not exceed, the SLA response-time objective.
    """

    # SLA settings applied by process_task.
    SLA_RESPONSE_TIME = 60       # response-time objective (SLORT), in seconds
    SLA_THRESHOLD_WEIGHT = 0.8   # fraction of the objective treated as "near violation"
    SLA_PENALTY = 0.0025         # amount deducted from a task's revenue on violation

    def __init__(self, env, id, processing_capability, num_cpus, ram, storage_capacity, data_center, region, bw_params):
        """Store the VM description and reset all runtime counters.

        Args:
            env: Simulation environment; ``env.now`` and ``env.timeout`` are used.
            id: Identifier of this VM.
            processing_capability: Divisor applied to a task's size to get its
                processing time.
            num_cpus: Number of CPUs (stored only).
            ram: RAM size (stored only).
            storage_capacity: Storage size (stored only).
            data_center: Data center hosting this VM; receives the revenue.
            region: Region index of the hosting data center.
            bw_params: Mapping with ``'intra_dc'``, ``'intra_region'`` and
                ``'inter_region'`` entries, each holding ``'capacity'`` and
                ``'delay'``.
        """
        self.env = env
        self.id = id
        self.processing_capability = processing_capability
        self.num_cpus = num_cpus
        self.ram = ram
        self.storage_capacity = storage_capacity
        self.data_center = data_center
        self.region = region
        self.bw_params = bw_params
        self.uptime = 0
        self.total_time = 0
        self.data_access_log = {}
        self.tasks_processed = 0
        self.load = 0  # Tasks processed per unit of observed time
        self.total_transfer_time = 0  # Sum of transfer times of all processed tasks

    def update_availability(self, time_passed, is_up=True):
        """Account for ``time_passed`` units of observed time.

        Args:
            time_passed: Length of the observed interval.
            is_up: Whether the VM was available during the interval. Defaults
                to True; pass False to record downtime.
        """
        self.total_time += time_passed
        # Without this, uptime stayed at 0 forever and availability collapsed
        # to 0 as soon as any time had been observed.
        if is_up:
            self.uptime += time_passed

    def get_availability(self):
        """Return uptime / observed time, or 1 when no time was observed yet."""
        if self.total_time == 0:
            return 1
        return self.uptime / self.total_time

    def calculate_data_transfer_time(self, size, target_dc):
        """Return the time needed to move ``size`` units of data to ``target_dc``.

        The link class is chosen by comparing this VM's data center and region
        with the target; the result is ``size / capacity + delay`` of that link.
        """
        if self.data_center == target_dc:
            link = self.bw_params['intra_dc']
        elif self.region == target_dc.region:
            link = self.bw_params['intra_region']
        else:
            link = self.bw_params['inter_region']
        return size / link['capacity'] + link['delay']

    def process_task(self, task, target_dc):
        """Simulation process: run ``task`` on this VM and settle its revenue.

        Yields a single timeout covering processing plus data transfer. After
        it elapses the counters are updated, the task's cost (minus an SLA
        penalty if the response-time objective was exceeded) is added to the
        hosting data center's revenue, and near-violations are logged.

        Args:
            task: Mapping with at least ``'id'`` and ``'size'``.
            target_dc: Data center the task's data is transferred to.
        """
        data_transfer_time = self.calculate_data_transfer_time(task['size'], target_dc)
        start_time = self.env.now
        total_processing_time = task['size'] / self.processing_capability + data_transfer_time
        yield self.env.timeout(total_processing_time)
        end_time = self.env.now

        self.tasks_processed += 1
        # NOTE(review): total_time only changes through update_availability, so
        # load stays 0 unless callers invoke that method.
        self.load = self.tasks_processed / self.total_time if self.total_time > 0 else 0
        self.total_transfer_time += data_transfer_time

        # Scaled by 60 to compare against the objective, which is in seconds
        # (presumably simulation time is in minutes — TODO confirm).
        response_time = (end_time - start_time) * 60
        near_violation_threshold = self.SLA_THRESHOLD_WEIGHT * self.SLA_RESPONSE_TIME

        violated = response_time > self.SLA_RESPONSE_TIME
        penalty = self.SLA_PENALTY if violated else 0

        cost = self.data_center.calculate_cost(task['size'], target_dc) - penalty
        self.data_center.total_revenue += cost

        # Only tasks that were close to, but within, the objective are logged.
        if not violated and response_time > near_violation_threshold:
            self.data_access_log[task['id']] = (task, response_time, data_transfer_time)

class DataCenter:
    """A data center that hosts VMs, prices work and accumulates revenue."""

    def __init__(self, env, id, region, pricing):
        """Remember the data center's identity and pricing; start with no VMs."""
        self.env = env
        self.id = id
        self.region = region
        self.pricing = pricing
        self.total_revenue = 0
        self.vms = []

    def add_vm(self, vm):
        """Register ``vm`` as hosted by this data center."""
        self.vms.append(vm)

    def calculate_cost(self, size, target_dc):
        """Return the price of processing and transferring ``size`` units.

        The bandwidth rate depends on where ``target_dc`` is relative to this
        data center (same data center, same region, or another region). A
        missing ``'cpu_price'`` is treated as 0.
        """
        cpu_price = self.pricing.get('cpu_price', 0)
        bw_price = self.pricing['bw_price']

        # Pick the bandwidth tier first, then look its rate up once.
        if self == target_dc:
            tier = 'intra_dc'
        elif self.region == target_dc.region:
            tier = 'intra_region'
        else:
            tier = 'inter_region'
        bw_cost = bw_price[tier]

        return size * cpu_price + bw_cost * size

class CloudProvider:
    """A cloud provider owning data centers and VMs spread over several regions.

    Attributes:
        regions: Every data center of this provider, across all regions. The
            attribute name is kept for existing callers, which iterate it as a
            list of data centers (``dc.vms``, ``dc.total_revenue``).
        owned_vms: VMs this provider currently owns.
        offered_vms: Owned VMs currently offered to other providers.
        rented_vms: VMs rented from other providers.
    """

    def __init__(self, env, id, num_regions, dcs_per_provider, num_vms_per_dc, vm_specs, pricing, bw_params):
        """Build the provider's data centers and VMs.

        Args:
            env: Simulation environment handed to every data center and VM.
            id: Provider identifier, used as prefix of all generated ids.
            num_regions: Number of regions to create data centers in.
            dcs_per_provider: ``(low, high)`` bounds passed to
                ``random.randint`` to draw the number of data centers of each
                region.
            num_vms_per_dc: Number of VMs created in every data center.
            vm_specs: Mapping with ``'processing_capability'``, ``'num_cpus'``,
                ``'ram'`` and ``'storage_capacity'``.
            pricing: Pricing mapping shared by all data centers.
            bw_params: Bandwidth parameters shared by all VMs.
        """
        self.env = env
        self.id = id
        self.regions = []
        self.pricing = pricing
        self.bw_params = bw_params
        self.owned_vms = []
        self.offered_vms = []
        self.rented_vms = []
        for i in range(num_regions):
            region_id = f'{id}_Region_{i}'
            num_dcs = random.randint(*dcs_per_provider)
            for j in range(num_dcs):
                dc_id = f'{region_id}_DC_{j}'
                data_center = DataCenter(env, dc_id, i, pricing)
                for k in range(num_vms_per_dc):
                    vm_id = f'{dc_id}_VM_{k}'
                    vm = VirtualMachine(env, vm_id, vm_specs['processing_capability'], vm_specs['num_cpus'],
                                        vm_specs['ram'], vm_specs['storage_capacity'], data_center, i, bw_params)
                    data_center.add_vm(vm)
                    self.owned_vms.append(vm)
                # Register the data center that actually hosts the VMs and
                # collects revenue; previously only VM-less placeholders were
                # stored, so revenue and VM traversal always came up empty.
                self.regions.append(data_center)

    def get_total_revenue(self):
        """Return the revenue summed over all of the provider's data centers."""
        return sum(dc.total_revenue for dc in self.regions)

    def offer_idle_vms(self):
        """Recompute ``offered_vms`` as the owned VMs whose load is below 0.5.

        The result depends only on the current state, so repeated calls no
        longer alternate between "all idle VMs" and "none".
        """
        self.offered_vms = [vm for vm in self.owned_vms if vm.load < 0.5]

    def rent_vms(self, other_provider):
        """Rent every VM ``other_provider`` currently offers.

        Each VM not already rented is appended to ``rented_vms`` and removed
        from the other provider's ``owned_vms`` and ``offered_vms``.
        """
        # Iterate over a snapshot: the offer list is mutated inside the loop,
        # which would otherwise skip every other VM.
        for vm in list(other_provider.offered_vms):
            if vm in self.rented_vms:
                continue
            self.rented_vms.append(vm)
            if vm in other_provider.owned_vms:
                other_provider.owned_vms.remove(vm)
            if vm in other_provider.offered_vms:
                other_provider.offered_vms.remove(vm)

def spectral_clustering(data_access_log, n_clusters):
    """Group logged tasks into ``n_clusters`` clusters with spectral clustering.

    Each log entry is a sequence whose elements 1 and 2 are used as the two
    clustering features (response time and data transfer time as written by
    ``VirtualMachine.process_task``).

    Args:
        data_access_log: Mapping of task id to log entry.
        n_clusters: Number of clusters to form.

    Returns:
        Dict mapping each cluster label ``0..n_clusters-1`` to the list of task
        ids assigned to it. An empty log yields an empty dict, so callers can
        always iterate the result with ``.items()``.
    """
    if not data_access_log:
        # Same (dict) type as the non-empty case; a list here broke callers
        # that iterate the result with .items().
        return {}

    task_ids = list(data_access_log.keys())
    data = np.array([[log[1], log[2]] for log in data_access_log.values()])

    clustering = SpectralClustering(n_clusters=n_clusters, assign_labels='discretize', random_state=0).fit(data)
    clusters = {i: [] for i in range(n_clusters)}
    for task_id, label in zip(task_ids, clustering.labels_):
        clusters[label].append(task_id)
    return clusters

def place_replicas(clusters, providers):
    """Copy the log entry of every clustered task onto a randomly chosen VM.

    For each task id in ``clusters`` the first matching entry found in any
    VM's ``data_access_log`` is looked up. A target is then drawn at random
    (provider, then data center, then VM) and the entry is stored in the
    target VM's log unless that VM already has one for the task.

    Data centers without VMs are never selected; if no data center has a VM
    the function does nothing.

    Args:
        clusters: Mapping of cluster id to list of task ids.
        providers: Providers whose ``regions`` are lists of data centers
            exposing ``vms``.
    """
    # Index all logged entries once instead of rescanning every VM per task.
    # setdefault keeps the first entry in scan order, like the former
    # next(...) lookup did.
    entries = {}
    for provider in providers:
        for dc in provider.regions:
            for vm in dc.vms:
                for entry in vm.data_access_log.values():
                    entries.setdefault(entry[0]['id'], entry)

    # Per provider, the data centers that can actually host a replica.
    # random.choice raises IndexError on an empty sequence, so empty data
    # centers (and providers left with none) are filtered out up front.
    placeable = []
    for provider in providers:
        hosting_dcs = [dc for dc in provider.regions if dc.vms]
        if hosting_dcs:
            placeable.append(hosting_dcs)
    if not placeable:
        return

    for tasks in clusters.values():
        for task_id in tasks:
            entry = entries.get(task_id)
            if entry is None:
                continue
            target_dc = random.choice(random.choice(placeable))
            target_vm = random.choice(target_dc.vms)
            if task_id not in target_vm.data_access_log:
                target_vm.data_access_log[task_id] = entry

def run_simulation(env, providers, tasks, P, K):
    """Simulation process that periodically trades VMs and places replicas.

    Once per simulation time unit, for every provider in turn:
      1. the provider refreshes its offer of idle VMs,
      2. it rents what every *other* provider currently offers,
      3. each of its VMs holding at least ``P`` log entries has them clustered
         into ``K`` clusters, replicated via ``place_replicas``, and cleared.

    Args:
        env: Simulation environment providing ``timeout``.
        providers: Providers taking part in the simulation.
        tasks: Accepted for the caller's convenience; not read by this loop.
        P: Minimum number of log entries that triggers clustering on a VM.
        K: Number of clusters requested from ``spectral_clustering``.
    """
    while True:
        for current in providers:
            current.offer_idle_vms()

            # Lazily walk the peers so renting stays interleaved with the scan.
            peers = (candidate for candidate in providers if candidate != current)
            for peer in peers:
                current.rent_vms(peer)

            for dc in current.regions:
                for vm in dc.vms:
                    if len(vm.data_access_log) < P:
                        continue
                    grouped = spectral_clustering(vm.data_access_log, K)
                    place_replicas(grouped, providers)
                    vm.data_access_log.clear()

        # Sleep until the next periodic check, one simulation time unit later.
        yield env.timeout(1)

# Metrics Calculation
def calculate_transfer_time_ratio(vm):
    """Return the VM's average transfer time relative to a link-derived reference.

    The average is ``total_transfer_time`` divided by the number of entries in
    ``data_access_log``. The reference is the smallest link capacity divided by
    the largest link delay over the three bandwidth classes.

    Returns:
        The ratio, or 0 when the log is empty, when the largest delay is 0
        (the reference would be undefined), or when the reference itself is 0.
    """
    if not vm.data_access_log:
        return 0

    links = [vm.bw_params[name] for name in ('intra_dc', 'intra_region', 'inter_region')]
    avg_transfer_time = vm.total_transfer_time / len(vm.data_access_log)

    max_delay = max(link['delay'] for link in links)
    if max_delay == 0:
        # Dividing by a zero delay raised ZeroDivisionError before the
        # zero-reference guard below could apply.
        return 0

    reference = min(link['capacity'] for link in links) / max_delay
    return avg_transfer_time / reference if reference != 0 else 0

def calculate_load(vm):
    """Return the load metric currently stored on ``vm``."""
    current_load = vm.load
    return current_load

def calculate_data_availability(vm):
    """Return the availability reported by ``vm.get_availability()``."""
    availability = vm.get_availability()
    return availability

def calculate_profit(provider):
    """Return the provider's total revenue minus its per-task operating cost.

    For every VM reachable through ``provider.regions`` the cost is the number
    of processed tasks times the CPU price, plus the same count times the
    storage price, plus the same count times a bandwidth price. The bandwidth
    tier is chosen by comparing the VM's data center and region with the data
    center it is listed under.
    """
    revenue = provider.get_total_revenue()
    expenses = 0

    for dc in provider.regions:
        for vm in dc.vms:
            processed = vm.tasks_processed
            compute_and_storage = (processed * provider.pricing['cpu_price']
                                   + processed * provider.pricing['storage_price'])

            # Select the bandwidth tier, then price it once.
            if vm.data_center == dc:
                tier = 'intra_dc'
            elif vm.region == dc.region:
                tier = 'intra_region'
            else:
                tier = 'inter_region'
            bandwidth = provider.pricing['bw_price'][tier] * processed

            expenses += compute_and_storage + bandwidth

    return revenue - expenses

# Simulation setup
# Builds one shared SimPy environment, three providers with their own pricing,
# a list of synthetic tasks, and runs the periodic management process for 10
# simulation time units.
env = simpy.Environment()

# Simulation parameters
num_providers = 3
num_regions = 3
# (low, high) bounds handed to random.randint by CloudProvider when it draws
# how many data centers to create per region.
dcs_per_provider = (2, 5)
num_vms_per_dc = 8
vm_specs = {
    'processing_capability': 1500,  # in MIPS
    'num_cpus': 2,
    'ram': 4,  # in Gb
    'storage_capacity': 8  # in Gb
}
num_data = 10000
task_size_range = (200, 1000)  # Task size range in MI

# Bandwidth parameters
# One entry per link class; a transfer takes size / capacity + delay.
# NOTE(review): units of 'capacity' and 'delay' are not stated here — confirm.
bw_params = {
    'inter_region': {'capacity': 500, 'delay': 150},
    'intra_region': {'capacity': 1000, 'delay': 50},
    'intra_dc': {'capacity': 8000, 'delay': 10}
}

# Per-provider price list, keyed by the provider id built in the loop below.
# 'bw_price' holds one rate per link class.
pricing = {
    'Provider_1': {
        'cpu_price': 0.01,
        'storage_price': 0.001,
        'bw_price': {'intra_dc': 0.001, 'intra_region': 0.002, 'inter_region': 0.01}
    },
    'Provider_2': {
        'cpu_price': 0.0095,
        'storage_price': 0.0012,
        'bw_price': {'intra_dc': 0.001, 'intra_region': 0.002, 'inter_region': 0.008}
    },
    'Provider_3': {
        'cpu_price': 0.009,
        'storage_price': 0.0096,
        'bw_price': {'intra_dc': 0.0015, 'intra_region': 0.002, 'inter_region': 0.008}
    }
}

# Create Provider_1 .. Provider_<num_providers>; a provider without a pricing
# entry is skipped with a warning instead of aborting the setup.
providers = []
for i in range(num_providers):
    provider_key = f'Provider_{i+1}'
    if provider_key in pricing:
        provider = CloudProvider(env, provider_key, num_regions, dcs_per_provider, num_vms_per_dc, vm_specs, pricing[provider_key], bw_params)
        providers.append(provider)
    else:
        print(f"Warning: Pricing not found for {provider_key}")

# Synthetic workload: num_data tasks with sequential ids and a random size
# drawn (inclusive bounds) from task_size_range.
tasks = [{'id': i, 'size': random.randint(*task_size_range)} for i in range(num_data)]

# Run the simulation
# 32 is the per-VM log size that triggers clustering (P) and 3 the number of
# clusters (K).
# NOTE(review): run_simulation, as defined in this file, does not read `tasks`,
# and nothing here schedules VirtualMachine.process_task — confirm whether task
# dispatch happens elsewhere.
env.process(run_simulation(env, providers, tasks, 32, 3))
env.run(until=10)
