In [5]:
import numpy as np
from sklearn.cluster import SpectralClustering
import skfuzzy as fuzz
from skfuzzy import control as ctrl

# Define fuzzy variables and membership functions
data_transfer_time_ratio = ctrl.Antecedent(np.arange(0, 1.1, 0.1), 'Data transfer time ratio')
vm_load = ctrl.Antecedent(np.arange(0, 1.1, 0.1), 'Virtual Machine load')
data_availability = ctrl.Antecedent(np.arange(0, 1.1, 0.1), 'Data availability')
provider_profit = ctrl.Antecedent(np.arange(0, 1.1, 0.1), 'Provider profit')
placement_potential = ctrl.Consequent(np.arange(0, 1.1, 0.1), 'Placement potential')

# Define fuzzy membership functions
data_transfer_time_ratio.automf(3)
vm_load.automf(3)
data_availability.automf(3)
provider_profit.automf(3)
placement_potential.automf(5)

# Define fuzzy rules
rule1 = ctrl.Rule(data_transfer_time_ratio['poor'] & vm_load['poor'] & data_availability['poor'] & provider_profit['poor'], placement_potential['poor'])
rule2 = ctrl.Rule(data_transfer_time_ratio['poor'] & vm_load['average'] & data_availability['poor'] & provider_profit['average'], placement_potential['mediocre'])
rule3 = ctrl.Rule(data_transfer_time_ratio['average'] & vm_load['average'] & data_availability['poor'] & provider_profit['average'], placement_potential['average'])
rule4 = ctrl.Rule(data_transfer_time_ratio['average'] & vm_load['average'] & data_availability['average'] & provider_profit['average'], placement_potential['decent'])
#rule5 = ctrl.Rule(data_transfer_time_ratio['good'] & vm_load['good'] & data_availability['average'] & provider_profit['good'], placement_potential['good'])

# Create and simulate fuzzy inference system
replication_ctrl = ctrl.ControlSystem([rule1, rule2, rule3, rule4])
replication_sim = ctrl.ControlSystemSimulation(replication_ctrl)

# Define cloud providers, their regions, and prices
cloud_providers = {
    'Provider 1': {
        'US': {'CPU': 0.020, 'Storage': 0.006, 'BW': 0.001},
        'EU': {'CPU': 0.025, 'Storage': 0.006, 'BW': 0.0015},
        'AS': {'CPU': 0.027, 'Storage': 0.0066, 'BW': 0.002}
    },
    'Provider 2': {
        'US': {'CPU': 0.020, 'Storage': 0.0096, 'BW': 0.001},
        'EU': {'CPU': 0.018, 'Storage': 0.0096, 'BW': 0.0015},
        'AS': {'CPU': 0.020, 'Storage': 0.0096, 'BW': 0.002}
    },
    'Provider 3': {
        'US': {'CPU': 0.0095, 'Storage': 0.0012, 'BW': 0.001},
        'EU': {'CPU': 0.009, 'Storage': 0.0096, 'BW': 0.0015},
        'AS': {'CPU': 0.008, 'Storage': 0.009, 'BW': 0.002}
    },
}

# Replica placement logic
def place_replica(provider_prices, placement_result):
    best_provider = max(placement_result, key=lambda provider: placement_result[provider])
    return best_provider

# Adjust the spectral clustering parameters
def spectral_clustering(data, k):
    n_samples = data.shape[0]
    n_neighbors = min(15, n_samples - 1)  # Set n_neighbors to a maximum of n_samples - 1
    spectral = SpectralClustering(n_clusters=k, affinity='rbf', gamma=1.0, n_neighbors=n_neighbors)
    labels = spectral.fit_predict(data)
    return labels



# Simulate cloud system and obtain data related to SLA violations
def simulate_cloud_system():
    cloud_data = []
    for provider_id, (provider_name, regions) in enumerate(cloud_providers.items(), start=1):
        for region_name, prices in regions.items():
            num_data_centers = np.random.randint(2, 6)
            for dc_id in range(1, num_data_centers + 1):
                for vm_id in range(1, 9):
                    num_tasks = np.random.randint(1000, 10001)
                    for task_id in range(1, num_tasks + 1):
                        task_size = np.random.randint(200, 1001)
                        num_data = np.random.randint(300, 1001)

                        response_time = np.random.randint(120, 241)
                        availability = np.random.uniform(0.9, 1.0)

                        sla_violation = (
                            response_time > 180 or
                            availability < 0.95
                        )

                        cloud_data.append({
                            'provider_id': provider_id,
                            'region_name': region_name,
                            'dc_id': dc_id,
                            'vm_id': vm_id,
                            'task_id': task_id,
                            'task_size': task_size,
                            'num_data': num_data,
                            'response_time': response_time,
                            'availability': availability,
                            'sla_violation': sla_violation
                        })

    return cloud_data

# Call the simulation function
cloud_data = simulate_cloud_system()

# Identify data points likely to cause SLA violations using spectral clustering
data_for_clustering = np.array([
    [data_point['response_time'], data_point['availability']] for data_point in cloud_data
])

# Sample a subset of the data (adjust the size based on your available memory)
sample_size = min(5000, len(cloud_data))  # Adjust the sample size based on your available memory
sampled_data = np.array([
    [data_point['response_time'], data_point['availability']] for data_point in np.random.choice(cloud_data, sample_size)
])

# Call spectral clustering with the sampled data
k_clusters = 3  # Number of clusters
data_labels = spectral_clustering(sampled_data, k_clusters)

# Iterate through identified clusters and apply fuzzy inference for replica placement
for cluster_label in range(k_clusters):
    cluster_data = [cloud_data[i] for i, label in enumerate(data_labels) if label == cluster_label]
    for data_point in cluster_data:
        replication_sim.input['Data transfer time ratio'] = data_point['response_time'] / 240  # Normalize to [0, 1]
        replication_sim.input['Virtual Machine load'] = data_point['vm_id'] / 8  # Normalize to [0, 1]
        replication_sim.input['Data availability'] = data_point['availability']
        replication_sim.input['Provider profit'] = cloud_providers[f"Provider {data_point['provider_id']}"][
            data_point['region_name']]['CPU']  # Use CPU price as a proxy for profit

        replication_sim.compute()

        # Get the result and use it for replica placement
        placement_result = {}
        for region, provider_prices in cloud_providers.items():
            replication_sim.compute()  # Make sure to compute before accessing the output
            placement_result[region] = replication_sim.output['Placement potential']


        # Implement replica placement logic using placement_result
        selected_provider = place_replica(cloud_providers, placement_result)
        print(f"Replica for data point {data_point} placed on {selected_provider}")

Replica for data point {'provider_id': 1, 'region_name': 'US', 'dc_id': 1, 'vm_id': 1, 'task_id': 6, 'task_size': 886, 'num_data': 843, 'response_time': 212, 'availability': 0.9558481486925325, 'sla_violation': True} placed on Provider 1
Replica for data point {'provider_id': 1, 'region_name': 'US', 'dc_id': 1, 'vm_id': 1, 'task_id': 14, 'task_size': 254, 'num_data': 548, 'response_time': 149, 'availability': 0.9682570613969762, 'sla_violation': False} placed on Provider 1
Replica for data point {'provider_id': 1, 'region_name': 'US', 'dc_id': 1, 'vm_id': 1, 'task_id': 15, 'task_size': 581, 'num_data': 806, 'response_time': 215, 'availability': 0.9062671512354427, 'sla_violation': True} placed on Provider 1
Replica for data point {'provider_id': 1, 'region_name': 'US', 'dc_id': 1, 'vm_id': 1, 'task_id': 17, 'task_size': 596, 'num_data': 768, 'response_time': 172, 'availability': 0.9798496776091882, 'sla_violation': False} placed on Provider 1
Replica for data point {'provider_id': 1, '

ValueError: Crisp output cannot be calculated, likely because the system is too sparse. Check to make sure this set of input values will activate at least one connected Term in each Antecedent via the current set of Rules.