# Install Libraries

In [1]:
!pip install pandas
!pip install openpyxl



In [6]:
!pip install gym

Collecting gym
  Downloading gym-0.26.2.tar.gz (721 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25l\

In [None]:
import gym
import numpy as np

# Data preprocessing

In [2]:
import pandas as pd
import os

# Directory containing the files
directory = 'archive/'

# List of files
files = ['task40.xlsx', 'task80.xlsx', 'task120.xlsx', 'task160.xlsx', 'task200.xlsx', 'task240.xlsx', 'task280.xlsx']

# Full file paths
file_paths = [os.path.join(directory, file) for file in files]

# Per-workbook frames; they are concatenated once after the loop rather than
# growing a DataFrame inside it.
task_dataframes = []
node_dataframes = []

for file_path in file_paths:
    # Passing a list of sheet names parses the workbook a single time and
    # returns a dict keyed by sheet name, instead of re-opening the file for
    # every sheet.
    sheets = pd.read_excel(file_path, sheet_name=['TaskDetails', 'NodeDetails'])
    task_dataframes.append(sheets['TaskDetails'])
    node_dataframes.append(sheets['NodeDetails'])

# Stack the per-workbook frames; ignore_index=True renumbers rows 0..n-1 so
# positional and label indexing agree downstream.
task_df = pd.concat(task_dataframes, ignore_index=True)
node_df = pd.concat(node_dataframes, ignore_index=True)

# Check the result
print(task_df.head())
print(node_df.head())


  Unnamed: 0  Number of instructions (109 instructions)  Memory required (MB)  \
0     Task_1                                         40                   114   
1     Task_2                                         21                    57   
2     Task_3                                         45                    81   
3     Task_4                                         71                    90   
4     Task_5                                         92                    58   

   Input file size (MB)  Output file size (MB)  
0                    71                     28  
1                    44                     25  
2                    64                     10  
3                    78                     99  
4                    60                     20  
  Unnamed: 0  CPU rate (MIPS)  CPU usage cost  Memory usage cost  \
0     Node_1             3754          0.8044            0.02088   
1     Node_2             3585          0.9433            0.03190   
2     Node_3   

# Feature Scaling

In [3]:
# Column dtypes of the task frame followed by those of the node frame.
print(task_df.dtypes, node_df.dtypes, sep='\n')


Unnamed: 0                                   object
Number of instructions (109 instructions)     int64
Memory required (MB)                          int64
Input file size (MB)                          int64
Output file size (MB)                         int64
dtype: object
Unnamed: 0               object
CPU rate (MIPS)           int64
CPU usage cost          float64
Memory usage cost       float64
Bandwidth usage cost    float64
dtype: object


In [4]:
from sklearn.preprocessing import StandardScaler

# 'Unnamed: 0' holds the row labels read from the spreadsheets (Task_1, Node_1, ...)
# rather than a numeric feature, so it is removed from both frames.
# errors='ignore' keeps this cell re-runnable: the names task_df / node_df are
# reassigned here, so on a second run the column is already gone and a plain
# drop would raise KeyError.
task_df = task_df.drop(columns=['Unnamed: 0'], errors='ignore')
node_df = node_df.drop(columns=['Unnamed: 0'], errors='ignore')

# Display the result
print(task_df.head())
print(node_df.head())




   Number of instructions (109 instructions)  Memory required (MB)  \
0                                         40                   114   
1                                         21                    57   
2                                         45                    81   
3                                         71                    90   
4                                         92                    58   

   Input file size (MB)  Output file size (MB)  
0                    71                     28  
1                    44                     25  
2                    64                     10  
3                    78                     99  
4                    60                     20  
   CPU rate (MIPS)  CPU usage cost  Memory usage cost  Bandwidth usage cost
0             3754          0.8044            0.02088                0.0571
1             3585          0.9433            0.03190                0.0806
2             4617          0.8208            0.04455    

In [5]:
# Report the column index of each frame under its own label.
for label, frame in (("Columns in task_df: ", task_df), ("Columns in node_df: ", node_df)):
    print(label, frame.columns)


Columns in task_df:  Index(['Number of instructions (109 instructions)', 'Memory required (MB)',
       'Input file size (MB)', 'Output file size (MB)', 'file_id'],
      dtype='object')
Columns in node_df:  Index(['CPU rate (MIPS)', 'CPU usage cost', 'Memory usage cost',
       'Bandwidth usage cost', 'file_id'],
      dtype='object')


# Offloading environment

In [5]:
class OffloadingEnvironment(gym.Env):
    """Sequential task-offloading environment backed by two pandas DataFrames.

    Every call to ``step`` assigns the task at row ``current_step`` of
    ``task_df`` to the node at positional row ``action`` of ``node_df`` and
    rewards the agent with the negated weighted QoS cost of that assignment.
    The episode ends once every row of ``task_df`` has been assigned.

    Parameters
    ----------
    task_df : pandas.DataFrame
        One row per task; must contain the columns named by ``INSTRUCTIONS``,
        ``MEMORY`` and ``OUTPUT_SIZE``.
    node_df : pandas.DataFrame
        One row per node; must contain the columns named by ``CPU_RATE``,
        ``MEMORY_COST`` and ``BANDWIDTH_COST``.
    weights : sequence of three numbers, optional
        Weights applied to the normalised execution time, memory cost and
        bandwidth cost (in that order). Defaults to ``(0.5, 0.25, 0.25)``.

    Raises
    ------
    ValueError
        If ``weights`` does not contain exactly three entries.
    """

    # Column names read by calculate_qos_metrics.
    # NOTE(review): "(109 instructions)" presumably means 10^9 instructions - confirm.
    INSTRUCTIONS = 'Number of instructions (109 instructions)'
    MEMORY = 'Memory required (MB)'
    OUTPUT_SIZE = 'Output file size (MB)'
    CPU_RATE = 'CPU rate (MIPS)'
    MEMORY_COST = 'Memory usage cost'
    BANDWIDTH_COST = 'Bandwidth usage cost'

    def __init__(self, task_df, node_df, weights=(0.5, 0.25, 0.25)):
        super(OffloadingEnvironment, self).__init__()

        if len(weights) != 3:
            raise ValueError("weights must contain exactly three values")

        self.task_df = task_df
        self.node_df = node_df
        self.weights = tuple(weights)
        self.current_step = 0

        # One discrete action per node row. The name `spaces` is never imported
        # in this notebook, so the spaces are reached through the gym package.
        num_nodes = len(node_df)
        self.action_space = gym.spaces.Discrete(num_nodes)
        # NOTE(review): the "+4" presumably stands for the four task feature
        # columns - confirm once calculate_state is implemented.
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(num_nodes+4,))

    def step(self, action):
        """Assign the current task to node ``action``.

        Returns
        -------
        tuple
            ``(state, reward, done, info)`` where ``reward`` is the negated
            QoS cost of the assignment and ``info`` is an empty dict.
        """
        # action is the node to offload to
        node = self.node_df.iloc[action]
        task = self.task_df.iloc[self.current_step]

        # reward is negative QoS metrics (we want to minimize them)
        reward = -self.calculate_qos_metrics(node, task)

        self.current_step += 1

        # if we have processed all tasks, we're done
        done = self.current_step == len(self.task_df)

        # The returned state must describe the task the agent will place on the
        # NEXT call, otherwise observations lag one step behind the task being
        # assigned. After the final task there is no next task, so the task
        # just handled is reused for the terminal state.
        next_task = task if done else self.task_df.iloc[self.current_step]
        state = self.calculate_state(next_task, self.node_df)

        return state, reward, done, {}

    def reset(self):
        """Rewind to the first task and return the state built for it."""
        self.current_step = 0
        initial_state = self.calculate_state(self.task_df.iloc[0], self.node_df)
        return initial_state

    def render(self, mode='human'):
        """No-op; rendering is not needed for training."""
        pass

    def calculate_state(self, task, node_df):
        """Placeholder for the observation builder.

        Currently returns ``None``; replace with a function that builds an
        observation matching ``observation_space`` from ``task`` and
        ``node_df``.
        """
        # replace this with your own function
        pass

    def calculate_qos_metrics(self, node, task):
        """Return the weighted, normalised QoS cost of running ``task`` on ``node``.

        Parameters
        ----------
        node : row of ``node_df``
        task : row of ``task_df``

        Returns
        -------
        number
            ``w0 * time + w1 * memory + w2 * bandwidth`` with each term divided
            by its worst-case value over all task/node pairs.
        """
        execution_time = task[self.INSTRUCTIONS] / node[self.CPU_RATE]
        memory_cost = task[self.MEMORY] * node[self.MEMORY_COST]
        bandwidth_cost = task[self.OUTPUT_SIZE] * node[self.BANDWIDTH_COST]

        # Worst case over every (task, node) pairing. Dividing the two columns
        # directly would align them row-by-row on their index, which yields NaN
        # wherever the frames differ in length and ignores cross pairings; the
        # largest ratio is the biggest task on the slowest node (assumes
        # positive CPU rates).
        worst_execution_time = self.task_df[self.INSTRUCTIONS].max() / self.node_df[self.CPU_RATE].min()
        worst_memory_cost = self.task_df[self.MEMORY].max() * self.node_df[self.MEMORY_COST].max()
        worst_bandwidth_cost = self.task_df[self.OUTPUT_SIZE].max() * self.node_df[self.BANDWIDTH_COST].max()

        # normalize the metrics
        normalized_execution_time = execution_time / worst_execution_time
        normalized_memory_cost = memory_cost / worst_memory_cost
        normalized_bandwidth_cost = bandwidth_cost / worst_bandwidth_cost

        weights = self.weights
        qos_metrics = weights[0] * normalized_execution_time + weights[1] * normalized_memory_cost + weights[2] * normalized_bandwidth_cost

        return qos_metrics


NameError: name 'gym' is not defined

In [None]:
# NOTE(review): task_data / node_data are not assigned in any earlier visible
# cell - confirm where they are loaded before relying on a fresh-kernel run.
# errors='ignore' makes this cell safe to re-run after the column has already
# been dropped (a plain drop would raise KeyError the second time).
task_data = task_data.drop(columns=['Unnamed: 0'], errors='ignore')
node_data = node_data.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# NumPy array versions of the task and node tables.
task_data_np, node_data_np = task_data.values, node_data.values

In [None]:
# Bare expression: the notebook renders node_data as this cell's output.
node_data

Unnamed: 0,CPU rate (MIPS),CPU usage cost,Memory usage cost,Bandwidth usage cost
0,3754,0.8044,0.02088,0.0571
1,3585,0.9433,0.0319,0.0806
2,4617,0.8208,0.04455,0.0563
3,1097,0.2464,0.02723,0.0105
4,873,0.136,0.01474,0.018
5,1054,0.1142,0.01232,0.0183
6,1420,0.3196,0.01802,0.0113
7,1106,0.3844,0.01195,0.0181
8,1479,0.1576,0.0244,0.0129
9,645,0.2805,0.01828,0.0115


In [None]:
# Bare expression: the notebook renders task_data as this cell's output.
task_data

Unnamed: 0,Number of instructions (109 instructions),Memory required (MB),Input file size (MB),Output file size (MB)
0,40,114,71,28
1,21,57,44,25
2,45,81,64,10
3,71,90,78,99
4,92,58,60,20
5,36,107,90,31
6,21,198,38,56
7,72,52,99,43
8,33,121,81,70
9,14,145,42,82


# Environment

In [None]:
class TaskOffloadingEnvironment(gym.Env):
    """Array-based task-offloading environment.

    Tasks are assigned in row order: the n-th call to ``step`` maps task ``n``
    to the node index given as the action. The reward after each step is the
    negated sum of total latency, total energy and total cost over ALL
    assignments made so far in the episode.

    Parameters
    ----------
    task_data_np : numpy.ndarray
        2-D array with one row per task. Columns 0, 2 and 3 are read as the
        processing load and the two transferred file sizes.
        NOTE(review): presumably instructions / input size / output size as in
        the task table - confirm the column order.
    node_data_np : numpy.ndarray
        2-D array with one row per node. Column 0 is used as the processing
        rate and columns 1-3 as cost coefficients.
        NOTE(review): presumably CPU rate / CPU, memory and bandwidth usage
        cost as in the node table - confirm the column order.
    data_transfer_rate : number
        Divisor applied to the transferred sizes to obtain transfer latency.
    cost_per_unit_data : number
        Cost charged per unit of transferred data.
    """

    def __init__(self, task_data_np, node_data_np, data_transfer_rate, cost_per_unit_data):
        super(TaskOffloadingEnvironment, self).__init__()

        self.task_data_np = task_data_np
        self.node_data_np = node_data_np
        self.data_transfer_rate = data_transfer_rate
        self.cost_per_unit_data = cost_per_unit_data

        # Define action and observation space
        # They must be gym.spaces objects
        self.action_space = gym.spaces.Discrete(len(node_data_np))
        # Observation = features of the current task followed by every node
        # feature, flattened into one vector.
        observation_size = len(task_data_np[0]) + node_data_np.size
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(observation_size,), dtype=np.float32)

        # task index -> chosen node index, filled in assignment order
        self.task_to_node_mapping = {}

    def step(self, action):
        """Assign the next unassigned task to node ``action``.

        Returns
        -------
        tuple
            ``(observation, reward, done, info)``; ``info`` is an empty dict.

        Raises
        ------
        IndexError
            If every task has already been assigned (call ``reset`` first).
        """
        next_task_id = len(self.task_to_node_mapping)
        # Fail before touching the mapping so a stray call after the episode
        # has ended does not leave a bogus entry behind.
        if next_task_id >= len(self.task_data_np):
            raise IndexError("step() called after all tasks were assigned; call reset() first")

        self.task_to_node_mapping[next_task_id] = action

        # Totals are recomputed over every assignment made so far.
        total_latency, total_energy, total_cost = self.calculate_total_metrics(self.task_to_node_mapping)

        # We assume the reward is negatively proportional to the total metrics
        reward = -1 * (total_latency + total_energy + total_cost)

        done = len(self.task_to_node_mapping) == len(self.task_data_np)

        return self.get_observation(), reward, done, {}

    def reset(self):
        """Clear all assignments and return the observation for task 0."""
        self.task_to_node_mapping = {}
        return self.get_observation()

    def render(self, mode='human'):
        """No-op; nothing is rendered."""
        pass

    def get_observation(self):
        """Build the observation for the next task to be assigned.

        Returns a vector of the current task's features followed by all node
        features, or an all-zero vector once every task has been assigned.
        The result always uses the dtype declared by ``observation_space``.
        """
        obs_dtype = self.observation_space.dtype
        current_task_id = len(self.task_to_node_mapping)

        # If all tasks have been processed, return a zero observation
        if current_task_id == len(self.task_data_np):
            return np.zeros(self.observation_space.shape, dtype=obs_dtype)

        current_task_features = self.task_data_np[current_task_id]
        node_features = self.node_data_np.ravel()

        # Cast so the returned array matches the declared space dtype instead
        # of whatever dtype the input arrays happen to carry.
        observation = np.concatenate([current_task_features, node_features]).astype(obs_dtype)

        return observation

    def calculate_total_metrics(self, task_to_node_mapping):
        """Sum latency, energy and cost over every ``task_id -> node_id`` pair.

        Returns
        -------
        tuple
            ``(total_latency, total_energy, total_cost)``; all zero for an
            empty mapping.
        """
        total_latency = 0
        total_energy = 0
        total_cost = 0

        for task_id, node_id in task_to_node_mapping.items():
            task = self.task_data_np[task_id]
            node = self.node_data_np[node_id]

            # processing latency: task column 0 over node column 0
            processing_latency = task[0] / node[0]

            # data transfer latency: task columns 2 and 3, each over the transfer rate
            data_transfer_latency = task[2] / self.data_transfer_rate + task[3] / self.data_transfer_rate

            latency = processing_latency + data_transfer_latency
            total_latency += latency

            # energy: node column 1 scaled by the time the task occupies the node
            total_energy += node[1] * latency

            # cost: the three node cost coefficients scaled by latency, plus a
            # per-unit charge on the transferred data
            processing_cost = (node[1] + node[2] + node[3]) * latency
            data_transfer_cost = (task[2] + task[3]) * self.cost_per_unit_data
            total_cost += processing_cost + data_transfer_cost

        return total_latency, total_energy, total_cost
