In [201]:
import numpy as np
import matplotlib.pyplot as plt
import time
import pandas as pd
import math
import random
import pickle

# Pre-Processing

In [202]:
# Extract the comma-separated record from every 'l4s_ecn_marking-start,<fields> end'
# log line and write it, one record per line, to the processed file.
with open('llmrawdata.txt', 'r') as infile, open('lmprocesseddata.txt', 'w') as outfile:
    marker = 'l4s_ecn_marking-start'
    for line in infile:
        marker_pos = line.find(marker)
        if marker_pos == -1:
            continue

        # Anchor on the marker instead of splitting on every '-': a plain split("-")
        # truncates the record at the first negative field value (or picks the wrong
        # piece when a hyphen precedes the marker), and the line is then silently lost.
        content = line[marker_pos + len('l4s_ecn_marking-'):].strip()

        if content.endswith('end'):
            # Drop the leading 'start,' (6 characters) and the trailing ' end' (4 characters).
            outfile.write(content[6:-4] + '\n')


In [203]:
# Column names, in field order, for the header-less processed log file
# (passed as `names=` to pd.read_csv). The trailing comment on each entry
# names the logged source field it corresponds to.
column_list = [
    "queue_type",                   # q->queue_type
    "qdelay_reference",             # pprms->qdelay_ref
    "tupdate",                      # pprms->tupdate
    "max_burst",                    # pprms->max_burst
    "max_ecn_threshold",            # pprms->max_ecnth
    "alpha_coefficient",            # pprms->alpha
    "beta_coefficient",             # pprms->beta
    "flags",                        # pprms->flags
    "burst_allowance",              # pst->burst_allowance
    "drop_probability",             # pst->drop_prob
    "current_queue_delay",          # pst->current_qdelay
    "previous_queue_delay",         # pst->qdelay_old
    "accumulated_probability",      # pst->accu_prob
    "measurement_start_time",       # pst->measurement_start
    "average_dequeue_time",         # pst->avg_dq_time
    "dequeue_count",                # pst->dq_count
    "status_flags",                 # pst->sflags
    "total_packets",                # q->stats.tot_pkts
    "total_bytes",                  # q->stats.tot_bytes
    "queue_length",                 # q->stats.length
    "length_in_bytes",              # q->stats.len_bytes
    "total_drops",                  # q->stats.drops
    "dequeue_action",               # dequeue_action
]


In [204]:
# Columns logged from the `pprms->*` parameter fields; they are removed right after loading.
columns_to_drop = [
    "qdelay_reference",             # pprms->qdelay_ref
    "tupdate",                      # pprms->tupdate
    "max_burst",                    # pprms->max_burst
    "max_ecn_threshold",            # pprms->max_ecnth
    "alpha_coefficient",            # pprms->alpha
    "beta_coefficient",             # pprms->beta
    "flags",                        # pprms->flags
]

# The processed file has no header row, so the names come from column_list.
df = (
    pd.read_csv("lmprocesseddata.txt", header=None, names=column_list)
    .drop(columns=columns_to_drop)
)

In [205]:
# Show dtypes and non-null counts of the columns that remain after the drop.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 536439 entries, 0 to 536438
Data columns (total 16 columns):
 #   Column                   Non-Null Count   Dtype
---  ------                   --------------   -----
 0   queue_type               536439 non-null  int64
 1   burst_allowance          536439 non-null  int64
 2   drop_probability         536439 non-null  int64
 3   current_queue_delay      536439 non-null  int64
 4   previous_queue_delay     536439 non-null  int64
 5   accumulated_probability  536439 non-null  int64
 6   measurement_start_time   536439 non-null  int64
 7   average_dequeue_time     536439 non-null  int64
 8   dequeue_count            536439 non-null  int64
 9   status_flags             536439 non-null  int64
 10  total_packets            536439 non-null  int64
 11  total_bytes              536439 non-null  int64
 12  queue_length             536439 non-null  int64
 13  length_in_bytes          536439 non-null  int64
 14  total_drops              536439 non-

In [206]:
# Shift the logged action codes down by one so they start at 0.
# NOTE: this cell is not idempotent -- re-running it without reloading `df`
# subtracts 1 again.
df['dequeue_action'] = df['dequeue_action'] - 1

# Class balance of the shifted actions. The previous bare `.unique()` expression was
# removed: only the last expression of a cell is displayed, so its result was discarded.
df['dequeue_action'].value_counts()

dequeue_action
0    532357
1      3394
2       688
Name: count, dtype: int64

In [207]:
# Persist the processed frame. `index` is left at the pandas default, so the row
# index is written as an unnamed first column of the CSV.
df.to_csv("final_exp_csv.csv")

# For trimming

In [208]:
# # Calculate the number of rows to trim (2% of total rows)
# rows_to_trim = int(len(df) * 0.02)

# # Trim the first 2% of rows (rows_to_trim) from the top of the DataFrame
# trimmed_df = df.iloc[rows_to_trim:]

# # Reset the index of the trimmed DataFrame
# trimmed_df.reset_index(drop=True, inplace=True)

# # Display the trimmed DataFrame
# print(trimmed_df)
# df = trimmed_df

# Gen_Exp_Pool

In [209]:
# Total number of rows available for building the experience pool.
len(df)

536439

In [None]:
import numpy as np
import pickle

class ExperiencePool:
    """
    Append-only store of transitions, kept as four parallel lists
    (states, actions, rewards, dones) that share the same index.
    """
    def __init__(self):
        self.states, self.actions, self.rewards, self.dones = [], [], [], []

    def add(self, state, action, reward, done):
        """Append one transition; each value goes to the end of its own list."""
        # `state` is sometimes also called obs (observation)
        columns = (self.states, self.actions, self.rewards, self.dones)
        for column, value in zip(columns, (state, action, reward, done)):
            column.append(value)

    def __len__(self):
        # The pool size is defined by the number of stored states.
        return len(self.states)


import os  # only needed for the clean-up step at the end of this cell

# Feature columns that make up the state vector of every stored transition
columns_to_use = [
    'queue_type', 
    'burst_allowance',
    'drop_probability',
    'current_queue_delay',
    'accumulated_probability',
    'average_dequeue_time',
    'length_in_bytes',
    'total_drops'
]

# Number of rows immediately preceding each action change that are copied into the pool
window_size = 5

# Initialize the experience pool
exp_pool = ExperiencePool()

# Action of the previously visited row, and the number of action changes seen so far
prev_action = None
count = 0

# Only the leading part of the trace is scanned: the loop stops at the first row
# whose position exceeds 6% of the total row count.
row_limit = df.shape[0] * 0.06

# `position` is the 0-based row position. It is used for the iloc window bounds and the
# stop condition so that they stay correct even if `df` does not carry a default
# RangeIndex (for a RangeIndex, position == index).
for position, (index, row) in enumerate(df.iterrows()):
    current_action = row['dequeue_action']

    if prev_action is not None and current_action != prev_action:
        count += 1

        # Window = the `window_size` rows right before the change; the row where the
        # action changed is excluded. Windows of changes that are closer together than
        # `window_size` rows overlap, so such rows are stored more than once.
        start_index = max(0, position - window_size)  # Ensure we don't go below 0
        end_index = position

        selected_rows = df.iloc[start_index:end_index]
        for _, selected_row in selected_rows.iterrows():
            state = np.array(selected_row[columns_to_use], dtype=np.float32)
            # The raw current_queue_delay is stored as the reward, and done is always 0.
            exp_pool.add(state=state, action=selected_row['dequeue_action'], reward=selected_row['current_queue_delay'], done=0)

    # The stop check runs after the change check, so the row that crosses the limit
    # is still examined for an action change.
    if position > row_limit:
        break

    # Update the previous action
    prev_action = current_action

# Save the experience pool
pickle_save_path = 'exp_pool_l4s_train.pkl'
with open(pickle_save_path, 'wb') as f:
    pickle.dump(exp_pool, f)

print(f"Done. Experience pool saved at: {pickle_save_path}")

# Delete the intermediate processed file after processing. The cell that reads it
# cannot be re-run afterwards without re-running the pre-processing cell first.
os.remove('lmprocesseddata.txt')

Done. Experience pool saved at: exp_pool_l4s_eval.pkl


In [211]:
# Number of action changes counted while building the experience pool.
print(count)

50


In [212]:
# Number of transitions stored in the pool (add() appends one action per transition).
len(exp_pool.actions)

272108

In [213]:
# Scratch arithmetic. NOTE(review): the origin of 10465 and 20 is not shown in this
# notebook -- confirm what this cell is meant to check, or remove it.
10465/20

523.25

In [214]:
import numpy as np
from torch.utils.data import Dataset
from munch import Munch
from torch.utils.data import DataLoader


def discount_returns(rewards, gamma, scale):
    """
    Compute the scaled discounted return for every step of one episode.

    The return at step i is rewards[i] + gamma * return[i + 1]; the last step's
    return is its own reward. Every return is then divided by ``scale``.

    :param rewards: sequence of per-step rewards of a single episode
    :param gamma: the reward discounted factor
    :param scale: divisor applied to every return
    :return: list of returns, same length as ``rewards`` (empty list for empty input)
    """
    # An empty episode has no returns; without this guard rewards[-1] raises IndexError.
    if len(rewards) == 0:
        return []

    returns = [0] * len(rewards)
    returns[-1] = rewards[-1]
    # Walk backwards so that each step can reuse the return of its successor.
    for i in range(len(rewards) - 2, -1, -1):
        returns[i] = rewards[i] + gamma * returns[i + 1]
    return [value / scale for value in returns]  # scale down return


class ExperienceDataset(Dataset):
    """
    A dataset class that wraps the experience pool.

    On construction the pool's rewards are min-max normalized and converted into
    per-episode discounted returns. Items are fixed-length windows of
    (states, actions, returns, timesteps) over the flat pool.
    """
    def __init__(self, exp_pool, gamma=1., scale=10, max_length=30, sample_step=None) -> None:
        """
        :param exp_pool: the experience pool
        :param gamma: the reward discounted factor
        :param scale: the factor to scale the return
        :param max_length: the w value in our paper, see the paper for details.
        :param sample_step: distance between the start positions of consecutive windows;
            defaults to max_length and is capped at max_length
        :raises ValueError: if the experience pool is empty
        """
        if sample_step is None:
            sample_step = max_length

        self.exp_pool = exp_pool
        self.exp_pool_size = len(exp_pool)
        if self.exp_pool_size == 0:
            # Previously this surfaced as an opaque "min() arg is an empty sequence" ValueError.
            raise ValueError('Cannot build an ExperienceDataset from an empty experience pool.')
        self.gamma = gamma
        self.scale = scale
        self.max_length = max_length

        self.returns = []
        self.timesteps = []
        self.rewards = []

        self.exp_dataset_info = {}

        self._normalize_rewards()
        self._compute_returns()
        self.exp_dataset_info.update({
            'max_action': max(self.actions),
            'min_action': min(self.actions)
        })

        # Start position of every window; only windows that fit entirely inside the pool are kept.
        self.dataset_indices = list(range(0, self.exp_pool_size - max_length + 1, min(sample_step, max_length)))
    
    def sample_batch(self, batch_size=1, batch_indices=None):
        """
        Sample a batch of data from the experience pool.
        :param batch_size: the size of a batch. For CJS task, batch_size should be set to 1 due to the unstructural data format.
        :param batch_indices: optional dataset indices to use instead of random ones; only the
            first batch_size entries are read
        :return: four lists (states, actions, returns, timesteps) with one window per sample
        """
        if batch_indices is None:
            # np.random.choice samples with replacement by default.
            batch_indices = np.random.choice(len(self.dataset_indices), size=batch_size)
        batch_states, batch_actions, batch_returns, batch_timesteps = [], [], [], []
        for i in range(batch_size):
            states, actions, returns, timesteps = self[batch_indices[i]]
            batch_states.append(states)
            batch_actions.append(actions)
            batch_returns.append(returns)
            batch_timesteps.append(timesteps)
        return batch_states, batch_actions, batch_returns, batch_timesteps
    
    @property
    def states(self):
        return self.exp_pool.states

    @property
    def actions(self):
        return self.exp_pool.actions
    
    @property
    def dones(self):
        return self.exp_pool.dones
    
    def __len__(self):
        return len(self.dataset_indices)
    
    def __getitem__(self, index):
        """Return the window of max_length consecutive steps that starts at dataset_indices[index]."""
        start = self.dataset_indices[index]
        end = start + self.max_length
        return self.states[start:end], self.actions[start:end], self.returns[start:end], self.timesteps[start:end]

    def _normalize_rewards(self):
        """
        Min-max normalize the pool's rewards into [0, 1] and record the raw extremes.
        """
        min_reward, max_reward = min(self.exp_pool.rewards), max(self.exp_pool.rewards)
        reward_range = max_reward - min_reward
        if reward_range == 0:
            # All rewards are identical: dividing by the zero range would fill the
            # rewards (and every return derived from them) with NaN, so map them to 0.
            rewards = np.zeros(len(self.exp_pool.rewards))
        else:
            rewards = (np.array(self.exp_pool.rewards) - min_reward) / reward_range
        self.rewards = rewards.tolist()
        self.exp_dataset_info.update({
            'max_reward': max_reward,
            'min_reward': min_reward,
        })

    def _compute_returns(self):
        """
        Compute returns (discounted cumulative rewards)
        """
        episode_start = 0
        while episode_start < self.exp_pool_size:
            try:
                # An episode ends at the next step whose done flag equals True.
                episode_end = self.dones.index(True, episode_start) + 1
            except ValueError:
                # No further done flag: everything that is left forms one final episode.
                episode_end = self.exp_pool_size
            self.returns.extend(discount_returns(self.rewards[episode_start:episode_end], self.gamma, self.scale))
            # Timesteps restart from 0 at the beginning of every episode.
            self.timesteps += list(range(episode_end - episode_start))
            episode_start = episode_end
        assert len(self.returns) == len(self.timesteps)
        self.exp_dataset_info.update({
            # for normalizing rewards/returns
            'max_return': max(self.returns),
            'min_return': min(self.returns),

            # to help determine the maximum size of timesteps embedding
            'min_timestep': min(self.timesteps),
            'max_timestep': max(self.timesteps),
        })


In [215]:
# Wrap the pool in 20-step windows that start every 10 steps, and serve them one window per batch.
batch_size = 1
exp_dataset = ExperienceDataset(exp_pool, gamma=1.0, scale=1000, max_length=20, sample_step=10)
exp_dataset_info = Munch(exp_dataset.exp_dataset_info)
dataloader = DataLoader(dataset=exp_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)

# Peek at the first batch only, to see which container types the default collation produces.
for step, batch in enumerate(dataloader):
    states, actions, returns, timesteps = batch
    first_state = states[0]
    print("Type of first element in states:", type(first_state))
    print("process_batch states type:", type(states))
    break

Type of first element in states: <class 'torch.Tensor'>
process_batch states type: <class 'list'>


In [216]:
# Walk the whole dataloader once and report how many batches it yields.
# Note that `count` is reused here; it no longer holds the number of action changes.
count = 0
for step, batch in enumerate(dataloader):
    count = step + 1
print(count)

27209
