In [1]:
import pandas as pd

# Load the already-processed log (decimal separators were fixed upstream).
df = pd.read_csv("../data/bbr/bbr1.log.csv", sep=";")

# Drop every column whose name starts with "Unnamed".
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

# Synthetic timestamp: the log carries no time column, so consecutive samples
# are assumed to be SAMPLE_INTERVAL_S seconds apart.
SAMPLE_INTERVAL_S = 0.1
df['timestamp'] = df.index * SAMPLE_INTERVAL_S

# Enforce the dtypes the arithmetic below relies on.
df = df.astype({
    'bytes_sent': 'int',
    'bytes_acked': 'int',
    'bytes_retrans': 'int',
    'mss': 'int',
    'rtt': 'float'
})

# Per-interval deltas of the cumulative byte counters (first row is NaN).
delta_bytes_sent = df['bytes_sent'].diff()
delta_bytes_acked = df['bytes_acked'].diff()

df['throughput'] = delta_bytes_acked / df['timestamp'].diff()
df['packets_sent'] = delta_bytes_sent / df['mss']
df['packets_acked'] = delta_bytes_acked / df['mss']
# NOTE(review): sent-minus-acked goes negative whenever an interval acknowledges
# more data than it sends, so this looks more like a change in unacknowledged
# packets than a loss count -- confirm the intended definition.
df['packets_lost'] = df['packets_sent'] - df['packets_acked']

# Intervals in which nothing was sent have no defined loss rate. Mask the zero
# divisor with NaN through `where` rather than `replace(0, pd.NA)`: pd.NA cannot
# live in a float64 Series, so it would turn the divisor -- and therefore
# `loss_rate` -- into an object-dtype column.
df['loss_rate'] = df['bytes_retrans'].diff() / delta_bytes_sent.where(delta_bytes_sent != 0)

# Drop the rows whose derived metrics are undefined: the first row (no previous
# sample to diff against) and intervals with an undefined loss rate. Limiting
# the check to the derived columns keeps a NaN in any unrelated input column
# from silently discarding otherwise valid samples.
derived_cols = ['throughput', 'packets_sent', 'packets_acked', 'packets_lost', 'loss_rate']
df = df.dropna(subset=derived_cols)

# Preview the derived metrics.
df[['timestamp', 'throughput', 'packets_lost', 'loss_rate']].head()



Unnamed: 0,timestamp,throughput,packets_lost,loss_rate
1,0.1,275120.0,-1.0,0.0
2,0.2,260640.0,-10.0,0.0
3,0.3,231680.0,2.0,0.0
4,0.4,318560.0,4.0,0.0
5,0.5,231680.0,6.0,0.0


In [1]:
# Normalizar el dataset:

import pandas as pd
import os
from pathlib import Path

root = Path("../data/")
output_root = Path("../data_normalized/")

# Columns that get a per-file z-score companion column named "<col>_z".
Z_SCORE_COLUMNS = ['cwnd', 'rtt', 'bytes_retrans', 'throughput', 'packets_lost']

# Walk every variant sub-directory of `root` and mirror it under `output_root`.
# Sorting only makes the processing order deterministic; the files written are
# the same either way.
for variant_dir in sorted(root.iterdir()):
    if not variant_dir.is_dir():
        continue

    out_dir = output_root / variant_dir.name
    out_dir.mkdir(parents=True, exist_ok=True)

    for file in sorted(variant_dir.glob("*.csv")):
        df = pd.read_csv(file, sep=';')

        # Normalisation is per file: each file uses its own mean and std.
        for col in Z_SCORE_COLUMNS:
            if col not in df.columns:
                continue

            centered = df[col] - df[col].mean()
            std = df[col].std()

            if std > 0:
                df[f'{col}_z'] = centered / std
            else:
                # std is 0 (constant column, e.g. a counter that never moves)
                # or NaN (fewer than two values). Dividing would fill the whole
                # column with NaN, so report "no deviation from the mean" as
                # 0.0 and keep NaN only where the input itself is missing.
                df[f'{col}_z'] = centered.where(centered.isna(), 0.0)

        # NOTE(review): input is read with ';' but this writes pandas' default
        # ',' separator -- confirm that downstream readers expect that.
        df.to_csv(out_dir / file.name, index=False)
