# Transforming the New York 4G dataset into the format expected by the simulation (JSON list)

Dataset available at: https://github.com/NYU-METS/Main/tree/master

In [48]:
import pandas as pd
import os
import json
import matplotlib.pyplot as plt

In [49]:
# Root folder holding the downloaded NY 4G CSV logs.
log_folder = "../new_data/NY_4G_data"
# Folder that receives the converted JSON traces.
save_folder = "../new_data/NY_4G_data_json"

In [50]:
# Walk log_folder recursively and keep the full path of every file whose
# name ends with "csv", in the order os.walk reports them.
logs_list = [
    os.path.join(folder, file)
    for folder, _, files in os.walk(log_folder)
    for file in files
    if file.endswith("csv")
]

In [51]:
# Show the discovered CSV paths as a quick sanity check.
print(logs_list)

['../new_data/NY_4G_data/Dataset_2/7Train/7trainNew.csv', '../new_data/NY_4G_data/Dataset_2/7Train/7BtrainNew.csv', '../new_data/NY_4G_data/Dataset_2/QTrain/QtrainNew.csv', '../new_data/NY_4G_data/Dataset_2/BusBrooklyn/bus62New.csv', '../new_data/NY_4G_data/Dataset_2/BusBrooklyn/bus57New.csv', '../new_data/NY_4G_data/Dataset_1/Bus_B62/bus62_2.csv', '../new_data/NY_4G_data/Dataset_1/Bus_B62/bus62.csv', '../new_data/NY_4G_data/Dataset_1/Car/Car_1.csv', '../new_data/NY_4G_data/Dataset_1/Car/Car_2.csv', '../new_data/NY_4G_data/Dataset_1/Ferry/Ferry4.csv', '../new_data/NY_4G_data/Dataset_1/Ferry/Ferry2.csv', '../new_data/NY_4G_data/Dataset_1/Ferry/Ferry3.csv', '../new_data/NY_4G_data/Dataset_1/Ferry/Ferry5.csv', '../new_data/NY_4G_data/Dataset_1/Ferry/Ferry1.csv', '../new_data/NY_4G_data/Dataset_1/Bus_NYU_Campus/NYU_Campus_Bus.csv', '../new_data/NY_4G_data/Dataset_1/Subway_D_Train/d2.csv', '../new_data/NY_4G_data/Dataset_1/Subway_D_Train/d1.csv', '../new_data/NY_4G_data/Dataset_1/Subway_7Tr

In [57]:
# Convert every CSV log in logs_list into one JSON trace file in save_folder.
#
# Each CSV row becomes one trace entry with a fixed duration of 1000 (the
# trace reader in this notebook interprets durations as milliseconds), the
# first CSV column scaled by 1000 as "capacity", and a cumulative "time".

# open(..., "w") does not create missing parent directories, so make sure the
# destination exists before the first write.
os.makedirs(save_folder, exist_ok=True)

for log_file in logs_list:

    print("Reading from: ", log_file)

    # Read without a header: some files start directly with data rows.
    df = pd.read_csv(log_file, sep=",", header=None)
    if len(df.columns) > 2:
        print(df)
        # Only the first two columns are named meaningfully; any extra
        # columns get a placeholder name and are dropped further down.
        df.columns = ["capacity", "str_Mbps"] + ["bla"]*(len(df.columns)-2)
    else:
        df.columns = ["capacity", "str_Mbps"]

    # A string in the first cell means the file did carry a header line that
    # was read as data: drop that row and convert the column back to floats.
    if isinstance(df.iloc[0]["capacity"], str):
        print("IM in the right place!")
        df = df.iloc[1:].reset_index(drop=True)
        df = df.astype({'capacity': 'float'})

    df["duration"] = 1000
    # NOTE(review): presumably converts Mbps to kbps -- confirm the unit the
    # simulation expects.
    df["capacity"] = df["capacity"]*1000
    # Explicit copy so that adding "time" below writes to an independent
    # frame rather than to a selection of the original one.
    df = df[["duration", "capacity"]].copy()
    df["time"] = df["duration"].cumsum() / 1000

    d_final = {
        "type": "video",
        "downlink": {},
        "uplink": {
            "trace_pattern": df[["time", "duration", "capacity"]].to_dict(orient="records")
        },
    }

    # Output name is "<parent folder>_<file name up to the first dot>.json".
    # Use os.path instead of splitting on "/" so the name is also correct
    # when the paths built by os.path.join use a different separator.
    parent_name = os.path.basename(os.path.dirname(log_file))
    stem = os.path.basename(log_file).split(".")[0]
    save_file = parent_name + "_" + stem + ".json"
    save_path = os.path.join(save_folder, save_file)
    print("Saving to: ", save_path)

    with open(save_path, "w") as log_save:
        json.dump(d_final, log_save)

Reading from:  ../new_data/NY_4G_data/Dataset_2/7Train/7trainNew.csv
Saving to:  ../new_data/NY_4G_data_json/7Train_7trainNew.json
Reading from:  ../new_data/NY_4G_data/Dataset_2/7Train/7BtrainNew.csv
Saving to:  ../new_data/NY_4G_data_json/7Train_7BtrainNew.json
Reading from:  ../new_data/NY_4G_data/Dataset_2/QTrain/QtrainNew.csv
Saving to:  ../new_data/NY_4G_data_json/QTrain_QtrainNew.json
Reading from:  ../new_data/NY_4G_data/Dataset_2/BusBrooklyn/bus62New.csv
Saving to:  ../new_data/NY_4G_data_json/BusBrooklyn_bus62New.json
Reading from:  ../new_data/NY_4G_data/Dataset_2/BusBrooklyn/bus57New.csv
Saving to:  ../new_data/NY_4G_data_json/BusBrooklyn_bus57New.json
Reading from:  ../new_data/NY_4G_data/Dataset_1/Bus_B62/bus62_2.csv
Saving to:  ../new_data/NY_4G_data_json/Bus_B62_bus62_2.json
Reading from:  ../new_data/NY_4G_data/Dataset_1/Bus_B62/bus62.csv
Saving to:  ../new_data/NY_4G_data_json/Bus_B62_bus62.json
Reading from:  ../new_data/NY_4G_data/Dataset_1/Car/Car_1.csv
Saving to: 

In [54]:
def make_bandwidth_series(trace_path, step_time):
    """Load a JSON trace and return its capacity resampled on a regular grid.

    The file at ``trace_path`` must contain ``uplink.trace_pattern``, a list
    of records with ``duration`` and ``capacity`` fields. Cumulative
    durations are used as millisecond timestamps; a sample at time 0 carrying
    the first capacity is prepended. The resulting series is resampled every
    ``step_time`` milliseconds, filling new slots with the next known value.

    Returns a ``pandas.Series`` indexed by timestamp.
    """
    with open(trace_path, "r") as trace_file:
        trace = json.load(trace_file)
    pattern = pd.DataFrame(trace["uplink"]["trace_pattern"])

    # Timestamps mark the end of each segment, preceded by an origin at 0.
    boundaries_ms = [0, *pattern["duration"].cumsum()]
    # The origin sample repeats the first capacity so both lists line up.
    values = [pattern["capacity"].iloc[0], *pattern["capacity"]]

    series = pd.Series(data=values, index=pd.to_datetime(boundaries_ms, unit="ms"))
    return series.resample(f"{step_time}ms").bfill()

In [55]:
# Folder containing the converted JSON traces.
parent_folder = "../new_data/NY_4G_data_json"

# Keep only entries whose name ends in ".json". A plain substring test would
# also pick up unrelated names that merely contain "json" (e.g. backups such
# as "x.json.bak").
traces = [
    os.path.join(parent_folder, log_file)
    for log_file in os.listdir(parent_folder)
    if log_file.endswith(".json")
]

traces

['../new_data/NY_4G_data_json/Subway_Q_Train_Q_Train2.json',
 '../new_data/NY_4G_data_json/7Train_7trainNew.json',
 '../new_data/NY_4G_data_json/Bus_M15_M15_1.json',
 '../new_data/NY_4G_data_json/Subway_D_Train_d2.json',
 '../new_data/NY_4G_data_json/LIRR_Long_Island_Rail_Road.json',
 '../new_data/NY_4G_data_json/BusBrooklyn_bus62New.json',
 '../new_data/NY_4G_data_json/Bus_M15_M15_2.json',
 '../new_data/NY_4G_data_json/QTrain_QtrainNew.json',
 '../new_data/NY_4G_data_json/Bus_B57_bus57_1.json',
 '../new_data/NY_4G_data_json/Car_Car_2.json',
 '../new_data/NY_4G_data_json/Subway_7Train_7Train1.json',
 '../new_data/NY_4G_data_json/BusBrooklyn_bus57New.json',
 '../new_data/NY_4G_data_json/Subway_D_Train_d1.json',
 '../new_data/NY_4G_data_json/Subway_7Train_7Train2.json',
 '../new_data/NY_4G_data_json/Ferry_Ferry4.json',
 '../new_data/NY_4G_data_json/Ferry_Ferry2.json',
 '../new_data/NY_4G_data_json/Car_Car_1.json',
 '../new_data/NY_4G_data_json/Bus_NYU_Campus_NYU_Campus_Bus.json',
 '../ne

In [56]:
# Resample every trace with a 200 ms step and print its path, the resulting
# series and a separator line, one item per line.
for trace_path in traces:
    capacities = make_bandwidth_series(trace_path, 200)
    print(trace_path, capacities, "--------------------------------------", sep="\n")

../new_data/NY_4G_data_json/Subway_Q_Train_Q_Train2.json
1970-01-01 00:00:00.000     4050.0
1970-01-01 00:00:00.200     4050.0
1970-01-01 00:00:00.400     4050.0
1970-01-01 00:00:00.600     4050.0
1970-01-01 00:00:00.800     4050.0
                            ...   
1970-01-01 00:23:14.200    12900.0
1970-01-01 00:23:14.400    12900.0
1970-01-01 00:23:14.600    12900.0
1970-01-01 00:23:14.800    12900.0
1970-01-01 00:23:15.000    12900.0
Freq: 200L, Length: 6976, dtype: float64
--------------------------------------
../new_data/NY_4G_data_json/7Train_7trainNew.json
1970-01-01 00:00:00.000    5080.0
1970-01-01 00:00:00.200    5080.0
1970-01-01 00:00:00.400    5080.0
1970-01-01 00:00:00.600    5080.0
1970-01-01 00:00:00.800    5080.0
                            ...  
1970-01-01 03:13:10.200       0.0
1970-01-01 03:13:10.400       0.0
1970-01-01 03:13:10.600       0.0
1970-01-01 03:13:10.800       0.0
1970-01-01 03:13:11.000       0.0
Freq: 200L, Length: 57956, dtype: float64
------------

../new_data/NY_4G_data_json/7Train_7BtrainNew.json
1970-01-01 00:00:00.000    1010.0
1970-01-01 00:00:00.200    1010.0
1970-01-01 00:00:00.400    1010.0
1970-01-01 00:00:00.600    1010.0
1970-01-01 00:00:00.800    1010.0
                            ...  
1970-01-01 03:30:58.200    3140.0
1970-01-01 03:30:58.400    3140.0
1970-01-01 03:30:58.600    3140.0
1970-01-01 03:30:58.800    3140.0
1970-01-01 03:30:59.000    3140.0
Freq: 200L, Length: 63296, dtype: float64
--------------------------------------
../new_data/NY_4G_data_json/Bus_B62_bus62_2.json
1970-01-01 00:00:00.000    4720.0
1970-01-01 00:00:00.200    4720.0
1970-01-01 00:00:00.400    4720.0
1970-01-01 00:00:00.600    4720.0
1970-01-01 00:00:00.800    4720.0
                            ...  
1970-01-01 00:24:11.200    2940.0
1970-01-01 00:24:11.400    2940.0
1970-01-01 00:24:11.600    2940.0
1970-01-01 00:24:11.800    2940.0
1970-01-01 00:24:12.000    2940.0
Freq: 200L, Length: 7261, dtype: float64
------------------------------