In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import json

In [2]:
# All locations are resolved relative to the "data" folder that sits next to
# the current working directory's parent.
data_path = Path.cwd().parent.joinpath("data")
mi_path = data_path.joinpath('sources/mobile_insight')   # raw input files
out_path = data_path.joinpath("mi_intermediate")         # files written by this notebook

# Resampling window passed to DataFrame.resample() further down.
sample_period = "1s"

## Collect and preprocess data sources

We preload the portmap from the measurement plan to assign the device to the server

In [3]:
# How each PUSCH report column is reduced within one resampling window.
pusch_agg = {
    # The median stands in for the mode. NOTE(review): with an even number of
    # samples split between two cells the median is the average of both IDs,
    # which is not a real cell ID once cast to int — confirm this is acceptable.
    'Serving Cell ID': 'median',
    'Num of RB': 'sum',
    'PUSCH TB Size': 'sum',
    'PUSCH Tx Power (dBm)': 'mean',
}

# Only the aggregated columns are read from the report files (same order as above).
pusch_cols = list(pusch_agg)

In [4]:
# Read every PUSCH Tx report below mi_path, reduce it to one row per
# `sample_period` window and tag the rows with the device they came from.
pusch_list = []
# Iterate in sorted order: rglob() yields matches in whatever order the
# filesystem returns them, and that order becomes the row order of the
# concatenated (and saved) table.
for p in sorted(mi_path.rglob("*LTE_PHY_PUSCH_Tx_Report*.parquet")):
    print(p.relative_to(mi_path))

    # The folder holding the report names the device (pc1, pc2, ... in the
    # recorded output).
    pc = p.parent.stem

    # Load only the columns that are aggregated below.
    df = pd.read_parquet(p, columns=pusch_cols)

    # Aggregate each window with the per-column functions in pusch_agg, then
    # drop windows in which any aggregate came out as NaN.
    df = df.resample(sample_period).aggregate(pusch_agg).dropna()
    df['device'] = pc
    pusch_list.append(df)

pc1\LTE_PHY_PUSCH_Tx_Report.parquet
pc2\LTE_PHY_PUSCH_Tx_Report.parquet
pc3\LTE_PHY_PUSCH_Tx_Report.parquet
pc4\LTE_PHY_PUSCH_Tx_Report.parquet


In [5]:
# Stack the per-device frames into one table, give the RB column its final
# name and store the (median-derived) cell ID as an integer.
pusch_df = (
    pd.concat(pusch_list)
    .rename(columns={"Num of RB": "Num RBs"})
    .astype({"Serving Cell ID": int})
)

In [6]:
# Write the combined PUSCH table to the intermediate-data folder.
# The folder is created first so the write also succeeds when it does not
# exist yet (e.g. on a fresh checkout); existing folders are left untouched.
out_path.mkdir(parents=True, exist_ok=True)
pusch_df.to_parquet(out_path/"pusch.parquet", compression="gzip")

# Last expression of the cell: show the (truncated) table as the cell output.
pusch_df

Unnamed: 0_level_0,Serving Cell ID,Num RBs,PUSCH TB Size,PUSCH Tx Power (dBm),device
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-06-21 10:33:09+02:00,115,31,54,44.800000,pc1
2021-06-21 10:33:10+02:00,115,101,189,44.937500,pc1
2021-06-21 10:33:11+02:00,115,60,0,46.200000,pc1
2021-06-21 10:33:12+02:00,115,98,211,45.531250,pc1
2021-06-21 10:33:13+02:00,115,92,396,44.900000,pc1
...,...,...,...,...,...
2021-06-24 10:20:32+02:00,42,85524,5597563,7.304609,pc4
2021-06-24 10:20:33+02:00,42,86902,5692276,6.666667,pc4
2021-06-24 10:20:34+02:00,42,86082,5635683,5.143430,pc4
2021-06-24 10:20:35+02:00,42,86250,5606782,8.297297,pc4
