In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import json


In [2]:
# Directory layout, resolved relative to the parent of the current working
# directory: <cwd>/../data holds both the raw inputs and the derived outputs.
data_path = Path.cwd().parent.joinpath("data")
# Root folder that is searched recursively for the MobileInsight parquet logs.
mi_path = data_path.joinpath("sources/mobile_insight")
# Destination folder for the aggregated parquet file written by this notebook.
out_path = data_path.joinpath("mi_intermediate")

# Width of the resampling bins (pandas offset alias) used for all aggregations.
sample_period = "1s"

## Collect and preprocess data sources

We preload the portmap from the measurement plan to assign the device to the server

In [3]:
# Subset of columns read from every PDSCH parquet file.
pdsch_cols = [
    'Serving Cell Index',
    'Num RBs',
    'TB Size',
    'Num Transport Blocks Present',
    'MCS',
]

In [4]:
# Both secondary-cell labels are folded into a single 'SCell' category. The
# mapping is loop-invariant, so it is defined once outside the loop.
# NOTE(review): a label missing from this mapping becomes NaN after .map(), and
# groupby drops NaN keys by default, so such rows would disappear silently —
# confirm no other labels occur in the logs.
cell_labels = {'PCell': 'PCell', '1_SCell': 'SCell', '2_SCell': 'SCell'}

# One aggregated frame per input file; concatenated in a later cell.
pdsch_list = []
# sorted() makes the processing (and therefore concatenation) order
# deterministic; rglob() alone yields files in filesystem-dependent order.
for p in sorted(mi_path.rglob("*LTE_PHY_PDSCH_Stat_Indication*.parquet")):
    print(p.relative_to(mi_path))

    # The parent directory of the file is used as the device label.
    pc = p.parent.stem

    df = pd.read_parquet(p, columns=pdsch_cols)

    # Only count the RBs of a record once: divide by the number of transport
    # blocks present. Computed a single time and reused for both aggregations.
    # NOTE(review): rows with 'Num Transport Blocks Present' == 0 would produce
    # inf/NaN here — TODO confirm such rows cannot occur in the input.
    rbs_per_tb = df["Num RBs"] / df["Num Transport Blocks Present"]
    df = df.assign(**{
        "Serving Cell Index": df["Serving Cell Index"].map(cell_labels),
        "Num RBs": rbs_per_tb,
    })

    # Sum the RB share per (MCS, serving cell) within each sampling bin.
    mcs = (
        df[["MCS", "Serving Cell Index"]]
        .assign(rb_weight=rbs_per_tb)
        .groupby(["MCS", "Serving Cell Index"])
        .resample(sample_period)["rb_weight"]
        .sum(numeric_only=True)
    )
    # One column per MCS value, indexed by (serving cell, time bin).
    mcs_pivot = mcs.unstack(level="MCS")
    # Drop bins without any RBs and blank out zero cells so that only MCS
    # levels actually observed in a bin carry a value.
    mcs_pivot = mcs_pivot[mcs_pivot.sum(axis=1) > 0]
    mcs_pivot = mcs_pivot.where(mcs_pivot > 0, np.nan)

    # RB-weighted average MCS per bin (column labels are the MCS values).
    weighted_mcs = mcs_pivot.columns * mcs_pivot
    avg_mcs = weighted_mcs.sum(axis=1) / mcs_pivot.sum(axis=1)

    mcs_pivot = (
        mcs_pivot
        .rename(columns=lambda x: f"RBs_MCS_{x}")
        .assign(Average_MCS=avg_mcs.round())
    )

    # Per-cell totals of the remaining numeric columns in each sampling bin.
    df = df.drop(columns=['MCS', 'Num Transport Blocks Present'])
    df = df.groupby('Serving Cell Index').resample(sample_period).sum(numeric_only=True)

    # resample() also emits bins that contain no records; keep only bins that
    # actually carried RBs.
    df = df[df['Num RBs'] > 0]

    # Inner join (the pandas default, spelled out) on the shared index levels.
    df = pd.merge(df, mcs_pivot, how="inner", on=["timestamp", "Serving Cell Index"])

    # Flatten to a timestamp-indexed frame with the serving cell as a column.
    df = df.reset_index().set_index('timestamp').sort_index()

    df['device'] = pc
    pdsch_list.append(df)

pc1\LTE_PHY_PDSCH_Stat_Indication.parquet
pc2\LTE_PHY_PDSCH_Stat_Indication.parquet
pc3\LTE_PHY_PDSCH_Stat_Indication.parquet
pc4\LTE_PHY_PDSCH_Stat_Indication.parquet


In [5]:
# Stack the per-file frames row-wise; each frame keeps its own timestamp index.
pdsch_df = pd.concat(pdsch_list, axis=0)

In [6]:
# Create the output directory if it is missing so the write below does not
# fail on a fresh checkout (Restart Kernel -> Run All must succeed).
out_path.mkdir(parents=True, exist_ok=True)
pdsch_df.to_parquet(out_path/"pdsch.parquet", compression="gzip")
# Last expression of the cell: rich display of the combined frame.
pdsch_df

Unnamed: 0_level_0,Serving Cell Index,Num RBs,TB Size,RBs_MCS_0,RBs_MCS_1,RBs_MCS_2,RBs_MCS_3,RBs_MCS_4,RBs_MCS_5,RBs_MCS_6,...,RBs_MCS_24,RBs_MCS_25,RBs_MCS_26,RBs_MCS_27,RBs_MCS_28,RBs_MCS_29,RBs_MCS_30,RBs_MCS_31,Average_MCS,device
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-06-21 10:28:00+02:00,PCell,5.0,16,,,,5.0,,,,...,,,,,,,,,3.0,pc1
2021-06-21 10:28:01+02:00,PCell,6.0,21,,,6.0,,,,,...,,,,,,,,,2.0,pc1
2021-06-21 10:33:09+02:00,PCell,11.0,52,3.0,8.0,,,,,,...,,,,,,,,,1.0,pc1
2021-06-21 10:33:10+02:00,PCell,11.0,52,3.0,8.0,,,,,,...,,,,,,,,,1.0,pc1
2021-06-21 10:33:11+02:00,PCell,12.0,63,,12.0,,,,,,...,,,,,,,,,1.0,pc1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-24 10:20:31+02:00,PCell,269.0,2671,196.0,4.0,,52.0,,,,...,,,,,17.0,,,,2.0,pc4
2021-06-24 10:20:32+02:00,PCell,49.0,3935,3.0,4.0,,,,,,...,,,,,42.0,,,,24.0,pc4
2021-06-24 10:20:33+02:00,PCell,43.0,3641,,4.0,,,,,,...,,,,,39.0,,,,25.0,pc4
2021-06-24 10:20:34+02:00,PCell,160.0,4146,108.0,4.0,,,,,,...,,,,,36.0,,,,7.0,pc4
