# Merge PHY Serving Cell measurements

## Preliminaries: Imports, data loading and definitions

In [1]:
import pandas as pd
from pathlib import Path
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [2]:
# All locations are derived from the parent of the current working directory.
data_path = Path.cwd().parent.joinpath("data")
mi_path = data_path.joinpath("sources", "mobile_insight")   # raw MobileInsight parquet dumps
out_path = data_path.joinpath("mi_intermediate")            # where the merged result is written

# Resampling interval (pandas offset alias) used when aggregating the measurements.
res_ival = "1s"

In [3]:
# Bookkeeping columns of the log records that carry no measurement information.
drop_cols = [
    'log_msg_len', 'type_id', 'Version', 'Number of SubPackets', 'SubPacket ID',
    'SubPacket Version', 'SubPacket Size', 'Current SFN',
    'Current Subframe Number', 'file',
]

# Signal measurements: averaged within each resampling window.
mean_cols = [
    'RSRP Rx[0]', 'RSRP Rx[1]', 'RSRP',
    'RSRQ Rx[0]', 'RSRQ Rx[1]', 'RSRQ',
    'RSSI Rx[0]', 'RSSI Rx[1]', 'RSSI',
    'FTL SNR Rx[0]', 'FTL SNR Rx[1]',
]

# Cell identifiers: the median is used as a cheap approximation of the mode.
# NOTE(review): for a window with an even number of samples split between two
# cells the median is the average of both IDs, i.e. a value that matches
# neither cell.
median_cols = ['Physical Cell ID', 'E-ARFCN']

# Column -> aggregation name, mean columns first, then median columns.
res_cols = {
    **dict.fromkeys(mean_cols, 'mean'),
    **dict.fromkeys(median_cols, 'median'),
}

# Columns actually read from the parquet files.
load_cols = ['Serving Cell Index', *mean_cols, *median_cols]

In [4]:
in_path = mi_path
phy_list = []
local_tz = "Europe/Berlin"

# sorted() keeps the load (and print) order deterministic.
for p in sorted(in_path.rglob("*/*LTE_PHY_Serv_Cell_Measurement*.parquet")):
    rel_path = p.relative_to(in_path)
    print(rel_path)

    # The first directory below in_path names the capturing device (pc1, pc2, ...).
    # Taking parts[0] instead of unpacking exactly two parts also works for
    # files that rglob finds in deeper sub-directories.
    pc = rel_path.parts[0]

    df = pd.read_parquet(p, columns=load_cols)
    if not isinstance(df.index, pd.DatetimeIndex):
        # The timestamp was stored as a regular column. It is not part of
        # load_cols, so it has to be requested explicitly before it can
        # become the index.
        df = pd.read_parquet(p, columns=["timestamp"] + load_cols)
        df = df.set_index("timestamp")
        df.index = pd.to_datetime(df.index)

    if df.index.tz is None:
        print("Adding index as tz-ed datetime")
        df.index = df.index.tz_localize(local_tz)
    elif str(df.index.tz) != local_tz:
        # Keep every device in the same zone so the concatenated index stays
        # a single tz-aware DatetimeIndex.
        df.index = df.index.tz_convert(local_tz)

    df['device'] = pc
    phy_list.append(df)

if not phy_list:
    # pd.concat([]) would only report "No objects to concatenate".
    raise FileNotFoundError(
        f"No LTE_PHY_Serv_Cell_Measurement parquet files found below {in_path}"
    )
phy_df = pd.concat(phy_list)

pc1\LTE_PHY_Serv_Cell_Measurement.parquet
pc2\LTE_PHY_Serv_Cell_Measurement.parquet
pc3\LTE_PHY_Serv_Cell_Measurement.parquet
pc4\LTE_PHY_Serv_Cell_Measurement.parquet


# Step 2 - Preprocess PHY

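1. Drop unused columns (done at load time by reading only `load_cols`)
2. Merge the secondary cells (`1_SCell`, `2_SCell`) into a single `SCell` label
3. Resample to 1 second per device and cell type (mean for signal metrics, median for cell identifiers)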

In [5]:
# Preview the concatenated raw measurements of all devices (pandas truncates the display).
phy_df

Unnamed: 0_level_0,Serving Cell Index,RSRP Rx[0],RSRP Rx[1],RSRP,RSRQ Rx[0],RSRQ Rx[1],RSRQ,RSSI Rx[0],RSSI Rx[1],RSSI,FTL SNR Rx[0],FTL SNR Rx[1],Physical Cell ID,E-ARFCN,device
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2021-06-21 10:28:00.037601+02:00,PCell,-131.5625,-103.8125,-103.8125,-20.5625,-14.3125,-14.3125,-90.9375,-69.5000,-69.5000,-9.500000,1.2,115,1300,pc1
2021-06-21 10:28:00.676466+02:00,PCell,-132.3125,-104.1250,-104.1250,-21.6875,-15.5625,-15.5625,-90.5625,-68.5625,-68.5625,-7.700000,0.1,115,1300,pc1
2021-06-21 10:28:01.316467+02:00,PCell,-132.4375,-104.2500,-104.2500,-21.8750,-15.0000,-15.0000,-90.6250,-69.2500,-69.2500,-8.700000,1.0,115,1300,pc1
2021-06-21 10:28:01.956616+02:00,PCell,-132.3750,-104.1250,-104.1250,-21.4375,-14.1875,-14.1875,-91.0000,-69.9375,-69.9375,-7.600000,2.0,115,1300,pc1
2021-06-21 10:33:09.039063+02:00,PCell,-132.8750,-104.6250,-104.6250,-22.1875,-15.2500,-15.2500,-90.6250,-69.3750,-69.3750,-11.700000,0.9,115,1300,pc1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-24 10:20:35.995802+02:00,PCell,-74.5000,-84.3750,-74.5000,-4.0625,-3.7500,-3.7500,-50.4375,-60.6875,-60.6875,21.200001,15.7,42,1300,pc4
2021-06-24 10:20:36.005793+02:00,PCell,-74.4375,-84.5000,-74.4375,-3.2500,-2.8125,-2.8125,-51.1250,-61.7500,-61.7500,21.200001,15.1,42,1300,pc4
2021-06-24 10:20:36.015796+02:00,PCell,-74.3125,-84.6250,-74.3125,-3.3125,-3.1250,-3.1250,-51.0000,-61.5000,-61.5000,21.299999,15.3,42,1300,pc4
2021-06-24 10:20:36.025686+02:00,PCell,-74.2500,-84.6875,-74.2500,-4.1875,-4.3125,-4.1875,-50.1250,-60.3750,-60.3750,21.500000,15.4,42,1300,pc4


In [6]:
# Collapse the individual secondary carriers into one 'SCell' label.
# 'SCell' maps to itself so that re-running this cell is a no-op; without it
# the already-merged label would turn into NaN and be dropped by the groupby.
# Labels that are not listed here still become NaN.
phy_df['Serving Cell Index'] = phy_df['Serving Cell Index'].map({
    'PCell': 'PCell',
    'SCell': 'SCell',
    '1_SCell': 'SCell',
    '2_SCell': 'SCell',
})

In [7]:
# Aggregate each (device, cell type) stream separately into res_ival-wide bins,
# using the per-column aggregation defined in res_cols.
phy_res = (
    phy_df
    .groupby(['device', 'Serving Cell Index'])
    .resample(res_ival)
    .agg(res_cols)
)

In [8]:
# Order the result, then discard rows in which every aggregated column is NaN.
phy_res = phy_res.sort_index()
phy_res = phy_res.dropna(how="all")

In [9]:
# Flatten the group index into columns and index the frame by time only;
# the median aggregation yields floats, so the cell ID is cast back to int.
phy_res = (
    phy_res
    .reset_index()
    .set_index("timestamp")
    .sort_index()
    .astype({"Physical Cell ID": int})
)
phy_res

Unnamed: 0_level_0,device,Serving Cell Index,RSRP Rx[0],RSRP Rx[1],RSRP,RSRQ Rx[0],RSRQ Rx[1],RSRQ,RSSI Rx[0],RSSI Rx[1],RSSI,FTL SNR Rx[0],FTL SNR Rx[1],Physical Cell ID,E-ARFCN
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2021-06-21 10:28:00+02:00,pc1,PCell,-131.937500,-103.968750,-103.968750,-21.125000,-14.937500,-14.937500,-90.750000,-69.031250,-69.031250,-8.600000,0.650000,115,1300.0
2021-06-21 10:28:01+02:00,pc1,PCell,-132.406250,-104.187500,-104.187500,-21.656250,-14.593750,-14.593750,-90.812500,-69.593750,-69.593750,-8.150000,1.500000,115,1300.0
2021-06-21 10:33:09+02:00,pc1,PCell,-132.635174,-104.776163,-104.776163,-21.893895,-15.398256,-15.398256,-90.741279,-69.376453,-69.376453,-9.211628,0.367442,115,1300.0
2021-06-21 10:33:10+02:00,pc1,PCell,-132.663889,-105.054167,-105.054167,-21.929167,-15.711111,-15.711111,-90.747222,-69.341667,-69.341667,-8.797778,-0.384444,115,1300.0
2021-06-21 10:33:11+02:00,pc1,PCell,-133.319079,-105.263158,-105.263158,-22.389803,-15.907895,-15.907895,-90.934211,-69.345395,-69.345395,-8.402632,0.315789,115,1300.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-24 19:09:12+02:00,pc1,PCell,-85.600625,-85.401250,-85.383125,-10.673125,-8.995000,-8.981250,-54.921250,-56.410625,-56.410625,15.800000,21.929000,246,1300.0
2021-06-24 19:09:13+02:00,pc1,PCell,-85.636875,-85.468750,-85.444375,-10.504375,-8.968750,-8.955625,-55.135000,-56.499375,-56.499375,15.542000,21.881000,246,1300.0
2021-06-24 19:09:14+02:00,pc1,PCell,-85.599375,-85.346875,-85.330625,-10.204375,-8.773750,-8.773125,-55.393750,-56.572500,-56.572500,15.581000,22.471000,246,1300.0
2021-06-24 19:09:15+02:00,pc1,PCell,-85.886250,-85.691875,-85.668750,-10.043750,-9.008125,-8.997500,-55.833750,-56.685000,-56.685000,15.744000,22.822000,246,1300.0


In [10]:
# Create the output directory on a fresh checkout; to_parquet does not create
# missing parent directories.
out_path.mkdir(parents=True, exist_ok=True)
phy_res.to_parquet(out_path/"phy_serv_cell.parquet")