In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# =============================================================================
#  Import Section
# =============================================================================

# Python lib/std-pkgs imports
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load Data Imports
import load_data as ld
from rawDataHandler import RawDataHandler, NodeDfStore

# IPython Shell Flag
py_ipython_flag = True

In [None]:
# =============================================================================
#  Load data
# =============================================================================
# Reads every log of one simulation run into a pandas DataFrame. The loaders
# live in the project module `load_data` (imported as `ld`).

# Specify the paths

#src_dir = "/home/daniel/Documents/00_DNA/DASH/data/vary_num_macroues/run3/"
#src_dir = "/home/daniel/Documents/00_DNA/DASH/data/scenario4_varyNumUes/"
src_dir = "/home/shared_data/dash/dataset7_35Mbps_max_brate_withCa/run1/"

# os.path.join yields the same paths as plain concatenation here, but keeps
# working if src_dir is ever written without its trailing separator.
dir_dash_log = os.path.join(src_dir, "dash_client_log.txt")
dir_DlMacStats = os.path.join(src_dir, "DlMacStats.txt")
dir_UlMacStats = os.path.join(src_dir, "UlMacStats.txt")
dir_DlRsrpSinrStats = os.path.join(src_dir, "DlRsrpSinrStats.txt")
dir_mobility = os.path.join(src_dir, "mobility_trace.txt")
dir_UlSinrStats = os.path.join(src_dir, "UlSinrStats.txt")
dir_MpegLog = os.path.join(src_dir, "mpeg_player_log.txt")

# Load dataframes
df_DlMacStats = ld.load_std_log(dir_DlMacStats)
df_UlMacStats = ld.load_std_log(dir_UlMacStats)
# NOTE: DlRsrpSinrStats takes a lot of memory space, so only load if necessary
load_rsrp = True
if load_rsrp:
    df_DlRsrpSinrStats = ld.load_std_log(dir_DlRsrpSinrStats)

# The UL SINR log is consumed by later cells (column extraction, grouping and
# the cell-id / SINR plots), so it must be loaded for the notebook to run from
# a fresh kernel. The existence check keeps the rest of this cell usable for
# runs that do not ship the file.
if os.path.isfile(dir_UlSinrStats):
    df_UlSinrStats = ld.load_std_log(dir_UlSinrStats)
else:
    print("WARNING: UL SINR log not found, df_UlSinrStats was not loaded:", dir_UlSinrStats)

# Additionally, change time units (microseconds to seconds)
df_mobility = ld.load_std_log(dir_mobility, timeHead="tstamp_us")
df_mobility['Time'] = (df_mobility['Time'] / 1e6).round(3)

# NOTE (original author): remove the trailing '\t' from the column headers
# (presumably a manual clean-up of mpeg_player_log.txt before loading - TODO confirm)
df_MpegLog = ld.load_std_log(dir_MpegLog, timeHead="tstamp_us")
df_MpegLog['Time'] = (df_MpegLog['Time'] / 1e6).round(3)
# Shift the node ids by one; presumably the MPEG log numbers nodes from 0
# while the other logs start at 1 - TODO confirm.
df_MpegLog['Node'] = df_MpegLog['Node'] + 1

df_Dashlog = ld.load_dash_client_log(dir_dash_log, verbose=True)
df_Dashlog['Time'] = (df_Dashlog['Time'] / 1e6).round(3)

In [None]:
# Distinct cell ids that appear in the DL RSRP/SINR log
df_DlRsrpSinrStats.loc[:, 'cellId'].unique()

In [None]:
# Mobility samples of IMSI 5 with a timestamp below 200
df_mobility[df_mobility['IMSI'].eq(5) & df_mobility['Time'].lt(200)]

In [None]:
# =============================================================================
#  Extract columns of interest from each dataframe
# =============================================================================
# Every frame is reduced to the columns used later and its 'IMSI' column is
# renamed to 'Node' so that all logs share the same key.
# NOTE: the frames are overwritten in place, so this cell cannot be run twice
# without reloading the data first (the second run would look for 'IMSI',
# which has already been renamed).

if load_rsrp:
    df_DlRsrpSinrStats = df_DlRsrpSinrStats[['Time', 'IMSI', 'cellId', 'rsrp']]
    df_DlRsrpSinrStats = df_DlRsrpSinrStats.rename(columns={"rsrp": "Dl-RSRP", 'IMSI': "Node"})

# The UL SINR frame is independent of `load_rsrp`: select and rename together
# (the selection used to sit inside the `if` above while the rename did not).
# Requires df_UlSinrStats to have been loaded by the load-data cell.
df_UlSinrStats = df_UlSinrStats[['Time', 'IMSI', 'cellId', 'sinrLinear']]
df_UlSinrStats = df_UlSinrStats.rename(columns={"sinrLinear": "Ul-SINRlinear", 'IMSI': "Node"})

df_UlMacStats = df_UlMacStats[['Time', 'IMSI', 'mcs', 'size']]
df_UlMacStats = df_UlMacStats.rename(columns={"IMSI": "Node", "mcs": "Ul-mcs", "size": "Ul-size"})

# Only Tb1 chosen, since Tb2 is empty (zeros)
df_DlMacStats = df_DlMacStats[['Time', 'IMSI', 'mcsTb1', 'sizeTb1']]
df_DlMacStats = df_DlMacStats.rename(columns={"IMSI": "Node", "mcsTb1": "Dl-mcs", "sizeTb1": "Dl-size"})

df_mobility = df_mobility[['Time', 'IMSI', 'pos_x','pos_y', 'pos_z', 'vel_x', 'vel_y', 'vel_z']]
df_mobility = df_mobility.rename(columns={'IMSI': 'Node'})

# df_Dashlog and df_MpegLog are kept with all of their columns.
# Optional subset for the MPEG log:
#   ['Time', 'Node', 'playbackTime', 'frameQueueBytes', 'frameQueueSize']

In [None]:
# Sorted list of the distinct bitrates requested by the DASH clients.
# 'newBitRate_bps' is a column of the DASH client log; the RSRP frame only
# holds Time / Node / cellId / Dl-RSRP after the extraction cell.
np.sort(df_Dashlog['newBitRate_bps'].unique())

In [None]:
# Number of DL RSRP samples per serving cell for node 1.
# Grouping by the scalar key 'Node' (instead of the one-element list ['Node'])
# lets get_group take a plain scalar without the pandas deprecation that
# requires a tuple key for list-based groupings.
tmp_grp = df_DlRsrpSinrStats.groupby('Node')
tmp_node = tmp_grp.get_group(1)

tmp_node['cellId'].value_counts().sort_index()

In [None]:
# Sorted node ids that appear in the MPEG player log
np.sort(df_MpegLog.loc[:, 'Node'].unique())


In [None]:
# Sorted node ids present in the DASH client log, plus how many there are
nodes = np.sort(df_Dashlog.loc[:, 'Node'].unique())
print(nodes)
print("There are ", nodes.size, " nodes.")

In [None]:
# Inject CSS that sets the height of `div.output_scroll` to 100em
# (presumably to enlarge the scrollable output area of the notebook UI).
# `display` is imported from the public IPython.display module; importing it
# from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>div.output_scroll { height: 100em; }</style>"))

In [None]:
# Frequency table of the bitrates chosen by the DASH clients (all nodes):
# absolute counts plus their share in percent, shown sorted by bitrate.
quality_counts = df_Dashlog['newBitRate_bps'].value_counts()
df_bitrate_counts = pd.DataFrame(
    {
        'newBitRate_bps': quality_counts.index,
        "counts": quality_counts.values,
    }
).assign(**{'rel(%)': lambda d: 100 * d['counts'] / d['counts'].sum()})
df_bitrate_counts.sort_values(by='newBitRate_bps')

In [None]:
# Same frequency table, built from the 'bitRate' column of the MPEG player log.
# (Reuses the names quality_counts / df_bitrate_counts of the DASH-log table.)
quality_counts = df_MpegLog['bitRate'].value_counts()
df_bitrate_counts = pd.DataFrame(
    {
        'bitRate': quality_counts.index,
        "counts": quality_counts.values,
    }
).assign(**{'rel(%)': lambda d: 100 * d['counts'] / d['counts'].sum()})
df_bitrate_counts.sort_values(by='bitRate')

In [None]:
# =============================================================================
#  Get grouped data
# =============================================================================
# One GroupBy object per log, keyed by node id.
# The key is passed as the scalar 'Node' rather than the one-element list
# ['Node']: with a list key, pandas deprecates (and newer versions reject)
# scalar lookups such as gp.get_group(5) in favour of tuple keys.
gp_Dashlog = df_Dashlog.groupby('Node')

gp_MpegLog = df_MpegLog.groupby('Node')

if load_rsrp:
    gp_Rsrp = df_DlRsrpSinrStats.groupby('Node')

gp_Sinr = df_UlSinrStats.groupby('Node')

gp_DlMacTbs = df_DlMacStats.groupby('Node')

gp_UlMac = df_UlMacStats.groupby('Node')

gp_mob = df_mobility.groupby('Node')



In [None]:
# =============================================================================
#  Plot relevant data
# =============================================================================
# Draws a grid with one column per selected node and one row per metric,
# restricted to the time window [T_lower, T_upper], and saves it as a PDF.

save_dir = "/home/daniel/Documents/00_DNA/DASH/dash-repo/code/data_preproc_log/nodes_plot_all_jupyter.pdf"

# nodes to plot
nd_toplot = [2, 5, 7, 10, 13, 15, 17, 20, 23, 25, 30, 33, 37]

n_nds = len(nd_toplot)
print(n_nds)

# Time and ticks parameters
T_lower = 0.0
T_upper = 1000.0
fsize=(15, 5)  # not used by the grid; every panel is drawn into the shared figure
# Guard against a zero step (np.arange cannot handle it) for very short windows.
step = max(1, round((T_upper - T_lower)/20))
xtics = np.arange(T_lower, T_upper, step)

# One entry per grid row: (row index, per-node frame key, y column, panel title)
panel_specs = [
    (0, 'sinr', 'cellId', 'Cell-Id association'),
    (1, 'raw', 'newBitRate_bps', 'DASHLOG New Bitrate'),
    (2, 'mpeg', 'bitRate', 'MPEG bitRate'),
    (3, 'raw', 'thputOverLastSeg_bps', 'DASHLOG Throughput over last Segment'),
    (4, 'raw', 'avgThputOverWindow_bps(estBitRate)', 'DASHLOG Average Throughput'),
    (5, 'raw', 'frameQueueBytes', 'DASHLOG FrameQueue Bytes'),
    (6, 'mpeg', 'frameQueueBytes', 'MPEGLOG frameQueueBytes'),
    (7, 'raw', 'frameQueueSize', 'DASHLOG FrameQueue Size'),
    (8, 'mpeg', 'frameQueueSize', 'MPEGLOG frameQueueSize'),
    (9, 'rsrp', 'Dl-RSRP', 'RSRP'),
    (10, 'sinr', 'Ul-SINRlinear', 'SINR'),
    (11, 'dltbs', 'Dl-size', 'DL MAC TBS'),
    (12, 'dltbs', 'Dl-mcs', 'DL MAC mcs-Tb1'),
]

# Create axes for the plots. squeeze=False keeps `axes` two-dimensional even
# when a single node is selected, so axes[row, col] always works.
# NOTE(review): 400x400 inches is a very large canvas - confirm it is intended.
fig, axes = plt.subplots(nrows=len(panel_specs), ncols=n_nds, figsize=(400, 400), squeeze=False)

for i, node in enumerate(nd_toplot):

    # Get the dataframes for the node. Explicit copies are taken so that the
    # unit conversions below only affect these per-node frames.
    df_node_raw = gp_Dashlog.get_group(node).copy()
    df_node_mpeg = gp_MpegLog.get_group(node).copy()

    df_node_sinr = gp_Sinr.get_group(node).copy()
    # Convert to dB
    df_node_sinr['Ul-SINRlinear'] = 10 * np.log10(df_node_sinr['Ul-SINRlinear'])

    df_node_dltbs = gp_DlMacTbs.get_group(node).copy()

    # Not plotted in the grid; still extracted so these variables exist after
    # the loop exactly as before.
    df_node_ulmac = gp_UlMac.get_group(node).copy()
    df_node_mob = gp_mob.get_group(node).copy()

    node_frames = {
        'raw': df_node_raw,
        'mpeg': df_node_mpeg,
        'sinr': df_node_sinr,
        'dltbs': df_node_dltbs,
    }

    if load_rsrp:
        df_node_rsrp = gp_Rsrp.get_group(node).copy()
        # Convert to dB miliwatts
        df_node_rsrp['Dl-RSRP'] = 10 * np.log10(df_node_rsrp['Dl-RSRP'] * 1000)
        node_frames['rsrp'] = df_node_rsrp

    # Plot
    for row, key, y_col, panel_title in panel_specs:
        if key not in node_frames:
            # RSRP was not loaded: leave that row of the grid empty.
            continue
        df_panel = node_frames[key]
        in_window = (T_lower <= df_panel.Time) & (df_panel.Time <= T_upper)
        df_panel[in_window].plot(x='Time', y=y_col,
                                 style='o',
                                 title=panel_title,
                                 grid=True,
                                 xticks=xtics,
                                 ax=axes[row, i])

fig.savefig(save_dir, format="pdf")



In [None]:
# Write the node grid figure to disk again (same target path and format as the
# save at the end of the grid-plot cell).
save_dir = "/home/daniel/Documents/00_DNA/DASH/dash-repo/code/data_preproc_log/nodes_plot_all_jupyter.pdf"
fig.savefig(fname=save_dir, format="pdf")

In [None]:
# NOTE(review): looks like a leftover debug/marker cell - candidate for deletion.
print("rty")

In [None]:
# =============================================================================
#  Select one node and extract its data from every log
# =============================================================================
# The per-node frames built here (df_node_*) feed the inspection and plotting
# cells that follow.
node=47

# Group by the scalar key 'Node' (not the list ['Node']) so that get_group
# accepts the plain node id; list-based groupings deprecate scalar lookups.
# Explicit copies keep the in-place unit conversions local to the node frames.
gp_Dashlog = df_Dashlog.groupby('Node')
df_node_raw = gp_Dashlog.get_group(node).copy()

gp_MpegLog = df_MpegLog.groupby('Node')
df_node_mpeg = gp_MpegLog.get_group(node).copy()

if load_rsrp:
    gp_Rsrp = df_DlRsrpSinrStats.groupby('Node')
    df_node_rsrp = gp_Rsrp.get_group(node).copy()
    # Convert to dB miliwatts
    df_node_rsrp['Dl-RSRP'] = 10 * np.log10(df_node_rsrp['Dl-RSRP'] * 1000)

gp_Sinr = df_UlSinrStats.groupby('Node')
df_node_sinr = gp_Sinr.get_group(node).copy()
# Convert to dB
df_node_sinr['Ul-SINRlinear'] = 10 * np.log10(df_node_sinr['Ul-SINRlinear'])

gp_DlMacTbs = df_DlMacStats.groupby('Node')
df_node_dltbs = gp_DlMacTbs.get_group(node).copy()

gp_UlMac = df_UlMacStats.groupby('Node')
df_node_ulmac = gp_UlMac.get_group(node).copy()

gp_mob = df_mobility.groupby('Node')
df_node_mob = gp_mob.get_group(node).copy()

In [None]:
# Display the UL SINR frame of the currently selected node
df_node_sinr

In [None]:
# Mean MPEG frame-queue occupancy of the selected node for 8 <= Time < 12
df_node_mpeg.loc[
    (df_node_mpeg['Time'] >= 8) & (df_node_mpeg['Time'] < 12),
    'frameQueueBytes',
].mean()


In [None]:
# Summed 'Ul-size' of the selected node inside a short time window, divided by
# the window length.
# The upper bound used to be written `-5-5` (= -10), which lies below the
# lower bound -6 and made the selection always empty; together with the 0.5
# divisor the intended bound is -5.5. The divisor is now derived from the
# bounds so the two cannot drift apart.
win_start, win_stop = -6.0, -5.5
df_tmp = df_node_ulmac[(df_node_ulmac.Time >= -6) & (df_node_ulmac.Time < 4)]
data = df_tmp.loc[(df_tmp.Time >= win_start) & (df_tmp.Time < win_stop), 'Ul-size'].sum()
data / (win_stop - win_start)

In [None]:
# Quick check of np.quantile: first quartile of four evenly spaced values
tmp_list = list(range(100, 500, 100))
tmp_arr = np.array(tmp_list)
np.quantile(tmp_arr, q=0.25)

In [None]:
# Bitrate frequency table for the selected node only (DASH client log):
# absolute counts plus their share in percent, shown sorted by bitrate.
node_quality_counts = df_node_raw['newBitRate_bps'].value_counts()
df_node_bitrate_counts = pd.DataFrame(
    {
        'newBitRate_bps': node_quality_counts.index,
        "counts": node_quality_counts.values,
    }
).assign(**{'rel(%)': lambda d: 100 * d['counts'] / d['counts'].sum()})
df_node_bitrate_counts.sort_values(by='newBitRate_bps')

In [None]:
# Bitrate frequency table for the selected node only (MPEG player log).
# (Reuses the names node_quality_counts / df_node_bitrate_counts.)
node_quality_counts = df_node_mpeg['bitRate'].value_counts()
df_node_bitrate_counts = pd.DataFrame(
    {
        'bitRate': node_quality_counts.index,
        "counts": node_quality_counts.values,
    }
).assign(**{'rel(%)': lambda d: 100 * d['counts'] / d['counts'].sum()})
df_node_bitrate_counts.sort_values(by='bitRate')

In [None]:
# Bitrates requested by the selected node for 0 <= Time < 7
tmp = df_node_raw.loc[
    (df_node_raw['Time'] >= 0.0) & (df_node_raw['Time'] < 7.0),
    'newBitRate_bps',
]

In [None]:
# Display the UL MAC frame of the currently selected node
df_node_ulmac

In [None]:
# Plot every metric of the currently selected node, one figure per metric,
# restricted to the time window [T_lower, T_upper].
T_lower = 0.0
T_upper = 50.0
fsize=(15, 5)
# Guard against a zero step (np.arange cannot handle it) for very short windows.
step = max(1, round((T_upper - T_lower)/45))
xtics = np.arange(T_lower, T_upper, step)

# One entry per figure, in drawing order: (source frame, y column, title)
panels = [
    (df_node_sinr, 'cellId', 'Cell-Id association'),
    # newBitRate from dash_client_log
    (df_node_raw, 'newBitRate_bps', 'DASHLOG New Bitrate'),
    # bitRate from mpeg_player_log
    (df_node_mpeg, 'bitRate', 'MPEG bitRate'),
    (df_node_raw, 'thputOverLastSeg_bps', 'DASHLOG Throughput over last Segment'),
    (df_node_raw, 'avgThputOverWindow_bps(estBitRate)', 'DASHLOG Average Throughput'),
    (df_node_raw, 'frameQueueBytes', 'DASHLOG FrameQueue Bytes'),
    (df_node_mpeg, 'frameQueueBytes', 'MPEGLOG frameQueueBytes'),
    (df_node_raw, 'frameQueueSize', 'DASHLOG FrameQueue Size'),
    (df_node_mpeg, 'frameQueueSize', 'MPEGLOG frameQueueSize'),
]
if load_rsrp:
    # The RSRP frame only exists when the RSRP log was loaded.
    panels.append((df_node_rsrp, 'Dl-RSRP', 'RSRP'))
panels += [
    (df_node_sinr, 'Ul-SINRlinear', 'SINR'),
    (df_node_dltbs, 'Dl-size', 'DL MAC TBS'),
    (df_node_dltbs, 'Dl-mcs', 'DL MAC mcs-Tb1'),
]

for df_panel, y_col, panel_title in panels:
    in_window = (T_lower <= df_panel.Time) & (df_panel.Time <= T_upper)
    df_panel[in_window].plot(x='Time', y=y_col,
                             style='o',
                             title=panel_title,
                             grid=True,
                             figsize=fsize,
                             xticks=xtics)