In [1]:
import os, sys
import subprocess
from tqdm import tqdm
import json
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

from datetime import date

In [2]:
from lxml import etree
import xml.etree.ElementTree as ET
import traci

In [3]:
## Definitions of some useful functions
def parse_if_number(s):
    """Convert a raw CSV cell into a float, a bool, a string or None.

    Parameters
    ----------
    s : str or None
        Raw cell content.

    Returns
    -------
    float
        When ``float(s)`` succeeds.
    bool
        ``True`` / ``False`` for the exact strings ``"true"`` / ``"false"``.
    str
        The unchanged input for any other non-empty string.
    None
        For an empty (falsy) input.
    """
    try:
        return float(s)
    except (TypeError, ValueError):
        # Only "not a number" is expected here; anything else (for example a
        # KeyboardInterrupt) must propagate instead of being swallowed.
        pass
    if s == "true":
        return True
    if s == "false":
        return False
    return s if s else None
def parse_ndarray(s):
    """Parse a space-separated list of numbers into a NumPy array.

    Returns ``None`` when the cell is empty (falsy), otherwise the array
    produced by ``np.fromstring(s, sep=' ')``.
    """
    if not s:
        return None
    return np.fromstring(s, sep=' ')
## running (cumulative) average, e.g. for plotting
def running_avg(x):
    """Return the cumulative mean of ``x``.

    Element ``i`` of the result is the mean of ``x[0] .. x[i]``.
    ``x`` must expose ``.size`` (e.g. a NumPy array).
    """
    sample_counts = np.arange(1, x.size + 1)
    partial_sums = np.cumsum(x)
    return partial_sums / sample_counts
## running time-weighted average, e.g. for plotting
def running_timeavg(t, x):
    """Return the running time-weighted average of ``x`` sampled at times ``t``.

    Each sample ``x[i]`` is weighted by the interval to the next timestamp
    (``t[i+1] - t[i]``); the last sample has no following interval and is not
    used. The accumulated integral is divided by ``t[1:]``, i.e. by the time
    elapsed since 0 rather than since ``t[0]``.

    The result has one element fewer than ``t``.
    """
    interval = t[1:] - t[:-1]
    weighted_samples = x[:-1] * interval
    return np.cumsum(weighted_samples) / t[1:]

In [4]:
def interpolate_dataframe(x, df):
    """Resample every vector row of ``df`` onto the common time grid ``x``.

    For each row, ``vecvalue`` is linearly interpolated (``np.interp``) from
    the row's own ``vectime`` onto ``x``; ``vectime`` is then replaced by ``x``.

    The previous implementation assigned through ``df.iloc[n].<column> = ...``.
    ``df.iloc[n]`` yields a temporary row Series, so those assignments raised
    SettingWithCopyWarning and could leave ``df`` unchanged. This version
    builds the new columns explicitly and returns a new frame.

    Parameters
    ----------
    x : numpy.ndarray
        Target time grid. The same array object is stored in every row.
    df : pandas.DataFrame
        Frame with object columns ``vectime`` and ``vecvalue`` holding one
        array per row.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with ``vectime`` / ``vecvalue`` resampled. The input
        frame is not modified, so callers must use the return value.
    """
    n_rows = df.shape[0]
    if n_rows == 0:
        # Nothing to resample; also avoids requiring the vector columns on an
        # empty frame.
        return df.copy()

    # Object arrays keep one ndarray per cell; a plain list of equally sized
    # arrays could otherwise be interpreted as a 2-D block.
    new_time = np.empty(n_rows, dtype=object)
    new_value = np.empty(n_rows, dtype=object)
    for n, (xp, fp) in enumerate(zip(df['vectime'], df['vecvalue'])):
        new_time[n] = x
        new_value[n] = np.interp(x, xp, fp)

    resampled = df.copy()
    resampled['vectime'] = new_time
    resampled['vecvalue'] = new_value
    return resampled

In [5]:
# Load the exported vector table. The converters turn the textual cells into
# Python values while reading: attribute values become numbers/bools where
# possible, and the space-separated array columns become NumPy arrays
# (None for empty cells).
vec = pd.read_csv(
    'VoIP-UL.high_traffic.vec.csv',
    converters=dict(
        attrvalue=parse_if_number,
        binedges=parse_ndarray,
        binvalues=parse_ndarray,
        vectime=parse_ndarray,
        vecvalue=parse_ndarray,
    ),
)

In [6]:
# Keep only rows that actually carry vector data: rows whose `vecvalue` cell
# was empty were parsed to None by `parse_ndarray` and are dropped here.
vec = vec.dropna(subset=['vecvalue'])

In [7]:
# Vectors recorded by each base station. `regex=False` makes the module prefix
# a literal substring match; with the default regex matching the '.' would act
# as a wildcard for any character.
gnodeb1 = vec.loc[vec['module'].str.contains('THOSA_Highway.gNodeB1', regex=False)]
gnodeb2 = vec.loc[vec['module'].str.contains('THOSA_Highway.gNodeB2', regex=False)]

In [8]:
# Split the vector table by the module that recorded each vector.
channel = vec[vec['module'].str.contains('ChannelModel')]
mobility = vec[vec['module'].str.contains('mobility')]
car = vec[vec['module'].str.contains('car')]

# Finer-grained subsets, all restricted to vectors recorded under a car module.
packet = car[car['name'].str.contains('Packet')]
phy = car[car['module'].str.contains('Phy')]
app = car[car['module'].str.contains('app')]
mac = car[car['module'].str.contains('Mac')]

In [9]:
# Filter out vectors that contain fewer than 2 samples

In [10]:
def _blank_short_vectors(frame, min_samples=2):
    """Return a copy of ``frame`` with ``vecvalue`` set to NaN for short vectors.

    A vector is "short" when its ``vectime`` array holds fewer than
    ``min_samples`` entries. Marking those rows with NaN lets a following
    ``dropna(subset=['vecvalue'])`` remove them.

    The earlier row loop assigned through ``frame.iloc[n].vecvalue = np.nan``,
    which writes to a temporary row Series (SettingWithCopyWarning) and can
    leave the frame unchanged; a boolean mask with ``.loc`` updates the frame
    itself.
    """
    marked = frame.copy()
    n_samples = marked['vectime'].map(lambda t: t.size).astype(int)
    marked.loc[n_samples < min_samples, 'vecvalue'] = np.nan
    return marked


# Each subset is an independent frame, so each one is marked separately.
car = _blank_short_vectors(car)
channel = _blank_short_vectors(channel)
mobility = _blank_short_vectors(mobility)
packet = _blank_short_vectors(packet)
phy = _blank_short_vectors(phy)
app = _blank_short_vectors(app)
mac = _blank_short_vectors(mac)

In [11]:
# Remove, from every subset, the rows whose `vecvalue` is missing (NaN/None).
car, channel, mobility, packet, phy, app, mac = (
    subset.dropna(subset=['vecvalue'])
    for subset in (car, channel, mobility, packet, phy, app, mac)
)

In [12]:
# Column layout of the per-vehicle dataset: one row per (vehicle, time step).
car_columns = [
    'timeStep', 'vehId',
    'lastPosX', 'lastPosY', 'lastVelocity', 'lastOrientation', 'distance',
    'measuredSinrUl', 'measuredSinrDl', 'rcvdSinrUl', 'rcvdSinrDl',
    'averageCqiUl', 'averageCqiDl', 'servingCell',
    'voIPGeneratedThroughput',
    'macDelayUl'
]

# (dataset column, substring of the mobility vector name). These vectors
# define the time grid and are copied as-is.
mobility_vectors = [
    ('lastPosX', 'LastPosX'),
    ('lastPosY', 'LastPosY'),
    ('lastVelocity', 'LastVelocity'),
    ('lastOrientation', 'LastOrientation'),
]

# (dataset column == substring of the vector name, subset it is read from).
# These vectors are resampled onto the mobility time grid.
network_vectors = [
    ('measuredSinrUl', 'channel'),
    ('measuredSinrDl', 'channel'),
    ('rcvdSinrUl', 'channel'),
    ('rcvdSinrDl', 'channel'),
    ('distance', 'channel'),
    ('averageCqiUl', 'phy'),
    ('averageCqiDl', 'phy'),
    ('servingCell', 'phy'),
    ('voIPGeneratedThroughput', 'app'),
    ('macDelayUl', 'mac'),
]


def _rows_of_car(frame, vehId):
    """Return the rows of ``frame`` recorded under module ``car[<vehId>]``."""
    # Raw strings: '\[' in a normal string literal is an invalid escape sequence.
    return frame.loc[frame['module'].str.contains(r'car\[' + str(vehId) + r'\]')]


def _rows_named(frame, key):
    """Return the rows of ``frame`` whose vector name contains ``key``."""
    return frame.loc[frame['name'].str.contains(key)]


# Vehicle indices are read from the mobility module names (`car[<id>]`) rather
# than assumed to be exactly 0..N-1, so gaps in the numbering or non-car
# mobility modules no longer break the loop.
vehIds = sorted(
    mobility['module'].str.extract(r'car\[(\d+)\]', expand=False)
    .dropna().astype(int).unique()
)
numCar = len(vehIds)

car_frames = []
for vehId in vehIds:
    vehId = int(vehId)
    per_car = {
        'mobility': _rows_of_car(mobility, vehId),
        'channel': _rows_of_car(channel, vehId),
        'phy': _rows_of_car(phy, vehId),
        'app': _rows_of_car(app, vehId),
        'mac': _rows_of_car(mac, vehId),
    }

    # The time stamps of the first mobility vector are the common time grid.
    timeStep = per_car['mobility'].iloc[0].vectime

    new_df = pd.DataFrame(columns=car_columns)
    new_df['timeStep'] = timeStep
    new_df['vehId'] = vehId

    # Mobility vectors are mandatory: a missing one raises IndexError, as before.
    for column, key in mobility_vectors:
        new_df[column] = _rows_named(per_car['mobility'], key).vecvalue.values[0]

    # Network vectors are recorded on a different time base than mobility, so
    # they are interpolated onto `timeStep`. A vector that is absent for this
    # vehicle leaves its column empty (NaN).
    for column, source in network_vectors:
        rows = interpolate_dataframe(timeStep, _rows_named(per_car[source], column))
        if not rows.empty:
            new_df[column] = rows.vecvalue.values[0]

    car_frames.append(new_df)

# Concatenate once instead of growing the frame inside the loop. Each vehicle
# keeps its own 0-based row index, exactly as with repeated concatenation.
df = pd.concat(car_frames) if car_frames else pd.DataFrame(columns=car_columns)

In [13]:
# Export the per-vehicle dataset; the file name is stamped with today's date (YYYYMMDD).
df.to_csv(''.join([
    'dataset_VoIP-UL_car_high_traffic_',
    date.today().strftime("%Y%m%d"),
    '.csv',
]))

In [64]:
# List the distinct modules that recorded vectors under gNodeB1.
gnodeb1['module'].unique()

array(['THOSA_Highway.gNodeB1.udp',
       'THOSA_Highway.gNodeB1.cellularNic.mac',
       'THOSA_Highway.gNodeB1.cellularNic.rlc.um',
       'THOSA_Highway.gNodeB1.cellularNic.pdcpRrc',
       'THOSA_Highway.gNodeB1.pppIf.queue',
       'THOSA_Highway.gNodeB1.pppIf.ppp', 'THOSA_Highway.gNodeB1.sctp',
       'THOSA_Highway.gNodeB1.x2ppp[0].queue',
       'THOSA_Highway.gNodeB1.x2ppp[0].ppp',
       'THOSA_Highway.gNodeB1.x2App[0].client'], dtype=object)

In [65]:
# List the distinct vector names recorded under gNodeB1.
gnodeb1['name'].unique()

array(['passedUpPk:vector(count)', 'droppedPkWrongPort:vector(count)',
       'droppedPkBadChecksum:vector(count)', 'avgServedBlocksUl:vector',
       'avgServedBlocksDl:vector',
       'sentPacketToLowerLayer:vector(packetBytes)',
       'sentPacketToUpperLayer:vector(packetBytes)',
       'receivedPacketFromLowerLayer:vector(packetBytes)',
       'packetSent:vector(packetBytes)', 'incomingPacketLengths:vector',
       'incomingDataRate:vector', 'queueingTime:vector',
       'outgoingPacketLengths:vector', 'outgoingDataRate:vector',
       'transmissionState:vector', 'txPk:vector(packetBytes)',
       'queueLength:vector', 'queueBitLength:vector',
       'rxPkOk:vector(packetBytes)', 'passedUpPk:vector(packetBytes)',
       'packetReceived:vector(packetBytes)',
       'receivedPacketFromUpperLayer:vector(packetBytes)',
       'rlcPacketLossDl:vector', 'macDelayDl:vector',
       'SendQueue of Association 36', 'TSN Sent 36:192.168.4.2',
       'Outstanding Bytes 36:192.168.4.2', 'Outst

In [159]:
# Inspect the gNodeB1 vectors whose name contains 'macDelayDl'.
gnodeb1[gnodeb1['name'].str.contains('macDelayDl')]

Unnamed: 0,run,type,module,name,attrname,attrvalue,vectime,vecvalue
2713,VoIP-UL-0-20220920-10:27:10-232583,vector,THOSA_Highway.gNodeB1.cellularNic.mac,macDelayDl:vector,,,"[59.62, 59.639, 59.659, 59.679, 59.699, 59.719...","[0.004, 0.004, 0.004, 0.004, 0.004, 0.004, 0.0..."


In [171]:
# Inspect the gNodeB1 vectors whose name contains 'incomingDataRate'.
gnodeb1[gnodeb1['name'].str.contains('incomingDataRate')]

Unnamed: 0,run,type,module,name,attrname,attrvalue,vectime,vecvalue
4895,VoIP-UL-0-20220920-16:28:24-248526,vector,THOSA_Highway.gNodeB1.pppIf.queue,incomingDataRate:vector,,,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4944,VoIP-UL-0-20220920-16:28:24-248526,vector,THOSA_Highway.gNodeB1.pppMEHostIf.queue,incomingDataRate:vector,,,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
5078,VoIP-UL-0-20220920-16:28:24-248526,vector,THOSA_Highway.gNodeB1.x2ppp[0].queue,incomingDataRate:vector,,,"[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, ...","[33280.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0..."


In [173]:
# Inspect the gNodeB1 vectors whose name contains 'Throughput'.
gnodeb1[gnodeb1['name'].str.contains('Throughput')]

Unnamed: 0,run,type,module,name,attrname,attrvalue,vectime,vecvalue
4950,VoIP-UL-0-20220920-16:28:24-248526,vector,THOSA_Highway.gNodeB1.sctp,Throughput of Association 33,,,[0.0],[0.0]
4951,VoIP-UL-0-20220920-16:28:24-248526,vector,THOSA_Highway.gNodeB1.sctp,Throughput of Association 14,,,[0.0],[0.0]
4953,VoIP-UL-0-20220920-16:28:24-248526,vector,THOSA_Highway.gNodeB1.sctp,Throughput of Association 33,,,[1.292e-07],[0.0]


In [85]:
################ for gNodeB #######################

In [None]:
# All vectors recorded by any gNodeB. `regex=False` matches the prefix
# literally, so the '.' is not treated as a regex wildcard.
gNodeB = vec.loc[vec['module'].str.contains('THOSA_Highway.gNodeB', regex=False)]

In [None]:
# gNodeB-side subsets. NOTE: this rebinds `mac`, which earlier in the notebook
# held the car-side MAC vectors.
mac = gNodeB[gNodeB['module'].str.contains('mac')]
pppIf = gNodeB[gNodeB['module'].str.contains('pppIf')]

In [88]:
# Mark vectors with at most one sample by setting `vecvalue` to NaN so that a
# following dropna(subset=['vecvalue']) removes them. The earlier row loop
# assigned through `frame.iloc[n].vecvalue`, which writes to a temporary row
# Series (SettingWithCopyWarning) and can leave the frame unchanged; a boolean
# mask with `.loc` on an explicit copy updates the frame itself.
mac = mac.copy()
mac.loc[mac['vectime'].map(lambda t: t.size).astype(int) <= 1, 'vecvalue'] = np.nan

pppIf = pppIf.copy()
pppIf.loc[pppIf['vectime'].map(lambda t: t.size).astype(int) <= 1, 'vecvalue'] = np.nan

In [89]:
# Remove the rows whose `vecvalue` is missing from both gNodeB subsets.
mac, pppIf = (
    subset.dropna(subset=['vecvalue'])
    for subset in (mac, pppIf)
)

In [90]:
# Number of base stations iterated when building the gNodeB dataset; the loop
# looks for modules named 'gNodeB1' .. 'gNodeB<numGNodeB>'.
numGNodeB = 2

In [91]:
# One [x, y] pair per gNodeB, written to the lastPosX / lastPosY columns of the
# gNodeB dataset.
# NOTE(review): units and coordinate frame are not visible here — confirm
# against the simulation scenario.
gNodeB_pos = [[500, 500], [4500, -500]]

In [92]:
# create an empty dataframe
df = pd.DataFrame(columns = [
        'timeStep', 'gNodeBId', 
        'lastPosX', 'lastPosY',
        'avgServedBlocksUl', 'avgServedBlocksDl', 'macDelayDl',
        'incomingDataRate', 'outgoingDataRate'
    ])

for gNodeB_id in range(numGNodeB):
    mac_gNodeB = mac.loc[mac.module.str.contains('gNodeB' + str(gNodeB_id+1))]
    pppIf_gNodeB = pppIf.loc[pppIf.module.str.contains('gNodeB' + str(gNodeB_id+1))]
    
    gNodeBId = gNodeB_id + 1
    
    lastPosX = gNodeB_pos[0][0]
    lastPosY = gNodeB_pos[0][1]
    
    avgServedBlocksUl = mac_gNodeB.loc[mac_gNodeB.name.str.contains('avgServedBlocksUl')]
    avgServedBlocksDl = mac_gNodeB.loc[mac_gNodeB.name.str.contains('avgServedBlocksDl')]

    macDelayDl = mac_gNodeB.loc[mac_gNodeB.name.str.contains('macDelayDl')]
        
    incomingDataRate = pppIf_gNodeB.loc[pppIf_gNodeB.name.str.contains('incomingDataRate')]
    outgoingDataRate = pppIf_gNodeB.loc[pppIf_gNodeB.name.str.contains('incomingDataRate')]
    
    # mobility data is recorded with longer interval, therefore they need to be
    # interpolated with network data
    
    timeStep = avgServedBlocksDl.vectime.values[0]
    incomingDataRate = interpolate_dataframe(timeStep, incomingDataRate)
    outgoingDataRate = interpolate_dataframe(timeStep, outgoingDataRate)
    
    macDelayDl = interpolate_dataframe(timeStep, outgoingDataRate)
    
    new_df = pd.DataFrame(columns = [
        'timeStep', 'gNodeBId', 
        'lastPosX', 'lastPosY',
        'avgServedBlocksUl', 'avgServedBlocksDl', 'macDelayDl',
        'incomingDataRate', 'outgoingDataRate'
    ])
    
    new_df['timeStep'] = timeStep
    new_df['gNodeBId'] = gNodeBId
    new_df['lastPosX'] = lastPosX
    new_df['lastPosY'] = lastPosY
    
    if not avgServedBlocksUl.empty:
        new_df['avgServedBlocksUl'] = avgServedBlocksUl.vecvalue.values[0]
        new_df['avgServedBlocksDl'] = avgServedBlocksDl.vecvalue.values[0]
        
    if not incomingDataRate.empty:
        new_df['incomingDataRate'] = incomingDataRate.vecvalue.values[0]
        new_df['outgoingDataRate'] = outgoingDataRate.vecvalue.values[0]
        
    if not macDelayDl.empty:
        new_df['macDelayDl'] = macDelayDl.vecvalue.values[0]
            
            
    new_df.macDelayDl[new_df['macDelayDl'] == 0] = np.nan
    
    df = pd.concat([df, new_df])
    




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [14]:
# Export the per-gNodeB dataset; the file name is stamped with today's date (YYYYMMDD).
df.to_csv(''.join([
    'dataset_VoIP-UL_gNodeB_high_traffic_',
    date.today().strftime("%Y%m%d"),
    '.csv',
]))