In [2]:
import pandas as pd
import numpy as np
import os
import datetime
import json

In [3]:
def parse_if_number(s):
    try:
        return float(s)
    except:
        return True if s == "true" else False if s == "false" else s if s else None

In [4]:
def parse_ndarray(s):
    return np.fromstring(s, sep=' ') if s else None

In [5]:
def parse_vectime_vecvalue(df):

    new_df = pd.DataFrame(columns=["Node", "Name", "Time", "Value"])

    names = []
    nodes = []

    count = 1

    print("Parsing vector file")

    vectimes = np.array([])
    vecvalues = np.array([])

    df_len = len(df.index)

    for index, row in df.iterrows():
        names.append([row[2] for _ in range(len(row.vectime))])
        nodes.append([row.node for _ in range(len(row.vectime))])
        vectimes = np.concatenate((vectimes, row.vectime))
        vecvalues = np.concatenate((vecvalues, row.vecvalue))

        print("Processed row: {} of {}".format(count, df_len))
        count += 1
        break

    new_df = new_df.append({"Node": nodes, "Name": names, "Time": vectimes, "Value": vecvalues}, ignore_index=True)

    return new_df

In [6]:
raw_results = "/home/brian/results-analysis/data/raw_data/mode4_highway_fast.csv"
stats = "/home/brian/results-analysis/configs/mode4-stats.json"

In [8]:
raw_df = pd.read_csv(raw_results, converters={
        "attrvalue": parse_if_number,
        "binedges" : parse_ndarray,
        "binvalues": parse_ndarray,
        "vectime"  : parse_ndarray,
        "vecvalue" : parse_ndarray})

print("Loaded csv into DataFrame")

Loaded csv into DataFrame


In [9]:
reset_df = raw_df

In [10]:
# Run when we want to reset the DF As the current being loaded in is large
raw_df = reset_df

In [11]:
# It's likely this will change depending on the run/system
# Might be worth investigating some form of alternative
broken_module = raw_df['module'].str.split('.', 3, expand=True)

raw_df["network"] = broken_module[0]
raw_df["node"] = broken_module[1]
raw_df["interface"] = broken_module[2]
raw_df["layer"] = broken_module[3]

raw_df = raw_df.drop("module", axis=1)

In [12]:
# Remove junk from common node names
raw_df.node = raw_df.node.str.replace("node", "")
raw_df.node = raw_df.node.str.replace("[", "")
raw_df.node = raw_df.node.str.replace("]", "")
raw_df.node = raw_df.node.str.replace("car", "")

In [13]:
if stats:
    with open(stats) as json_file:
        data = json.load(json_file)
        raw_df = raw_df[(raw_df["name"].isin(data["filtered_vectors"])) | (raw_df["name"].isin(data["filtered_scalars"]))]
        raw_df.reset_index()        

In [14]:
# This will always remain the same for all runs.
broken_run = raw_df['run'].str.split('-', 4, expand=True)

raw_df["scenario"] = broken_run[0]
raw_df["run"] = broken_run[1]
raw_df["date"] = broken_run[2]
raw_df["time"] = broken_run[3]
raw_df["processId"] = broken_run[4]

In [15]:
runattr_df = raw_df[raw_df["type"] == "runattr"]
runattr_df = runattr_df.dropna(axis=1, how="all")
runattr_df = runattr_df.reset_index(drop=True)

In [16]:
itervar_df = raw_df[raw_df["type"] == "itervar"]
itervar_df = itervar_df.dropna(axis=1, how="all")
itervar_df = itervar_df.reset_index(drop=True)

In [17]:
param_df = raw_df[raw_df["type"] == "param"]
param_df = param_df.dropna(axis=1, how="all")
param_df = param_df.reset_index(drop=True)

In [18]:
attr_df = raw_df[raw_df["type"] == "attr"]
attr_df = attr_df.dropna(axis=1, how="all")
attr_df = attr_df.reset_index(drop=True)

In [19]:
scalar_df = raw_df[raw_df["type"] == "scalar"]
scalar_df = scalar_df.dropna(axis=1, how="all")
scalar_df = scalar_df.reset_index(drop=True)

In [20]:
vector_df = raw_df[raw_df["type"] == "vector"]
vector_df = vector_df.dropna(axis=1, how="all")
vector_df = vector_df.reset_index(drop=True)

### Now to fix the vector df

Beyond this is where fixes are made and we redesign our function for fixing them

In [21]:
vector_df.head()

Unnamed: 0,run,type,name,vectime,vecvalue,network,node,interface,layer,scenario,date,time,processId
0,A,vector,transmission:vector(camStationId),"[200.10027580061, 200.20027580061, 200.3002758...","[1369133069.0, 1369133069.0, 1369133069.0, 136...",Mode4World,34,middleware,CaService,Mode4,0,20190327,10:41:09-11727
1,A,vector,transmission:vector(camGenerationDeltaTime),"[200.10027580061, 200.20027580061, 200.3002758...","[2624.0, 2624.0, 2624.0, 2624.0, 2624.0, 2624....",Mode4World,34,middleware,CaService,Mode4,0,20190327,10:41:09-11727
2,A,vector,receivedPacketFromUpperLayer:vector(packetBytes),"[200.10027580061, 200.20027580061, 200.3002758...","[88.0, 85.0, 85.0, 85.0, 85.0, 88.0, 85.0, 85....",Mode4World,34,lteNic,pdcpRrc,Mode4,0,20190327,10:41:09-11727
3,A,vector,sentPacketToLowerLayer:vector(packetBytes),"[200.10027580061, 200.20027580061, 200.3002758...","[89.0, 86.0, 86.0, 86.0, 86.0, 89.0, 86.0, 86....",Mode4World,34,lteNic,pdcpRrc,Mode4,0,20190327,10:41:09-11727
4,A,vector,selectedNumSubchannels:vector,"[200.10027580061, 205.20027580061, 206.215]","[5.0, 1.0, 5.0]",Mode4World,34,lteNic,mac,Mode4,0,20190327,10:41:09-11727


In [22]:
rows_we_want = vector_df.drop(["run", "type", "network", "node", "interface", "layer", "scenario", "date", "time", "processId"], axis=1)
rows_we_want = rows_we_want.reset_index(drop=True)

In [23]:
rows_we_want_vectime = rows_we_want.pivot(columns="name", values="vectime")
rows_we_want_vecvalue = rows_we_want.pivot(columns="name", values="vecvalue")

In [24]:
rows_we_want_vectime = rows_we_want_vectime.add_suffix(":time")
rows_we_want_vecvalue = rows_we_want_vecvalue.add_suffix(":value")

In [25]:
rows_we_want_vectime

name,grantBreak:vector:time,grantBreakSize:vector:time,grantRequests:vector:time,maximumCapacity:vector:time,receivedPacketFromLowerLayer:vector(packetBytes):time,receivedPacketFromUpperLayer:vector(packetBytes):time,reception:vector(camGenerationDeltaTime):time,reception:vector(camStationId):time,scisDecoded:vector:time,scisNotDecoded:vector:time,...,sentPacketToLowerLayer:vector(packetBytes):time,sentPacketToUpperLayer:vector(packetBytes):time,tbFailedButSCIReceived:vector:time,tbsDecoded:vector:time,tbsFailedDueToNoSCI:vector:time,tbsReceived:vector:time,tbsSent:vector:time,transmission:vector(camGenerationDeltaTime):time,transmission:vector(camStationId):time,txRxDistance:vector:time
0,,,,,,,,,,,...,,,,,,,,,"[200.10027580061, 200.20027580061, 200.3002758...",
1,,,,,,,,,,,...,,,,,,,,"[200.10027580061, 200.20027580061, 200.3002758...",,
2,,,,,,"[200.10027580061, 200.20027580061, 200.3002758...",,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,"[200.10027580061, 200.20027580061, 200.3002758...",,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,"[200.10027580061, 201.104, 201.104, 201.204, 2...",,,,,,,,,
6,,,"[200.10027580061, 205.20027580061, 206.215]",,,,,,,,...,,,,,,,,,,
7,,,,,,"[200.10027580061, 200.20027580061, 200.3002758...",,,,,...,,,,,,,,,,
8,,,,,"[200.10027580061, 201.106, 201.106, 201.106, 2...",,,,,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,,,"[200.10072810147, 200.20072810147, 200.3007281...",


In [26]:
combined = pd.concat([rows_we_want_vecvalue, rows_we_want_vectime], axis=1, sort=False)

In [27]:
combined.size

804240

In [None]:
overall_df = pd.DataFrame()
for column in combined.columns:
    new_df = pd.DataFrame()
    col_vals = np.array([])
    for array in combined[column]:
        if type(array) is np.ndarray:
            col_vals = np.concatenate((col_vals, array), axis=None)
    new_df[column] = col_vals
    overall_df = pd.concat([new_df, overall_df], axis=1, sort=False)
    print(column)

grantBreak:vector:value
grantBreakSize:vector:value
grantRequests:vector:value
maximumCapacity:vector:value
receivedPacketFromLowerLayer:vector(packetBytes):value
receivedPacketFromUpperLayer:vector(packetBytes):value
reception:vector(camGenerationDeltaTime):value
reception:vector(camStationId):value
scisDecoded:vector:value
scisNotDecoded:vector:value
scisReceived:vector:value
scisSent:vector:value
selectedMCS:vector:value
selectedNumSubchannels:vector:value
sentPacketToLowerLayer:vector(packetBytes):value
sentPacketToUpperLayer:vector(packetBytes):value
tbFailedButSCIReceived:vector:value
tbsDecoded:vector:value
tbsFailedDueToNoSCI:vector:value
tbsReceived:vector:value
tbsSent:vector:value
transmission:vector(camGenerationDeltaTime):value
transmission:vector(camStationId):value
txRxDistance:vector:value
grantBreak:vector:time
grantBreakSize:vector:time
grantRequests:vector:time
maximumCapacity:vector:time
receivedPacketFromLowerLayer:vector(packetBytes):time
receivedPacketFromUpperLa

In [29]:
overall_df

Unnamed: 0,grantBreak:vector:value
0,1.0
1,1.0
2,1.0
3,1.0
4,1.0
5,1.0
6,1.0
7,1.0
8,1.0
9,1.0


### Finished fixing vector DF

In [None]:
vector_df = parse_vectime_vecvalue(vector_df)

In [None]:
now = datetime.datetime.now()
if args.name != now:
    directory = "{}/{}-{}".format(args.tidied_results, now.strftime("%Y-%m-%d_%H:%M"), args.name)
else:
    directory = "{}/{}".format(args.tidied_results, now.strftime("%Y-%m-%d_%H:%M"))

os.mkdir(directory)

runattr_df.to_csv("{}/{}".format(directory, "runattr.csv"), index=False)
itervar_df.to_csv("{}/{}".format(directory, "itervar.csv"), index=False)
param_df.to_csv("{}/{}".format(directory, "params.csv"), index=False)
attr_df.to_csv("{}/{}".format(directory, "attr.csv"), index=False)
vector_df.to_csv("{}/{}".format(directory, "vector.csv"), index=False)
scalar_df.to_csv("{}/{}".format(directory, "scalar.csv"), index=False)