In [4]:
import os
import multiprocessing
import pandas as pd
import logging
import re
from itertools import repeat
from natsort import natsorted
import shutil
import csv
import numpy as np

In [5]:
vector_dec_line_pattern = re.compile("^vector")
vector_res_line_pattern = re.compile("^\d+")

In [6]:
stats_of_interest = [
    "sciSent",
    "tbSent",
    "txRxDistanceSCI",
    "txRxDistanceTB",
    "sciReceived",
    "sciDecoded",
    "sciFailedHalfDuplex",
    "tbReceived",
    "tbDecoded",
    "tbFailedDueToNoSCI",
    "tbFailedHalfDuplex",
    "tbFailedButSCIReceived",
    "sciUnsensed",
    "missedTransmission",
    "generatedGrants",
    "selectedSubchannelIndex",
    "selectedNumSubchannels",
    "grantBreak",
    "grantBreakMissedTrans",
    "grantBreakSize",
    "tbFailedDueToProp",
    "tbFailedDueToInterference",
    "sciFailedDueToProp",
    "sciFailedDueToInterference",
    "cbr",
    "subchannelReceived",
    "subchannelsUsed",
    "senderID",
    "subchannelsUsedToSend",
    "subchannelSent",
    "grantStartTime",
    "selectedSubchannelIndex",
    "selectedNumSubchannels",
    "posX",
    "posY",
    "interPacketDelay"
]

In [7]:
def tidy_data(real_vector_path, json_fields, output_csv, read=True, pivot=True, combine=True):
    # Simply remove the :vector part of vector names from both sets of vectors.
    found_vector = False
    for field in json_fields:
        if ":vector" in field:
            found_vector = True
            break

    if found_vector:
        json_fields = remove_vectors(json_fields)

    print(json_fields)

    print("Beginning parsing of vector file: {}".format(real_vector_path))

    # Read the vector file into a csv file
    chunk_folder = output_csv.split(".")[0]
    
    if read:
        read_vector_file(output_csv, real_vector_path, json_fields)

    print("File read, begin pivoting csv file: {}".format(real_vector_path))
    
    if pivot:
        csv_pivot(chunk_folder, json_fields)

    print("Pivot complete, consolidate chunk files for {}".format(output_csv))
    
    if combine:
        combine_files(chunk_folder, output_csv)

    print("Finished parsing of vector file: {}".format(real_vector_path))

In [8]:
def csv_pivot(directory, stats):
    orig_loc = os.getcwd()
    os.chdir(directory)

    csv_files = os.listdir(os.getcwd())
    csv_files = natsorted(csv_files)
    header = True
    for csv_file in csv_files:
        if ".csv" in csv_file:
            print("Pivoting chunk file: {}".format(csv_file))
            chunk_df = pd.read_csv(csv_file)

            chunk_df = chunk_df.infer_objects()

            chunk_df = chunk_df.sort_values(by=["NodeID", "Time"])
            # Parse the vector file to ensure it is formatted correclty.
            chunk_df['seq'] = chunk_df.groupby(["Time", "NodeID", "StatisticName"]).cumcount()

            chunk_df = chunk_df.pivot_table("Value", ["Time", "NodeID", "seq"], "StatisticName")
            chunk_df.reset_index(inplace=True)
            chunk_df = chunk_df.drop(["seq"], axis=1)

            # Ensure all fields correctly filled
            for field in stats:
                if field not in chunk_df.columns:
                    chunk_df[field] = np.nan

            # Ensure the order of the files is also correct
            chunk_df = chunk_df.reindex(sorted(chunk_df.columns), axis=1)

            chunk_df.to_csv(csv_file, index=False, header=header)
            header = False

            del chunk_df

    os.chdir(orig_loc)

In [9]:
def combine_files(csv_directory, outfile):
    destination = open(outfile, 'wb')

    orig_loc = os.getcwd()
    os.chdir(csv_directory)

    csv_files = os.listdir(os.getcwd())
    csv_files = natsorted(csv_files)
    for csv_file in csv_files:
        if ".csv" in csv_file and csv_file != outfile:
            print("Merging chunk file: {} into {}".format(csv_file, outfile))
            shutil.copyfileobj(open(csv_file, 'rb'), destination)
            os.remove(csv_file)
    destination.close()

    os.chdir(orig_loc)

    os.rmdir(csv_directory)

In [10]:
## Setup 
real_vector_path = "/hdd/results-analysis/data/omnet/cv2x/RRI-Adaptation-CR-Limit-50-2020-05-28-10_01_47/run-1.vec"
output_csv ="/hdd/results-analysis/data/parsed_data/cv2x/RRI-Adaptation-CR-Limit-50-2020-05-28-10_01_47/run-1.csv"

In [11]:
tidy_data(real_vector_path, stats_of_interest, output_csv, read=False)

['sciSent', 'tbSent', 'txRxDistanceSCI', 'txRxDistanceTB', 'sciReceived', 'sciDecoded', 'sciFailedHalfDuplex', 'tbReceived', 'tbDecoded', 'tbFailedDueToNoSCI', 'tbFailedHalfDuplex', 'tbFailedButSCIReceived', 'sciUnsensed', 'missedTransmission', 'generatedGrants', 'selectedSubchannelIndex', 'selectedNumSubchannels', 'grantBreak', 'grantBreakMissedTrans', 'grantBreakSize', 'tbFailedDueToProp', 'tbFailedDueToInterference', 'sciFailedDueToProp', 'sciFailedDueToInterference', 'cbr', 'subchannelReceived', 'subchannelsUsed', 'senderID', 'subchannelsUsedToSend', 'subchannelSent', 'grantStartTime', 'selectedSubchannelIndex', 'selectedNumSubchannels', 'posX', 'posY', 'interPacketDelay']
Beginning parsing of vector file: /hdd/results-analysis/data/omnet/cv2x/RRI-Adaptation-CR-Limit-50-2020-05-28-10_01_47/run-1.vec
File read, begin pivoting csv file: /hdd/results-analysis/data/omnet/cv2x/RRI-Adaptation-CR-Limit-50-2020-05-28-10_01_47/run-1.vec
Pivoting chunk file: chunk-0.csv
Pivoting chunk file: 