In [1]:
import csv 
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d

In [2]:
# owner__title__cpu/memory__extraName__sandbox

# e.g. janitha__CPU_Usage__cpu__centralized_plugin__m1
# e.g. janitha__CPU_Usage__cpu__centralized_plugin__m2
# e.g. janitha__CPU_Usage__cpu__centralized_plugin__m3
# e.g. janitha__Memory_Usage__memory__centralized_plugin__m1
# e.g. janitha__Memory_Usage__memory__centralized_plugin__m2
# e.g. janitha__Memory_Usage__memory__centralized_plugin__m3
# e.g. janitha__CPU_Usage__cpu__distributed_plugin__m1
# e.g. janitha__CPU_Usage__cpu__distributed_plugin__m2
# e.g. janitha__CPU_Usage__cpu__distributed_plugin__m3
# e.g. janitha__Memory_Usage__memory__distributed_plugin__m1
# e.g. janitha__Memory_Usage__memory__distributed_plugin__m2
# e.g. janitha__Memory_Usage__memory__distributed_plugin__m3

# Display labels shared by the CSV headers and the chart axes/legends.
# The "cpu"/"memory" keys match the usage-type segment of the file names,
# and "m1".."m3" match the trailing sandbox segment.
# NOTE(review): convert_to_mib() produces MiB values while the label below
# says "MB" -- confirm which unit is intended before changing the text.
name_mapper = dict(
    x_label="Time",
    cpu="CPU (%)",
    memory="Memory (MB)",
    m1="Machine 1",
    m2="Machine 2",
    m3="Machine 3",
)

# Create CSV Files

In [3]:
def convert_to_mib(memory):
    """Normalise a memory reading such as ``"1.5GiB"`` to a MiB string.

    Args:
        memory: Text made of a number immediately followed by a binary unit
            suffix (``B``, ``KiB``, ``MiB``, ``GiB`` or ``TiB``). Leading and
            trailing whitespace is ignored.

    Returns:
        The value as a string ending in ``"MiB"``. Readings already in MiB are
        returned as given (minus surrounding whitespace); every other unit is
        converted and formatted with two decimals, e.g. ``"1536.00MiB"``.

    Raises:
        ValueError: If the unit suffix is not recognised or the numeric part
            cannot be parsed.
    """
    text = memory.strip()

    # Already in the target unit: hand it back untouched so callers that
    # strip the 3-character suffix keep working.
    if text.endswith('MiB'):
        return text

    # Suffix -> multiplier to MiB. The bare "B" entry must come last because
    # every other suffix also ends in "B".
    unit_factors = (
        ('TiB', 1024.0 * 1024.0),
        ('GiB', 1024.0),
        ('KiB', 1.0 / 1024.0),
        ('B', 1.0 / (1024.0 * 1024.0)),
    )
    for suffix, factor in unit_factors:
        if text.endswith(suffix):
            amount = float(text[:-len(suffix)])
            return f"{amount * factor:.2f}MiB"

    raise ValueError("Invalid memory format")


def extract_info(file_name):
    """Read CPU and memory samples from a whitespace-separated text file.

    Each non-blank line is split on whitespace. The third field, minus its
    last character (presumably a trailing ``%`` -- confirm against the input
    files), is parsed as the CPU value. The fourth field is passed through
    ``convert_to_mib`` and parsed as a MiB number.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A ``(cpu_percentages, mem_usages)`` tuple of two equally long lists
        of floats, in file order.

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If a field cannot be parsed as a number or memory size.
    """
    cpu_percentages = []
    mem_usages = []

    with open(file_name, 'r') as file:
        for line in file:
            values = line.split()

            # Blank or whitespace-only lines (e.g. a trailing empty line)
            # carry no sample; skip them instead of failing on values[2].
            if not values:
                continue

            cpu_percentages.append(float(values[2][:-1]))
            # convert_to_mib returns "<number>MiB"; drop the 3-char suffix.
            mem_usages.append(float(convert_to_mib(values[3])[:-3]))

    return cpu_percentages, mem_usages


def write_to_csv(cpu_percentages, mem_usages, output_file):
    """Write paired CPU and memory samples to ``output_file`` as CSV.

    The header row uses the ``name_mapper`` labels for "cpu" and "memory".
    One data row is written per entry of ``cpu_percentages``, paired with the
    entry of ``mem_usages`` at the same index.
    """
    with open(output_file, mode='w', newline='') as csv_handle:
        out = csv.writer(csv_handle)
        out.writerow([name_mapper["cpu"], name_mapper["memory"]])
        for index, cpu_value in enumerate(cpu_percentages):
            out.writerow([cpu_value, mem_usages[index]])

In [4]:
def list_txt_files_in_folder(folder_path):
    """Return the names of the regular ``.txt`` files directly in a folder.

    Only file names are returned, not full paths. If listing fails, the error
    is printed and an empty list is returned instead of raising.
    """
    found = []
    try:
        found = [
            entry
            for entry in os.listdir(folder_path)
            if entry.endswith(".txt")
            and os.path.isfile(os.path.join(folder_path, entry))
        ]
    except Exception as err:
        print(f"Error occurred while listing .txt files: {err}")
    return found

In [5]:
# Folder layout: raw inputs -> preprocessed CSVs, each with one subfolder per
# chart type. The values keep their trailing "/" because other cells build
# paths from them by plain string concatenation.
input_folder_path = "./1_input/"
preprocess_folder_path = "./2_preprocess/"
line_chart_folder_path = "1_line_chart/"

files_in_folder = list_txt_files_in_folder(input_folder_path + line_chart_folder_path)

# Create the output folder up front so the cell also works on a fresh
# checkout where ./2_preprocess/1_line_chart/ does not exist yet.
os.makedirs(preprocess_folder_path + line_chart_folder_path, exist_ok=True)

for file_name in files_in_folder:
    input_file_path = input_folder_path + line_chart_folder_path + file_name
    # splitext drops only the trailing ".txt"; split(".")[0] would cut the
    # name at its first dot and could make two inputs share one output file.
    preprocess_file_path = (
        preprocess_folder_path
        + line_chart_folder_path
        + os.path.splitext(file_name)[0]
        + ".csv"
    )

    cpu_percentages, mem_usages = extract_info(input_file_path)
    write_to_csv(cpu_percentages, mem_usages, preprocess_file_path)

    print(f'Data has been written to {preprocess_file_path}')

Data has been written to ./2_preprocess/1_line_chart/dinil__Memory_Usage__memory__not_downsampled_overlap_to_20_select_all_samples__m3.csv
Data has been written to ./2_preprocess/1_line_chart/dinil__Memory_Usage__memory__down_sampled_to_10MHz_overlap_to_10_select_samples_from_1_4_to_3_4__m2.csv
Data has been written to ./2_preprocess/1_line_chart/dinil__Memory_Usage__memory__down_sampled_to_10MHz_overlap_to_10_select_samples_from_1_4_to_3_4__m3.csv
Data has been written to ./2_preprocess/1_line_chart/dinil__Memory_Usage__memory__not_downsampled_overlap_to_20_select_all_samples__m2.csv
Data has been written to ./2_preprocess/1_line_chart/dinil__Memory_Usage__memory__down_sampled_to_10MHz_overlap_to_20_select_all_samples__m3.csv
Data has been written to ./2_preprocess/1_line_chart/dinil__CPU_Usage__cpu__down_sampled_to_10MHz_overlap_to_20_select_samples_from_1_4_to_3_4__m3.csv
Data has been written to ./2_preprocess/1_line_chart/dinil__Memory_Usage__memory__not_downsampled_overlap_to_20_

# Generate Charts

In [6]:
def list_csv_files_in_folder(folder_path):
    """Return the names of the regular ``.csv`` files directly in a folder.

    Only file names are returned, not full paths. If listing fails, the error
    is printed and an empty list is returned instead of raising.
    """
    found = []
    try:
        found = [
            entry
            for entry in os.listdir(folder_path)
            if entry.endswith(".csv")
            and os.path.isfile(os.path.join(folder_path, entry))
        ]
    except Exception as err:
        print(f"Error occurred while listing .csv files: {err}")
    return found

In [7]:
csv_files_in_folder = list_csv_files_in_folder(preprocess_folder_path + line_chart_folder_path)
csv_files_in_folder_sorted = sorted(csv_files_in_folder)

# Group the preprocessed CSVs by everything before the last "__" segment.
# Each key maps to the list of trailing segments (file extension removed)
# that exist for it, in sorted file-name order.
result = {}

for item in csv_files_in_folder_sorted:
    *prefix_parts, last_part = item.split("__")
    key = "__".join(prefix_parts)
    value = last_part.split(".")[0]
    if key not in result:
        result[key] = []
    result[key].append(value)

print(result)


{'dinil__CPU_Usage__cpu__down_sampled_to_10MHz_overlap_to_10_select_all_samples': ['m2', 'm3'], 'dinil__CPU_Usage__cpu__down_sampled_to_10MHz_overlap_to_10_select_samples_from_1_4_to_3_4': ['m2', 'm3'], 'dinil__CPU_Usage__cpu__down_sampled_to_10MHz_overlap_to_20_select_all_samples': ['m2', 'm3'], 'dinil__CPU_Usage__cpu__down_sampled_to_10MHz_overlap_to_20_select_samples_from_1_4_to_3_4': ['m2', 'm3'], 'dinil__CPU_Usage__cpu__not_downsampled_overlap_to_10_select_all_samples': ['m1', 'm2', 'm3'], 'dinil__CPU_Usage__cpu__not_downsampled_overlap_to_10_select_samples_from_1_4_to_3_4': ['m1', 'm2', 'm3'], 'dinil__CPU_Usage__cpu__not_downsampled_overlap_to_20_select_all_samples': ['m1', 'm2', 'm3'], 'dinil__CPU_Usage__cpu__not_downsampled_overlap_to_20_select_samples_from_1_4_to_3_4': ['m1', 'm2', 'm3'], 'dinil__Memory_Usage__memory__down_sampled_to_10MHz_overlap_to_10_select_all_samples': ['m2', 'm3'], 'dinil__Memory_Usage__memory__down_sampled_to_10MHz_overlap_to_10_select_samples_from_1_4_

In [8]:
# Root output folder for generated charts. The chart cells below save into
# <chart_folder_path>/<owner>/<chart type folder>/.
chart_folder_path = "./3_chart/"

# Line Chart Generation

In [9]:
# Draw one line chart per experiment key, with one smoothed curve per
# machine listed in `values`, and save it as
# <chart_folder_path>/<owner>/<line_chart_folder_path>/<key>.png.
for key, values in result.items():
    # key layout: owner__title__usageType__extraName
    key_parts = key.split("__")
    chart_folder = key_parts[0]
    title = " ".join(key_parts[1].split("_"))
    usage_type = key_parts[2]

    # One explicit figure per chart, closed after saving, so nothing leaks
    # from one chart into the next and no empty figure is left behind.
    fig, ax = plt.subplots()

    for value in values:
        file_path = preprocess_folder_path + line_chart_folder_path + key + "__" + value + ".csv"

        # Only the column matching this chart's usage type is plotted.
        with open(file_path, "r", newline="") as csvfile:
            reader = csv.DictReader(csvfile)
            usage_values = [float(row[name_mapper[usage_type]]) for row in reader]

        if not usage_values:
            # Nothing to draw; x.min()/x.max() would fail on an empty array.
            print(f"Skipping {file_path}: no samples found")
            continue

        x = np.arange(len(usage_values))
        y = np.array(usage_values)

        if len(usage_values) >= 4:
            # Cubic interpolation onto 300 evenly spaced points to smooth the curve.
            f = interp1d(x, y, kind='cubic')
            x_new = np.linspace(x.min(), x.max(), 300)
            y_smooth = f(x_new)
        else:
            # A cubic spline needs at least 4 samples; plot the raw points instead.
            x_new, y_smooth = x, y

        ax.plot(x_new, y_smooth, marker='', linestyle='-', label=name_mapper[value])

    # Labels, legend and the file write are per chart, so they are done once
    # after all curves are drawn rather than once per machine.
    ax.set_title(title)
    ax.set_xlabel(name_mapper["x_label"])
    ax.set_ylabel(name_mapper[usage_type])
    ax.grid(True)
    ax.legend()
    fig.tight_layout()

    chart_dir = chart_folder_path + chart_folder + "/" + line_chart_folder_path
    # Create the per-owner output folder so a fresh checkout can run this cell.
    os.makedirs(chart_dir, exist_ok=True)

    chart_file_name = key + ".png"
    chart_file_path = chart_dir + chart_file_name
    fig.savefig(chart_file_path, dpi=300)
    plt.close(fig)

<Figure size 640x480 with 0 Axes>

# Box Plot Generation

In [10]:
# m0 = does not depend on the sandbox
# owner__title__xlabel__ylabel__extraName__sandbox

# e.g. dinil__Sampling_Rate_=_10MHz__Time_(s)__File_Size_(MB)__file_size_against_time_10MHz__m1

In [11]:
box_plot_folder_path = "2_box_plot/"

csv_files_in_folder = list_csv_files_in_folder(input_folder_path + box_plot_folder_path)

# Face colors cycled over the boxes of every plot.
box_colors = ['pink', 'lightblue', 'lightgreen']

# One box plot per input CSV: every column becomes one box, labelled with the
# column name. Title and axis labels come from the "__"-separated file name
# (owner__title__xlabel__ylabel__...), with "_" shown as spaces.
for filename in csv_files_in_folder:
    data = pd.read_csv(input_folder_path + box_plot_folder_path + filename)

    name_parts = filename.split("__")
    owner_name = name_parts[0]
    title = " ".join(name_parts[1].split("_"))
    xlabel = " ".join(name_parts[2].split("_"))
    ylabel = " ".join(name_parts[3].split("_"))

    fig, ax = plt.subplots()

    # Pass each column separately with missing values removed: columns of
    # unequal length are padded with NaN by read_csv, and a NaN inside the
    # data makes the box statistics NaN so the box is not drawn.
    samples = [data[column].dropna().values for column in data.columns]
    bp = ax.boxplot(samples, labels=list(data.columns), patch_artist=True)

    # Assigning colors to each box, wrapping around the color list
    for i, box in enumerate(bp['boxes']):
        box.set_facecolor(box_colors[i % len(box_colors)])

    # Adding labels and title
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    output_dir = chart_folder_path + owner_name + "/" + box_plot_folder_path
    # Create the per-owner output folder so a fresh checkout can run this cell.
    os.makedirs(output_dir, exist_ok=True)

    # splitext drops only the trailing ".csv"; split(".")[0] would cut names
    # that contain another dot and could make two charts overwrite each other.
    fig.savefig(output_dir + os.path.splitext(filename)[0] + ".png", dpi=300)
    plt.close(fig)


# Bar Chart Generation

In [12]:
# m0 = does not depend on the sandbox
# owner__title__xlabel__ylabel__extraName__sandbox

# e.g. dinil__Preproessing__Setting__File_Size_(MB)__file_size_against_preproessing_settings__m0

In [13]:
bar_chart_folder_path = "3_bar_chart/"

csv_files_in_folder = list_csv_files_in_folder(input_folder_path + bar_chart_folder_path)

# One bar chart per input CSV, using its "Category" and "Value" columns.
# Title and axis labels come from the "__"-separated file name
# (owner__title__xlabel__ylabel__...), with "_" shown as spaces.
for filename in csv_files_in_folder:
    data = pd.read_csv(input_folder_path + bar_chart_folder_path + filename)

    name_parts = filename.split("__")
    owner_name = name_parts[0]
    title = " ".join(name_parts[1].split("_"))
    xlabel = " ".join(name_parts[2].split("_"))
    ylabel = " ".join(name_parts[3].split("_"))

    categories = data['Category']
    values = data['Value']

    # Create bar chart on its own figure so nothing carries over between files
    fig, ax = plt.subplots()
    ax.bar(categories, values, color='skyblue')

    # Add labels and title
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    output_dir = chart_folder_path + owner_name + "/" + bar_chart_folder_path
    # Create the per-owner output folder so a fresh checkout can run this cell.
    os.makedirs(output_dir, exist_ok=True)

    # splitext drops only the trailing ".csv"; split(".")[0] would cut names
    # that contain another dot and could make two charts overwrite each other.
    fig.savefig(output_dir + os.path.splitext(filename)[0] + ".png", dpi=300)
    plt.close(fig)