In [None]:
import csv 
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d

In [None]:
# owner__title__cpu/memory__sandbox__extraName

# e.g. janitha__CPU_Usage__cpu__m1__centralized_plugin
# e.g. janitha__CPU_Usage__cpu__m2__centralized_plugin

# Lookup table from short identifiers to the human-readable text used in the
# generated CSV headers and charts.
#   * "time", "cpu" and "memory" are written as CSV column headers and read back
#     by those same names when the line charts are built; "cpu" / "memory" also
#     serve as the y-axis label of a line chart.
#   * "x_label" is the x-axis label of every line chart.
#   * The remaining keys are matched against the last "__"-separated field of a
#     preprocessed CSV file name and become that series' legend label.
name_mapper = {
    "x_label" : "Time",
    "time" : "Time (s)",
    "cpu" : "CPU (%)",
    "memory": "Memory (MB)",
    # Sandbox labels — not looked up by any cell visible in this notebook;
    # presumably kept for reference (TODO confirm).
    "m1" : "Machine 1",
    "m2" : "Machine 2",
    "m3" : "Machine 3",
    # Upload mechanisms, one entry per input size.
    # NOTE(review): "Chuck"/"chuck" may be intended as "Chunk" — confirm before
    # changing, because the keys must match file names and the values appear in
    # chart legends.
    "traditional_1_5GB" : "Traditional (1.51 GB)",
    "traditional_4_5GB" : "Traditional (4.46 GB)",
    "chuck_15MB_1_5GB" : "Chuck 15 MB (1.51 GB)",
    "chuck_15MB_4_5GB" : "Chuck 15 MB (4.46 GB)",
    "chuck_25MB_1_5GB" : "Chuck 25 MB (1.51 GB)",
    "chuck_25MB_4_5GB" : "Chuck 25 MB (4.46 GB)",
    "chuck_50MB_1_5GB" : "Chuck 50 MB (1.51 GB)",
    "chuck_50MB_4_5GB" : "Chuck 50 MB (4.46 GB)",
    "gzip_1_5GB" : "Gzip (1.51 GB)",
    "gzip_4_5GB" : "Gzip (4.46 GB)",
    "bzip2_1_5GB" : "bzip2 (1.51 GB)",
    "bzip2_4_5GB" : "bzip2 (4.46 GB)",
    # Plugin deployment variants.
    "centralized_plugin" : "Centralized Plugin",
    "distributed_plugin" : "Distributed Plugin",
    # Preprocessing settings, shortened to S1..S8 for the legend.
    "not_downsampled_overlap_to_10_select_all_samples" : "S1",
    "not_downsampled_overlap_to_10_select_samples_from_1_4_to_3_4" : "S2",
    "not_downsampled_overlap_to_20_select_all_samples" : "S3",
    "not_downsampled_overlap_to_20_select_samples_from_1_4_to_3_4" : "S4",
    "down_sampled_to_10MHz_overlap_to_10_select_all_samples" : "S5",
    "down_sampled_to_10MHz_overlap_to_10_select_samples_from_1_4_to_3_4" : "S6",
    "down_sampled_to_10MHz_overlap_to_20_select_all_samples" : "S7",
    "down_sampled_to_10MHz_overlap_to_20_select_samples_from_1_4_to_3_4" : "S8",
    # Template for a new entry:
    # "" : "",
}

# Create CSV Files

In [None]:
def convert_to_mib(memory):
    """Normalise a binary-prefixed memory string to mebibytes.

    Args:
        memory: A number immediately followed by its unit, e.g. ``"1.5GiB"``,
            ``"512MiB"``, ``"2048KiB"``, ``"900B"`` or ``"1TiB"``.

    Returns:
        A string ending in ``"MiB"``. MiB input is returned unchanged; every
        other unit is converted and formatted with two decimal places.

    Raises:
        ValueError: If the unit is not recognised or the numeric part cannot
            be parsed.
    """
    if 'GiB' in memory:
        memory_in_mib = float(memory[:-3]) * 1024
        return f"{memory_in_mib:.2f}MiB"
    if 'MiB' in memory:
        return memory

    # Units smaller or larger than the two above used to be rejected outright.
    # 'B' is tested last because every other unit also ends in 'B'.
    for suffix, mib_per_unit in (
        ('TiB', 1024.0 * 1024.0),
        ('KiB', 1.0 / 1024.0),
        ('B', 1.0 / (1024.0 * 1024.0)),
    ):
        if memory.endswith(suffix):
            try:
                amount = float(memory[:-len(suffix)])
            except ValueError:
                # e.g. "12MB": ends in 'B' but is not a plain byte count.
                break
            return f"{amount * mib_per_unit:.2f}MiB"

    raise ValueError("Invalid memory format")


def extract_info(file_name):
    """Parse a usage log made of repeating three-line records.

    Within each group of three lines:
      * the first line is ignored (presumably a header — TODO confirm),
      * the second line carries the CPU figure in its third whitespace-separated
        field (its last character is dropped before parsing) and the memory
        figure in its fourth field,
      * the third line carries the time value in its first field.

    Args:
        file_name: Path of the text log to read.

    Returns:
        A tuple ``(cpu_percentages, mem_usages, time_values)`` of float lists;
        memory values are in MiB.
    """
    cpu_percentages, mem_usages, time_values = [], [], []

    with open(file_name, 'r') as file:
        for line_number, line in enumerate(file):
            position_in_record = line_number % 3
            if position_in_record == 0:
                continue

            values = line.split()
            if position_in_record == 1:
                cpu_field, memory_field = values[2], values[3]
                cpu_percentages.append(float(cpu_field[:-1]))
                # convert_to_mib returns "<number>MiB"; drop the unit again.
                memory_in_mib = convert_to_mib(memory_field)
                mem_usages.append(float(memory_in_mib[:-3]))
            else:
                time_values.append(float(values[0]))

    return cpu_percentages, mem_usages, time_values


def write_to_csv(cpu_percentages, mem_usages, time_values, output_file):
    """Write the parsed samples to ``output_file`` as CSV.

    The header row uses the display names stored in ``name_mapper`` for
    "time", "cpu" and "memory". One data row is written per entry of
    ``cpu_percentages``, so the other two lists must be at least as long.

    Args:
        cpu_percentages: CPU values, one per sample.
        mem_usages: Memory values, one per sample.
        time_values: Time values, one per sample.
        output_file: Destination path; an existing file is overwritten.
    """
    with open(output_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        header = [name_mapper[column] for column in ("time", "cpu", "memory")]
        writer.writerow(header)
        for index, cpu_percentage in enumerate(cpu_percentages):
            writer.writerow([time_values[index], cpu_percentage, mem_usages[index]])

In [None]:
def list_txt_files_in_folder(folder_path):
    """Return the names of the regular ``.txt`` files directly in ``folder_path``.

    Only file names (not full paths) are returned, in the order produced by
    ``os.listdir``. Directories whose name ends in ``.txt`` are skipped.
    If listing fails, the error is printed and whatever was collected so far
    (normally an empty list) is returned.
    """
    txt_files = []
    try:
        txt_files.extend(
            entry
            for entry in os.listdir(folder_path)
            if entry.endswith(".txt") and os.path.isfile(os.path.join(folder_path, entry))
        )
    except Exception as e:
        print(f"Error occurred while listing .txt files: {e}")
    return txt_files

In [None]:
# Folder layout shared by the rest of the notebook. The values keep their
# trailing "/" because paths are built by plain string concatenation.
input_folder_path = "./1_input/"
preprocess_folder_path = "./2_preprocess/"
line_chart_folder_path = "1_line_chart/"

line_chart_input_dir = input_folder_path + line_chart_folder_path
line_chart_preprocess_dir = preprocess_folder_path + line_chart_folder_path

# open(..., 'w') does not create missing parent folders, so make sure the
# output folder exists before the first CSV is written.
os.makedirs(line_chart_preprocess_dir, exist_ok=True)

files_in_folder = list_txt_files_in_folder(line_chart_input_dir)

# Convert every raw .txt log into a CSV with the same base name.
for file_name in files_in_folder:
    input_file_path = line_chart_input_dir + file_name
    # The base name is everything before the first ".", so log file names must
    # not contain any other dot.
    preprocess_file_path = line_chart_preprocess_dir + file_name.split(".")[0] + ".csv"

    cpu_percentages, mem_usages, time_values = extract_info(input_file_path)
    write_to_csv(cpu_percentages, mem_usages, time_values, preprocess_file_path)

    print(f'Data has been written to {preprocess_file_path}')

# Generate Charts

In [None]:
def list_csv_files_in_folder(folder_path):
    """Return the names of the regular ``.csv`` files directly in ``folder_path``.

    Only file names (not full paths) are returned, in the order produced by
    ``os.listdir``. Directories whose name ends in ``.csv`` are skipped.
    If listing fails, the error is printed and whatever was collected so far
    (normally an empty list) is returned.
    """
    csv_files = []
    try:
        for entry in os.listdir(folder_path):
            if not entry.endswith(".csv"):
                continue
            if not os.path.isfile(os.path.join(folder_path, entry)):
                continue
            csv_files.append(entry)
    except Exception as error:
        print(f"Error occurred while listing .csv files: {error}")
    return csv_files

In [None]:
# Files that differ only in their last "__" field belong to the same chart, e.g.
# janitha__CPU_Usage__cpu__m1__centralized_plugin
# janitha__CPU_Usage__cpu__m1__distributed_plugin

csv_files_in_folder = list_csv_files_in_folder(preprocess_folder_path + line_chart_folder_path)
csv_files_in_folder_sorted = sorted(csv_files_in_folder)

# chart key (all fields but the last) -> series names (last field, extension removed)
result = {}

for csv_name in csv_files_in_folder_sorted:
    *chart_fields, series_field = csv_name.split("__")
    chart_key = "__".join(chart_fields)
    series_name = series_field.split(".")[0]
    if chart_key not in result:
        result[chart_key] = []
    result[chart_key].append(series_name)

print(result)


In [None]:
# Root folder for rendered charts. The plotting cells save their PNG files
# under <owner>/<chart-type folder>/ inside it; the trailing "/" is required
# because paths are built by string concatenation.
chart_folder_path = "./3_chart/"

# Line Chart Generation

In [None]:
# One PNG per chart key, with one smoothed line per series.
# Key layout: owner__title__cpu/memory__sandbox, e.g.
# janitha__CPU_Usage__cpu__m1 (+ "__centralized_plugin" for the series file)
for key, values in result.items():
    # Everything encoded in the key is identical for all series of the chart,
    # so decode it once per chart rather than once per series.
    key_fields = key.split("__")
    chart_folder = key_fields[0]
    title = " ".join(key_fields[1].split("_"))
    usage_type = key_fields[2]

    # Draw the series in alphabetical order of their legend labels.
    values = sorted(values, key=lambda value: name_mapper[value])

    for value in values:
        file_path = preprocess_folder_path + line_chart_folder_path + key + "__" + value + ".csv"
        usage_values = []
        time_values = []

        with open(file_path, "r", newline="") as csvfile:
            for row in csv.DictReader(csvfile):
                time_values.append(float(row[name_mapper["time"]]))
                usage_values.append(float(row[name_mapper[usage_type]]))

        x = np.array(time_values)
        y = np.array(usage_values)

        if len(x) >= 4:
            # Cubic interpolation on a 300-point grid gives a smooth curve.
            f = interp1d(x, y, kind='cubic')
            x_new = np.linspace(x.min(), x.max(), 300)
            y_smooth = f(x_new)

            # The spline can undershoot below zero between samples; usage
            # cannot be negative, so clamp those values.
            y_smooth[y_smooth < 0] = 0
        else:
            # A cubic spline needs at least four samples; plot very short logs
            # as recorded instead of failing.
            x_new, y_smooth = x, y

        plt.plot(x_new, y_smooth, marker='', linestyle='-', label=name_mapper[value])

    # Decorate and save once per chart, after all series are drawn. Doing this
    # inside the series loop re-rendered the same 300-dpi file for every line.
    plt.title(title)
    plt.xlabel(name_mapper["x_label"])
    plt.ylabel(name_mapper[usage_type])
    plt.grid(True)
    plt.legend()
    plt.tight_layout()

    chart_dir = chart_folder_path + chart_folder + "/" + line_chart_folder_path
    # savefig does not create missing folders.
    os.makedirs(chart_dir, exist_ok=True)
    chart_file_name = key + ".png"
    chart_file_path = chart_dir + chart_file_name
    plt.savefig(chart_file_path, dpi=300)

    # Start the next chart from an empty figure.
    plt.clf()

# Box Plot Generation

In [None]:
# m0 = does not depend on the sandbox
# owner__title__xlabel__ylabel__horizontal_line_value__extraName__sandbox

# e.g. dinil__Sampling_Rate_=_10MHz__Time_(s)__File_Size_(MB)__ __file_size_against_time_10MHz__m1
# dinil__M1__Preproessing_Setting__Time_(s)__ __time_against_preproessing_setting_for_each_sandbox__m1
# dinil__M2__Preproessing_Setting__Time_(s)__ __time_against_preproessing_setting_for_each_sandbox__m2
# dinil__M3__Preproessing_Setting__Time_(s)__ __time_against_preproessing_setting_for_each_sandbox__m3

In [16]:
box_plot_folder_path = "2_box_plot/"

box_plot_input_dir = input_folder_path + box_plot_folder_path
csv_files_in_folder = list_csv_files_in_folder(box_plot_input_dir)

# Fill colours, reused cyclically when a plot has more than three boxes.
box_colors = ['pink', 'lightblue', 'lightgreen']

# One box plot per CSV file: every column is one box, labelled with its header.
for filename in csv_files_in_folder:
    data = pd.read_csv(box_plot_input_dir + filename)

    # File name layout: owner__title__xlabel__ylabel__horizontal_line_value__...
    name_fields = filename.split("__")
    owner_name = name_fields[0]
    title = " ".join(name_fields[1].split("_"))
    xlabel = " ".join(name_fields[2].split("_"))
    ylabel = " ".join(name_fields[3].split("_"))
    horizontal_line_value = name_fields[4]

    # Columns of unequal length are padded with NaN by read_csv, and NaN makes
    # the quartiles of that column NaN (the box is not drawn), so drop the
    # padding per column.
    samples = [data[column].dropna().values for column in data.columns]

    # Creating the box plot
    bp = plt.boxplot(samples, labels=data.columns, patch_artist=True)

    # Assigning colors to each box
    for i, box in enumerate(bp['boxes']):
        box.set_facecolor(box_colors[i % len(box_colors)])

    if xlabel == "Uploading Mechanism":
        # These category labels are long; shrink them so they fit.
        plt.xticks(fontsize=6)

    # Adding labels and title
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # A blank field (" ") means "no reference line"; an empty field is treated
    # the same way instead of failing in float().
    if horizontal_line_value.strip():
        plt.axhline(y=float(horizontal_line_value), color='r', linestyle='--')

    # Saving the plot; savefig does not create missing folders.
    output_dir = chart_folder_path + owner_name + "/" + box_plot_folder_path
    os.makedirs(output_dir, exist_ok=True)
    # split(".csv") rather than split(".") because names may contain decimal points.
    plt.savefig(output_dir + filename.split(".csv")[0] + ".png", dpi=300)
    plt.clf()
    plt.close()


# Bar Chart Generation

In [None]:
# m0 = does not depend on the sandbox
# owner__title__xlabel__ylabel__extraName__sandbox

# e.g. dinil__Preproessing__Setting__File_Size_(MB)__file_size_against_preproessing_settings__m0

In [None]:
bar_chart_folder_path = "3_bar_chart/"

bar_chart_input_dir = input_folder_path + bar_chart_folder_path
csv_files_in_folder = list_csv_files_in_folder(bar_chart_input_dir)

# One bar chart per CSV file; each file needs a 'Category' and a 'Value' column.
for filename in csv_files_in_folder:
    data = pd.read_csv(bar_chart_input_dir + filename)

    # File name layout: owner__title__xlabel__ylabel__...
    name_fields = filename.split("__")
    owner_name = name_fields[0]
    title = " ".join(name_fields[1].split("_"))
    xlabel = " ".join(name_fields[2].split("_"))
    ylabel = " ".join(name_fields[3].split("_"))

    categories = data['Category']
    values = data['Value']

    # Create bar chart
    plt.bar(categories, values, color='skyblue')

    # Add labels and title
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # savefig does not create missing folders.
    output_dir = chart_folder_path + owner_name + "/" + bar_chart_folder_path
    os.makedirs(output_dir, exist_ok=True)
    # Strip only the extension: cutting at the first "." would truncate names
    # that contain a decimal point and could make two charts share one file.
    chart_stem = os.path.splitext(filename)[0]
    plt.savefig(output_dir + chart_stem + ".png", dpi=300)
    plt.close()

# Customized code for a Box Plot

In [None]:
# Single box-plot input that gets its own, hand-tuned plot. The name follows
# owner__title__xlabel__ylabel__horizontal_line_value__extraName__sandbox.
filename = "dinil__Sampling_Rate_=_10MHz__Time_(s)__File_Size_(MB)__762.94__file_size_against_sampling_rate_10s___m0.csv"
file_path = input_folder_path + box_plot_folder_path + filename

# Load the file; each column is plotted as one box.
data = pd.read_csv(file_path)

In [None]:
# Same box plot as the generic loop, but for one file and with the y-axis
# snapped to multiples of 10.
data = pd.read_csv(input_folder_path + box_plot_folder_path + filename)

# File name layout: owner__title__xlabel__ylabel__horizontal_line_value__...
name_fields = filename.split("__")
owner_name = name_fields[0]
title = " ".join(name_fields[1].split("_"))
xlabel = " ".join(name_fields[2].split("_"))
ylabel = " ".join(name_fields[3].split("_"))
horizontal_line_value = name_fields[4]

# Round the data range outward to multiples of 10. The upper bound must use a
# true ceiling: 10 * ((max + 9) // 10) only rounds up for integers (a maximum
# of 10.5 gives 10), which cuts off the top of the data. The nan-aware
# reductions ignore NaN padding from columns of unequal length.
y_min = 10 * np.floor(np.nanmin(data.values) / 10)
y_max = 10 * np.ceil(np.nanmax(data.values) / 10)

# NaN in a column makes its quartiles NaN (the box is not drawn), so drop the
# padding per column.
samples = [data[column].dropna().values for column in data.columns]

# Creating the box plot
bp = plt.boxplot(samples, labels=data.columns, patch_artist=True)

# List of colors for each box, reused cyclically
box_colors = ['pink', 'lightblue', 'lightgreen']

# Assigning colors to each box
for i, box in enumerate(bp['boxes']):
    box.set_facecolor(box_colors[i % len(box_colors)])

# Adding labels and title
plt.title(title)
plt.xlabel(xlabel)
plt.ylabel(ylabel)

# A blank field (" ") means "no reference line"; an empty field is treated the
# same way instead of failing in float().
if horizontal_line_value.strip():
    plt.axhline(y=float(horizontal_line_value), color='r', linestyle='--')

# Set y-axis range to multiples of 10
plt.ylim(y_min, y_max)

# Set y-axis ticks to multiples of 10 (the stop value is exclusive, so the
# last tick is y_max)
plt.yticks(np.arange(y_min, y_max + 10, 10))

# Saving the plot; savefig does not create missing folders.
output_dir = chart_folder_path + owner_name + "/" + box_plot_folder_path
os.makedirs(output_dir, exist_ok=True)
plt.savefig(output_dir + filename.split(".csv")[0] + ".png", dpi=300)
plt.clf()
plt.close()