In [22]:
import pandas as pd
from sklearn.metrics import confusion_matrix

def _as_bool_series(column):
    """Return ``column`` as a boolean Series, rejecting anything that is not True/False or 0/1."""
    if column.dtype == bool:
        return column
    # NaN, strings and other labels would silently turn into True under a
    # plain astype(bool), so refuse them explicitly instead.
    if not column.isin([0, 1]).all():
        raise ValueError(
            "column {!r} must contain only boolean (or 0/1) values".format(column.name)
        )
    return column.astype(bool)


def check_correntness(input_path, output_path):
    """Compare the proposed schedulability verdicts against the simulator's.

    (The misspelled function name is kept so existing callers keep working.)

    Reads the CSV at ``input_path``, which must provide the columns
    ``numTasks``, ``utilization``, ``simulator_schedulability`` and
    ``proposed_schedulability``. The simulator column is treated as ground
    truth and the proposed column as the prediction. For every distinct
    ``(numTasks, utilization)`` pair the confusion-matrix cells and the
    accuracy are computed, written to ``output_path`` as CSV (without the
    index) and printed.

    Parameters
    ----------
    input_path : str or path-like
        CSV file to read.
    output_path : str or path-like
        CSV file to write; its columns are
        ``numTasks, utilization, TP, TN, FP, FN, accuracy``.

    Raises
    ------
    ValueError
        If one of the two schedulability columns holds anything other than
        boolean (or 0/1) values.
    """
    df = pd.read_csv(input_path, sep=",")

    simulated = _as_bool_series(df['simulator_schedulability'])
    proposed = _as_bool_series(df['proposed_schedulability'])

    # One indicator column per confusion-matrix cell; summing them per group
    # always yields all four counts, even when a group holds a single class.
    outcome = pd.DataFrame({
        'numTasks': df['numTasks'],
        'utilization': df['utilization'],
        'TP': (simulated & proposed).astype(int),
        'TN': (~simulated & ~proposed).astype(int),
        'FP': (~simulated & proposed).astype(int),
        'FN': (simulated & ~proposed).astype(int),
    })

    # groupby keeps the key columns' dtypes (iterating rows with iterrows()
    # would upcast an integer numTasks to float) and returns the groups
    # already sorted by numTasks, then utilization.
    results_df = (
        outcome
        .groupby(['numTasks', 'utilization'], as_index=False, sort=True)[['TP', 'TN', 'FP', 'FN']]
        .sum()
    )

    # Every group contains at least one row, so the denominator is never zero.
    correct = results_df['TP'] + results_df['TN']
    total = results_df[['TP', 'TN', 'FP', 'FN']].sum(axis=1)
    results_df['accuracy'] = correct / total

    results_df = results_df[['numTasks', 'utilization', 'TP', 'TN', 'FP', 'FN', 'accuracy']]

    # Save the results to a CSV file
    results_df.to_csv(output_path, index=False)

    # Display the results
    print(results_df)


In [23]:
def show_detail_result(input_path, num_tasks, utilization):
    """Print both schedulability verdicts for one (numTasks, utilization) pair.

    Loads the CSV at ``input_path``, keeps the rows whose ``numTasks`` and
    ``utilization`` equal the given values, and prints the
    ``simulator_schedulability`` and ``proposed_schedulability`` columns of
    those rows side by side. Nothing is returned.
    """
    records = pd.read_csv(input_path, sep=",")

    same_task_count = records['numTasks'] == num_tasks
    same_utilization = records['utilization'] == utilization
    verdict_columns = ['simulator_schedulability', 'proposed_schedulability']

    print(records.loc[same_task_count & same_utilization, verdict_columns])

In [24]:
import os
import glob
import re

def combine_detail_result(detail_result_dir, numCores, numTasks, utilizations, output_path):
    """Merge the per-dataset detail CSVs under ``detail_result_dir`` into one CSV.

    For every combination of ``numCores`` x ``numTasks`` x ``utilizations``
    the directory
    ``<detail_result_dir>/<C>cores/<T>tasks/<U>utilization/`` is searched for
    files named ``<C>cores_<T>tasks_<U>utilization_<idx>_result.csv``
    (example: ``1cores_3tasks_0.2utilization_0_result.csv``). Combinations
    whose directory does not exist, and files whose name does not follow the
    pattern, are skipped.

    Each file is read with its first column as the row index and its first
    row (the header) dropped; the row index is discarded when the files are
    concatenated. The remaining five columns are labelled ``name``,
    ``WCRT(sim)``, ``simulator_schedulability``, ``WCRT(prop)`` and
    ``proposed_schedulability``, followed by the added ``numCores``,
    ``numTasks``, ``utilization`` and ``dataset_index`` columns. The combined
    table is sorted by ``numTasks`` and ``utilization`` and written to
    ``output_path`` without the index.

    Parameters
    ----------
    detail_result_dir : str
        Root directory of the detail results.
    numCores, numTasks, utilizations : iterable
        Values used to build the sub-directory names (via ``str()``) and
        stored in the corresponding output columns.
    output_path : str
        Destination CSV. The process working directory is never changed, so
        a relative path is resolved against the caller's working directory.

    Raises
    ------
    ValueError
        If no matching result file is found for any combination, or if the
        files do not have the expected number of columns.
    """
    # <idx> is the only piece taken from the file name; the literal dot of
    # ".csv" is escaped and the whole name must match.
    filename_pattern = re.compile(
        r'(\d+)cores_(\d+)tasks_(\d+(?:\.\d+)?)utilization_(\d+)_result\.csv'
    )

    results = []
    for num_cores in numCores:
        for num_tasks in numTasks:
            for utilization in utilizations:
                # Build the leaf directory in a fresh local: overwriting the
                # detail_result_dir parameter here would make the path grow
                # with every iteration.
                leaf_dir = os.path.join(
                    detail_result_dir,
                    str(num_cores) + 'cores',
                    str(num_tasks) + 'tasks',
                    str(utilization) + 'utilization',
                )

                # Globbing on the full path avoids os.chdir altogether, so
                # there is no working directory to restore on failure.
                # Sorting makes the order of the collected files deterministic.
                csv_paths = sorted(glob.glob(os.path.join(glob.escape(leaf_dir), '*.csv')))

                for csv_path in csv_paths:
                    match = filename_pattern.fullmatch(os.path.basename(csv_path))
                    if not match:
                        continue

                    # header=None keeps every cell exactly as written in the
                    # file; the header line is then dropped as the first row.
                    df = pd.read_csv(csv_path, sep=",", header=None, index_col=0)
                    df = df.iloc[1:].copy()
                    df['numCores'] = num_cores
                    df['numTasks'] = num_tasks
                    df['utilization'] = utilization
                    df['dataset_index'] = int(match.group(4))

                    results.append(df)

    if not results:
        raise ValueError(
            "no '*_result.csv' files found under {!r} for the requested combinations".format(
                detail_result_dir
            )
        )

    # add column names
    combined_df = pd.concat(results, axis=0, ignore_index=True)
    combined_df.columns = ['name', 'WCRT(sim)', 'simulator_schedulability', 'WCRT(prop)', 'proposed_schedulability', 'numCores', 'numTasks', 'utilization', 'dataset_index']
    # sort the results by numTasks and utilization
    combined_df = combined_df.sort_values(['numTasks', 'utilization'])

    combined_df.to_csv(output_path, index=False)

In [25]:
# Directory holding the experiment results, given relative to the working
# directory. The cells below build their input and output CSV paths from it.
result_dir = "../../exp_results"

In [26]:
# Correctness per dataset: read the summary CSV and write the per-(numTasks,
# utilization) confusion counts next to it.
input_path = f"{result_dir}/result_summary.csv"
output_path = f"{result_dir}/correctness_per_dataset.csv"

check_correntness(input_path=input_path, output_path=output_path)

   numTasks  utilization  TP  TN  FP  FN  accuracy
0       3.0          0.2   2   0   0   0       1.0


In [27]:
# Parameter grid used to locate the detail result files.
numCores = [1]
numTasks = [3, 6, 9, 12, 15]
utilizations = [0.2, 0.4, 0.6, 0.8]

# Step 1: merge the detail CSVs into a single file inside the detail directory.
detail_result_dir = f"{result_dir}/detail_result"
combine_detail_result_path = f"{detail_result_dir}/combined_detail_result.csv"
combine_detail_result(
    detail_result_dir=detail_result_dir,
    numCores=numCores,
    numTasks=numTasks,
    utilizations=utilizations,
    output_path=combine_detail_result_path,
)

# Step 2: run the correctness check on the merged file.
input_path = combine_detail_result_path
output_path = f"{result_dir}/correctness_per_task.csv"

check_correntness(input_path=input_path, output_path=output_path)

   numTasks  utilization  TP  TN  FP  FN  accuracy
0       3.0          0.2   6   0   0   0       1.0


In [33]:
# List the rows whose simulated WCRT is larger than the proposed WCRT.
# NOTE(review): presumably the proposed value is meant to upper-bound the
# simulated one, so any row printed here would be a violation; confirm.
df = pd.read_csv(combine_detail_result_path, sep=",")

wrong_result = df.loc[df['WCRT(sim)'].gt(df['WCRT(prop)'])]

print(wrong_result.loc[:, ['numTasks', 'utilization', 'dataset_index', 'name', 'WCRT(sim)', 'WCRT(prop)']])

Empty DataFrame
Columns: [numTasks, utilization, dataset_index, name, WCRT(sim), WCRT(prop)]
Index: []
