In [11]:
import pandas as pd
from sklearn.metrics import confusion_matrix

def check_correntness(input_path, output_path):
    df = pd.read_csv(input_path, sep=",")

    # Initialize empty lists to store the results
    results = []

    # Get unique combinations of numTasks and utilization
    combinations = df[['numTasks', 'utilization']].drop_duplicates()

    # Loop through each combination
    for i, (num_tasks, utilization) in combinations.iterrows():
        # Subset the data for the current combination
        subset = df[(df['numTasks'] == num_tasks) & (df['utilization'] == utilization)]

        # Calculate confusion matrix for the subset
        confusion = confusion_matrix(subset['simulator_schedulability'], subset['proposed_schedulability'])

        # Calculate True Positives (TP), True Negatives (TN), False Positives (FP), and False Negatives (FN)
        if (confusion.shape == (2, 2)):
            TP = confusion[1, 1]
            TN = confusion[0, 0]
            FP = confusion[0, 1]
            FN = confusion[1, 0]
        else:
            assert confusion.shape == (1, 1)
            if subset['simulator_schedulability'].iloc[0] == False:
                TP = 0
                TN = confusion[0, 0]
                FP = 0
                FN = 0
            else:
                TP = confusion[0, 0]
                TN = 0
                FP = 0
                FN = 0
                
        accuracy = (TP + TN) / (TP + TN + FP + FN)
        # precision = TP / (TP + FP) if (TP + FP) != 0 else 0
        # recall = TP / (TP + FN) if (TP + FN) != 0 else 0

        # Append the results to the list
        results.append([num_tasks, utilization, TP, TN, FP, FN, accuracy])
        # Calculate accuracy, precision, and recall for the subset

    # Create a DataFrame to store the results
    results_df = pd.DataFrame(results, columns=['numTasks', 'utilization', 'TP', 'TN', 'FP', 'FN', 'accuracy'])

    # sort the results by numTasks and utilization
    results_df = results_df.sort_values(['numTasks', 'utilization'])
    
    # Save the results to a CSV file
    results_df.to_csv(output_path, index=False)

    # Display the results
    print(results_df)


In [12]:
def show_detail_result(input_path, num_tasks, utilization):
    df = pd.read_csv(input_path, sep=",")
    subset = df[(df['numTasks'] == num_tasks) & (df['utilization'] == utilization)]
    # merge subsets of simulator_schedulability and proposed_schedulability
    print(subset[['simulator_schedulability', 'proposed_schedulability']])

In [13]:
import os
import glob
import re

def combine_detail_result(detail_result_dir, numCores, numTasks, utilizations, output_path):
    current_dir = os.getcwd()
    
    results = []
    for num_cores in numCores:
        for num_tasks in numTasks:
            for utilization in utilizations:
    # detail_input_dir = exp_results/
    # file_name sample = exp_results/detail_result/1cores/3tasks/0.2utilization/1cores_3tasks_0.2utilization_0_result.csv
                tmp_detail_result_dir = os.path.join(detail_result_dir, str(num_cores) + 'cores', str(num_tasks) + 'tasks', str(utilization) + 'utilization')
                os.chdir(tmp_detail_result_dir)
                extension = 'csv'
                all_filenames = [i for i in glob.glob('*.{}'.format(extension))]

                # combine all files in the list 
                # append filename as a first column
                for filename in all_filenames:
                    pattern = r'(\d+)cores_(\d+)tasks_(\d+\.\d+)utilization_(\d+)_result.csv'
                    match = re.match(pattern, filename)

                    if match:                        
                        df = pd.read_csv(filename, sep=",", header=None, index_col=0)
                        df = df.iloc[1:] # remove header
                        df['numCores'] = num_cores
                        df['numTasks'] = num_tasks
                        df['utilization'] = utilization
                        df['dataset_index'] = int(match.group(4))
                        
                        results.append(df)
                os.chdir(current_dir)
    
    # add column names
    combined_df = pd.concat(results, axis=0, ignore_index=True)
    combined_df.columns = ['name', 'WCRT(sim)', 'simulator_schedulability', 'WCRT(prop)', 'proposed_schedulability', 'numCores', 'numTasks', 'utilization', 'dataset_index']
    # sort the results by numTasks and utilization
    combined_df = combined_df.sort_values(['numTasks', 'utilization'])
    
    
    combined_df.to_csv(output_path, index=False)

In [14]:
# Read the CSV file into a DataFrame
result_dir = "../../exp_results"

In [15]:
os.getcwd()

'/home/ykw6644/workspace/SchedulabilityAnalysis/paper_experiment/cfs-wcrt-simulator/script/analysis'

In [16]:
input_path = result_dir + "/result_summary.csv"
output_path = result_dir + "/correctness_per_dataset.csv"

check_correntness(input_path, output_path)

    numTasks  utilization  TP  TN  FP  FN  accuracy
0        3.0          0.2  48   0   0   2      0.96
1        3.0          0.4  43   5   1   1      0.96
2        3.0          0.6  36   9   3   2      0.90
3        3.0          0.8  25  20   4   1      0.90
4        6.0          0.2  39   2   0   9      0.82
5        6.0          0.4  33  10   2   5      0.86
6        6.0          0.6  20  20   5   5      0.80
7        6.0          0.8   8  31   6   5      0.78
8        9.0          0.2  29   8   0  13      0.74
9        9.0          0.4  23  14   2  11      0.74
10       9.0          0.6   8  34   6   2      0.84
11       9.0          0.8   3  42   2   3      0.90
12      12.0          0.2  22   8   0  20      0.60
13      12.0          0.4  17  12   1  20      0.58
14      12.0          0.6   3  37   2   8      0.80
15      12.0          0.8   1  42   1   6      0.86
16      15.0          0.2  19   9   0  22      0.56
17      15.0          0.4  11  16   2  21      0.54
18      15.0

In [17]:
numCores = [1]
numTasks = [3, 6, 9, 12, 15]
utilizations = [0.2, 0.4, 0.6, 0.8]
detail_result_dir = result_dir + "/detail_result"
combine_detail_result_path = detail_result_dir + "/combined_detail_result.csv"
combine_detail_result(detail_result_dir, numCores, numTasks, utilizations, combine_detail_result_path)


input_path = combine_detail_result_path
output_path = result_dir + "/correctness_per_task.csv"

check_correntness(input_path, output_path)

    numTasks  utilization   TP   TN  FP  FN  accuracy
0        3.0          0.2  148    0   0   2  0.986667
1        3.0          0.4  144    5   0   1  0.993333
2        3.0          0.6  138    9   0   3  0.980000
3        3.0          0.8  127   22   0   1  0.993333
4        6.0          0.2  288    4   0   8  0.973333
5        6.0          0.4  284   10   0   6  0.980000
6        6.0          0.6  270   24   0   6  0.980000
7        6.0          0.8  240   46   1  13  0.953333
8        9.0          0.2  425    8   0  17  0.962222
9        9.0          0.4  414   21   0  15  0.966667
10       9.0          0.6  393   40   0  17  0.962222
11       9.0          0.8  349   68   0  33  0.926667
12      12.0          0.2  555   13   0  32  0.946667
13      12.0          0.4  553   17   0  30  0.950000
14      12.0          0.6  498   64   0  38  0.936667
15      12.0          0.8  475   81   2  42  0.926667
16      15.0          0.2  705   18   0  27  0.964000
17      15.0          0.4  6

In [18]:
df = pd.read_csv(combine_detail_result_path, sep=",")

# wrong_result = df[(df['WCRT(sim)'] > df['WCRT(prop)']) & (df['WCRT(prop)'] != 0)]
wrong_result = df[(df['simulator_schedulability'] == False) & (df['proposed_schedulability'] == True)]

print(wrong_result[['numTasks', 'utilization', 'dataset_index', 'name', 'WCRT(sim)', 'WCRT(prop)']])

      numTasks  utilization  dataset_index   name  WCRT(sim)  WCRT(prop)
1503         6          0.8             33  task4     507757      469804
5586        12          0.8             20  task7     659799      439240
5742        12          0.8             42  task7     554488      445392
8986        15          0.8             31  task2     535405      473884
8990        15          0.8             31  task6     544274      473884
