In [90]:
import pandas as pd
from sklearn.metrics import confusion_matrix

def check_correntness(input_path, output_path):
    """Tabulate how often the proposed verdict agrees with the simulator's.

    Reads the CSV at ``input_path``, groups its rows by
    ``(numTasks, utilization)`` and, taking ``simulator_schedulability`` as the
    reference and ``proposed_schedulability`` as the prediction, counts
    TP / TN / FP / FN and the accuracy of every group. The resulting table is
    sorted by ``numTasks`` then ``utilization``, written to ``output_path``
    without an index column, and printed.

    Args:
        input_path: CSV file with at least the columns ``numTasks``,
            ``utilization``, ``simulator_schedulability`` and
            ``proposed_schedulability``.
        output_path: Destination CSV for the per-group table.

    Returns:
        None.

    Raises:
        ValueError: If either schedulability column contains missing values,
            which cannot be classified as schedulable or not.
    """
    df = pd.read_csv(input_path, sep=",")

    verdict_columns = ['simulator_schedulability', 'proposed_schedulability']
    if df[verdict_columns].isna().any().any():
        raise ValueError(
            "{} contains missing values in {}".format(input_path, verdict_columns)
        )

    results = []

    # groupby visits each group's rows in one pass and keeps the key dtypes
    # intact; walking the unique pairs with iterrows() would upcast an integer
    # numTasks to float. sort=False keeps first-appearance order so the row
    # labels shown by print() still reflect the order found in the input.
    for (num_tasks, utilization), subset in df.groupby(['numTasks', 'utilization'], sort=False):
        actual = subset['simulator_schedulability'].astype(bool)
        predicted = subset['proposed_schedulability'].astype(bool)

        # Counting the four outcomes directly always yields every cell, even
        # for a group in which only one class occurs.
        TP = int((actual & predicted).sum())
        TN = int((~actual & ~predicted).sum())
        FP = int((~actual & predicted).sum())
        FN = int((actual & ~predicted).sum())

        # A group produced by groupby is never empty, so the total is > 0.
        accuracy = (TP + TN) / (TP + TN + FP + FN)

        results.append([num_tasks, utilization, TP, TN, FP, FN, accuracy])

    # Create a DataFrame to store the results
    results_df = pd.DataFrame(results, columns=['numTasks', 'utilization', 'TP', 'TN', 'FP', 'FN', 'accuracy'])

    # sort the results by numTasks and utilization
    results_df = results_df.sort_values(['numTasks', 'utilization'])

    # Save the results to a CSV file
    results_df.to_csv(output_path, index=False)

    # Display the results
    print(results_df)


In [91]:
def show_detail_result(input_path, num_tasks, utilization):
    """Print both schedulability verdicts for one (numTasks, utilization) pair.

    Loads the CSV at ``input_path``, keeps the rows whose ``numTasks`` and
    ``utilization`` equal the given values, and prints their
    ``simulator_schedulability`` and ``proposed_schedulability`` columns.
    """
    table = pd.read_csv(input_path, sep=",")

    same_task_count = table['numTasks'] == num_tasks
    same_utilization = table['utilization'] == utilization
    verdict_columns = ['simulator_schedulability', 'proposed_schedulability']

    # Show the two verdict columns side by side for the selected rows
    print(table.loc[same_task_count & same_utilization, verdict_columns])

In [92]:
import os
import glob
import re

def combine_detail_result(detail_input_dir, output_path):
    """Merge the per-run detail CSVs of a directory into a single CSV.

    Every file in ``detail_input_dir`` whose name has the form
    ``<cores>cores_<tasks>tasks_<util>utilization_<index>_result.csv`` is read;
    its first row (the header) and first column (used as the index) are
    discarded, and the four values parsed from the file name are appended as
    the columns ``numCores``, ``numTasks``, ``utilization`` and
    ``dataset_index``. The stacked rows are sorted by ``numTasks`` then
    ``utilization`` and written to ``output_path`` without an index column.

    The process working directory is never changed, so a relative
    ``output_path`` is resolved against the caller's working directory and a
    failure part-way through cannot leave the kernel in another directory.

    Args:
        detail_input_dir: Directory containing the per-run result CSV files.
        output_path: Destination path of the combined CSV.

    Returns:
        None.

    Raises:
        ValueError: If no file in ``detail_input_dir`` matches the expected
            name pattern, or if the stacked data does not have exactly the
            nine expected columns.
    """
    # The dot before "csv" is escaped and the whole name must match, so only
    # genuine per-run result files are picked up.
    filename_pattern = re.compile(
        r'(\d+)cores_(\d+)tasks_(\d+\.\d+)utilization_(\d+)_result\.csv'
    )

    frames = []
    # Sorted listing makes the order of equal-key rows reproducible.
    for filename in sorted(os.listdir(detail_input_dir)):
        match = filename_pattern.fullmatch(filename)
        if match is None:
            continue

        # Read without a header so data columns are addressed by position;
        # the header row then arrives as the first data row and is dropped.
        raw = pd.read_csv(
            os.path.join(detail_input_dir, filename),
            sep=",",
            header=None,
            index_col=0,
        )
        frames.append(
            raw.iloc[1:].assign(
                numCores=int(match.group(1)),
                numTasks=int(match.group(2)),
                utilization=float(match.group(3)),
                dataset_index=int(match.group(4)),
            )
        )

    if not frames:
        raise ValueError(
            "no '*cores_*tasks_*utilization_*_result.csv' files found in {!r}".format(
                detail_input_dir
            )
        )

    combined_df = pd.concat(frames, axis=0, ignore_index=True)
    # Names are assigned by position: five data columns, then the four
    # columns appended above, in the order they were appended.
    combined_df.columns = ['name', 'WCRT_by_simulator', 'simulator_schedulability', 'WCRT_by_proposed', 'proposed_schedulability', 'numCores', 'numTasks', 'utilization', 'dataset_index']

    # sort the results by numTasks and utilization
    combined_df = combined_df.sort_values(['numTasks', 'utilization'])

    combined_df.to_csv(output_path, index=False)

In [93]:
# Directory that holds the experiment result CSVs. The path is relative, so it
# is resolved against the kernel's current working directory.
result_dir = "../../exp_results"

In [94]:
# Score the summary file and store the per-(numTasks, utilization) table
# next to it in the results directory.
input_path = f"{result_dir}/result_summary.csv"
output_path = f"{result_dir}/correctness_per_dataset.csv"

check_correntness(input_path, output_path)

    numTasks  utilization  TP  TN  FP  FN  accuracy
0        3.0          0.2  45   3   0   2      0.96
1        3.0          0.4  38   8   0   4      0.92
2        3.0          0.6  36  10   0   4      0.92
3        3.0          0.8  26  21   0   3      0.94
4        6.0          0.2  39   5   0   6      0.88
5        6.0          0.4  29  10   0  11      0.78
6        6.0          0.6  24  20   0   6      0.88
7        6.0          0.8  16  31   0   3      0.94
8        9.0          0.2  26  12   0  12      0.76
9        9.0          0.4  16  19   0  15      0.70
10       9.0          0.6   9  35   0   6      0.88
11       9.0          0.8   6  39   0   5      0.90
12      12.0          0.2  21  11   0  18      0.64
13      12.0          0.4  15  26   0   9      0.82
14      12.0          0.6   8  33   0   9      0.82
15      12.0          0.8   1  45   0   4      0.92
16      15.0          0.2  14  21   0  15      0.70
17      15.0          0.4  13  20   0  17      0.66
18      15.0

In [95]:
# Stack the detail files into one CSV; the combined file is written into the
# same directory the detail files are read from.
detail_input_dir = f"{result_dir}/detailed_result"
combine_detail_result_path = f"{detail_input_dir}/combined_detail_result.csv"
combine_detail_result(detail_input_dir, combine_detail_result_path)

# Score the combined file with the same routine used for the summary file.
output_path = f"{result_dir}/correctness_per_task.csv"
input_path = combine_detail_result_path

check_correntness(input_path, output_path)

     numTasks  utilization  TP  TN  FP  FN  accuracy
0         0.2          0.0  41   2   0   2  0.955556
1         0.2          1.0  43   0   0   2  0.955556
2         0.2          2.0  43   0   0   2  0.955556
3         0.2          3.0  42   1   0   2  0.955556
4         0.2          4.0  44   1   0   0  1.000000
..        ...          ...  ..  ..  ..  ..       ...
195       0.8         45.0  36   5   0   4  0.911111
196       0.8         46.0  35   5   0   5  0.888889
197       0.8         47.0  36   6   0   3  0.933333
198       0.8         48.0  30  12   0   3  0.933333
199       0.8         49.0  31   8   0   6  0.866667

[200 rows x 7 columns]


In [96]:
df = pd.read_csv(combine_detail_result_path, sep=",")

# List the rows where the simulator's verdict is False but the proposed
# verdict is True.
print(
    df.loc[
        lambda rows: rows['simulator_schedulability'].eq(False)
        & rows['proposed_schedulability'].eq(True)
    ]
)

        name  WCRT_by_simulator  simulator_schedulability  WCRT_by_proposed   
7348   task8             543827                     False            393386  \
7389   task1             714430                     False            596297   
7455  task10             334650                     False            274578   
8000  task12             471906                     False            399036   
8003  task15             512984                     False            399036   
8583   task4             685678                     False            488663   
8712  task10             312124                     False            292679   

      proposed_schedulability  dataset_index  numCores  numTasks  utilization  
7348                     True              1         9       0.8           13  
7389                     True              1        12       0.8           14  
7455                     True              1        15       0.8           15  
8000                     True              1   