In [51]:
import pandas as pd
import numpy as np

def load_and_process_csv(file_path):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Set the first column as index
    df.set_index(df.columns[0], inplace=True)
    
    # Extract the header rows
    header_row1 = df.columns
    header_row2 = df.iloc[0]
    
    # Remove the first row (now redundant)
    df = df.iloc[1:]
    
    # Create MultiIndex columns
    multi_index = pd.MultiIndex.from_arrays([header_row1, header_row2])
    df.columns = multi_index
    
    # Convert numeric columns
    for col in df.columns:
        if col[1] in ['Support', 'Precision', 'Recall', 'F1-score']:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    
    return df

def calculate_group_weighted_average(filtered_df, groups, metrics=['Precision', 'Recall', 'F1-score']):
    total_support = {}
    weighted_sums = {metric: {} for metric in metrics}
    weighted_averages = {}

    for group in groups:
        support_col = (group, 'Support')
        if support_col not in filtered_df.columns:
            print(f"Warning: {support_col} not found in DataFrame. Skipping {group}.")
            continue

        total_support[group] = filtered_df[support_col].sum()
        
        for metric in metrics:
            metric_col = (group, metric)
            if metric_col not in filtered_df.columns:
                print(f"Warning: {metric_col} not found in DataFrame. Skipping.")
                continue

            weighted_sums[metric][group] = (
                filtered_df[metric_col] * filtered_df[support_col]
            ).sum()

    for metric in metrics:
        for group in groups:
            key = f'{group}_{metric}'
            if group in total_support and total_support[group] > 0 and group in weighted_sums[metric]:
                weighted_averages[key] = (
                    weighted_sums[metric][group] / total_support[group]
                )
            else:
                weighted_averages[key] = np.float64(0)

    if not weighted_averages:
        print("Warning: No valid data found for the specified groups and metrics.")
    
    return weighted_averages

class Evaluation:
    def __init__(self, entities_df, relationships_df):
        self.entities_df = entities_df
        self.relationships_df = relationships_df

    def filter_results(self, to_filter, data_type='entities'):
        if data_type == 'entities':
            df = self.entities_df
        elif data_type == 'relationships':
            df = self.relationships_df
        else:
            raise ValueError("data_type must be 'entities' or 'relationships'")

        # Filter the columns based on to_filter list
        filtered_columns = [col for col in df.columns if col[0] in to_filter]

        if not filtered_columns:
            print(f"Warning: No columns found for the specified filter: {to_filter}")
            return pd.DataFrame()  # Return an empty DataFrame

        # Create a filtered DataFrame
        filtered_df = df.loc[:, filtered_columns]

        return filtered_df

    def calculate_weighted_average(self, to_filter=None, data_type='entities', metrics=['Precision', 'Recall', 'F1-score']):
        if data_type == 'entities':
            df = self.entities_df
        elif data_type == 'relationships':
            df = self.relationships_df
        else:
            raise ValueError("data_type must be 'entities' or 'relationships'")

        if to_filter:
            df = self.filter_results(to_filter, data_type)

        if df.empty:
            print(f"Warning: No data available for the specified filter: {to_filter}")
            return {}

        groups = to_filter if to_filter else df.columns.get_level_values(0).unique().tolist()

        return calculate_group_weighted_average(df, groups, metrics)

# Load the CSV files
entities_data = load_and_process_csv('Evaluation - Entities.csv')
relationships_data = load_and_process_csv('Evaluation - Relationships.csv')

# Create an instance of the Evaluation class
evaluation = Evaluation(entities_data, relationships_data)

# Example usage
to_filter_entities = ['API Call', 'IoC', 'Tactic', 'Technique', 'Sub-Technique', 'Other']
entities_avg = evaluation.calculate_weighted_average(to_filter_entities, data_type='entities')
print("Entities Weighted Averages:")
print(entities_avg)

to_filter_relationships = ['API Call, Tactic', 'API Call , Technique', 'API Call, Sub-technique', 'Detection Field Name,Detection Entity']
relationships_avg = evaluation.calculate_weighted_average(to_filter_relationships, data_type='relationships')
print("\nRelationships Weighted Averages:")
print(relationships_avg)

# MITRE entities
mitre_entities = ['Tactic', 'Technique', 'Sub-Technique']
mitre_entities_avg = evaluation.calculate_weighted_average(mitre_entities, data_type='entities')
print("\nMITRE Entities Weighted Averages:")
print(mitre_entities_avg)

# MITRE relationships
mitre_relationships = ['API Call, Tactic', 'API Call , Technique', 'API Call, Sub-technique']
mitre_relationships_avg = evaluation.calculate_weighted_average(mitre_relationships, data_type='relationships')
print("\nMITRE Relationships Weighted Averages:")
print(mitre_relationships_avg)

# Non-MITRE entities
non_mitre_entities = ['API Call', 'Detection Field Name', 'Log Source', 'API Source', 'IoC', 'Other']
non_mitre_entities_avg = evaluation.calculate_weighted_average(non_mitre_entities, data_type='entities')
print("\nNon-MITRE Entities Weighted Averages:")
print(non_mitre_entities_avg)

# Non-MITRE relationships
non_mitre_relationships = [
    'Detection Field Name,Detection Entity',
    'API Call, API Source',
    'API Call, Log Source',
    'API Call, IoC',
    'API Call, Other'
]
non_mitre_relationships_avg = evaluation.calculate_weighted_average(non_mitre_relationships, data_type='relationships')
print("\nNon-MITRE Relationships Weighted Averages:")
print(non_mitre_relationships_avg)

Entities Weighted Averages:
{'API Call_Precision': np.float64(0.0), 'IoC_Precision': np.float64(0.0), 'Tactic_Precision': np.float64(0.0), 'Technique_Precision': np.float64(0.0), 'Sub-Technique_Precision': np.float64(0.0), 'Other_Precision': np.float64(0.0), 'API Call_Recall': np.float64(0.0), 'IoC_Recall': np.float64(0.0), 'Tactic_Recall': np.float64(0.0), 'Technique_Recall': np.float64(0.0), 'Sub-Technique_Recall': np.float64(0.0), 'Other_Recall': np.float64(0.0), 'API Call_F1-score': np.float64(0.0), 'IoC_F1-score': np.float64(0.0), 'Tactic_F1-score': np.float64(0.0), 'Technique_F1-score': np.float64(0.0), 'Sub-Technique_F1-score': np.float64(0.0), 'Other_F1-score': np.float64(0.0)}

Relationships Weighted Averages:
{'API Call, Tactic_Precision': np.float64(0.0), 'API Call , Technique_Precision': np.float64(0.0), 'API Call, Sub-technique_Precision': np.float64(0.0), 'Detection Field Name,Detection Entity_Precision': np.float64(0.0), 'API Call, Tactic_Recall': np.float64(0.0), 'API C

In [None]:
import pandas as pd



# Evaluation class
class Evaluation:
    def __init__(self, entities_df, relationships_df):
        self.entities_df = entities_df
        self.relationships_df = relationships_df

    def filter_results(self, to_filter, data_type='entities'):
        if data_type == 'entities':
            df = self.entities_df
        elif data_type == 'relationships':
            df = self.relationships_df
        else:
            raise ValueError("data_type must be 'entities' or 'relationships'")

        # Ensure 'OSCTI Name' is included
        filtered_columns = [('OSCTI Name', 'OSCTI Name')]

        # Filter the columns based on to_filter list
        for entity in to_filter:
            cols = [col for col in df.columns if col[0] == entity and col[1] != 'OSCTI Name']
            filtered_columns.extend(cols)

        # Create a filtered DataFrame
        try:
            filtered_df = df.loc[:, filtered_columns]
        except KeyError as e:
            print(f"Error: {e}")
            print("Available columns are:")
            print(df.columns)
            raise

        return filtered_df

    def calculate_weighted_average(self, to_filter=None, data_type='entities'):
        if data_type == 'entities':
            df = self.entities_df
        elif data_type == 'relationships':
            df = self.relationships_df
        else:
            raise ValueError("data_type must be 'entities' or 'relationships'")

        if to_filter:
            df = self.filter_results(to_filter, data_type)

        return calculate_weighted_average(df)

    

# Usage example
evaluation = Evaluation(entities_data, relationships_data)

to_filter_entities = ['Tactic', 'Technique']
filtered_entities = evaluation.filter_results(to_filter_entities, data_type='entities')
print("\nFiltered Entities DataFrame:")
print(filtered_entities)

to_filter_relationships = ['API Call, Tactic', 'API Call , Technique']
filtered_relationships = evaluation.filter_results(to_filter_relationships, data_type='relationships')
print("\nFiltered Relationships DataFrame:")
print(filtered_relationships)

filtered_entities_avg = evaluation.calculate_weighted_average(to_filter_entities, data_type='entities')
print("\nFiltered Entities Weighted Averages:")
print(filtered_entities_avg)

filtered_relationships_avg = evaluation.calculate_weighted_average(to_filter_relationships, data_type='relationships')
print("\nFiltered Relationships Weighted Averages:")
print(filtered_relationships_avg)

# Generate LaTeX table for filtered entities
latex_table_entities = evaluation.to_latex(filtered_entities)
print("\nLaTeX Table for Filtered Entities:")
print(latex_table_entities)

# Generate LaTeX table for filtered relationships
latex_table_relationships = evaluation.to_latex(filtered_relationships)
print("\nLaTeX Table for Filtered Relationships:")
print(latex_table_relationships)


In [None]:
# For entities
to_filter_entities = ['API Call', 'IoC', 'Request Parameter', 'Tactic', 'Technique', 'Sub-Technique', 'Other']
filtered_entities = evaluation.filter_results(to_filter_entities, data_type='entities')
latex_table_entities = evaluation.to_latex(filtered_entities)
print(latex_table_entities)

# For relationships
to_filter_relationships = ['API Call ↔ Detection Entity', 'API Call ↔ IoC', 'API Call ↔ TTP', 'Other Relationships']
filtered_relationships = evaluation.filter_results(to_filter_relationships, data_type='relationships')
latex_table_relationships = evaluation.to_latex(filtered_relationships)
print(latex_table_relationships)


In [None]:
mitre_entities = ['Tactic', 'Technique', 'Sub-Technique']
filtered_mitre_entities = evaluation.filter_results(mitre_entities, data_type='entities')

mitre_relationships = ['API Call, Tactic', 'API Call, Technique', 'API Call, Sub-technique']
filtered_mitre_relationships = evaluation.filter_results(mitre_relationships, data_type='relationships')

non_mitre_entities = ['API Call', 'Detection Field Name', 'Log Source', 'API Source', 'IoC', 'Other']
filtered_non_mitre_entities = evaluation.filter_results(non_mitre_entities, data_type='entities')
non_mitre_relationships = [
    'Detection Field Name, Detection Entity',
    'API Call, API Source',
    'API Call, Log Source',
    'API Call, IoC',
    'API Call, Other'
]
filtered_non_mitre_relationships = evaluation.filter_results(non_mitre_relationships, data_type='relationships')



In [44]:
import numpy as np
import pandas as pd

def calculate_group_weighted_average(filtered_df):
    total_support = {}
    weighted_sums = {}
    
    # Define the groups and their corresponding metrics
    groups = ['Tactic', 'Technique', 'Sub-Technique']
    metrics = ['Precision', 'Recall', 'F1-score']
    
    for group in groups:
        support_col = (group, 'Support')
        if support_col in filtered_df.columns:
            total_support[group] = filtered_df[support_col].sum()
            for metric in metrics:
                metric_col = (group, metric)
                if metric_col in filtered_df.columns:
                    metric_sum = (filtered_df[metric_col] * filtered_df[support_col]).sum()
                    if metric not in weighted_sums:
                        weighted_sums[metric] = {}
                    weighted_sums[metric][group] = metric_sum
                else:
                    print(f"KeyError: {metric_col} not found in DataFrame")
        else:
            print(f"KeyError: {support_col} not found in DataFrame")
    
    weighted_averages = {}
    for metric in metrics:
        for group in groups:
            if group in total_support and total_support[group] > 0:
                key = f'{group}_{metric}'
                weighted_averages[key] = weighted_sums[metric][group] / total_support[group]
            else:
                weighted_averages[key] = np.float64(0)
    
    return weighted_averages

# Assuming you've already loaded your data as shown in your code
# and it's stored in the variable 'filtered_mitre_entities'

# Call the function with the DataFrame
mitre_entities_avg = calculate_group_weighted_average(filtered_mitre_entities)
print(mitre_entities_avg)

{'Tactic_Precision': np.float64(0.476), 'Technique_Precision': np.float64(0.6088775510204082), 'Sub-Technique_Precision': np.float64(0.6778571428571428), 'Tactic_Recall': np.float64(0.3634545454545455), 'Technique_Recall': np.float64(0.7861224489795919), 'Sub-Technique_Recall': np.float64(0.9042857142857144), 'Tactic_F1-score': np.float64(0.4), 'Technique_F1-score': np.float64(0.676530612244898), 'Sub-Technique_F1-score': np.float64(0.7404761904761905)}


In [45]:
print(calculate_group_weighted_average(filtered_non_mitre_entities))

KeyError: ('Tactic', 'Support') not found in DataFrame
KeyError: ('Technique', 'Support') not found in DataFrame
KeyError: ('Sub-Technique', 'Support') not found in DataFrame


UnboundLocalError: cannot access local variable 'key' where it is not associated with a value

In [None]:
# For MITRE Entities
latex_mitre_entities = evaluation.to_latex(filtered_mitre_entities)
print(latex_mitre_entities)

# For MITRE Relationships
latex_mitre_relationships = evaluation.to_latex(filtered_mitre_relationships)
print(latex_mitre_relationships)

# For Non-MITRE Entities
latex_non_mitre_entities = evaluation.to_latex(filtered_non_mitre_entities)
print(latex_non_mitre_entities)

# For Non-MITRE Relationships
latex_non_mitre_relationships = evaluation.to_latex(filtered_non_mitre_relationships)
print(latex_non_mitre_relationships)


In [None]:
# Add print statements to check the data loading
print("Entities Raw Data:")
print(entities_raw.head())

print("Relationships Raw Data:")
print(relationships_raw.head())

# Add print statements to check the extracted headers
print("Entities Headers:")
print(entities_headers)

print("Relationships Headers:")
print(relationships_headers)

# Add print statements to check the processed data
print("Entities Data:")
print(entities_data.head())

print("Relationships Data:")
print(relationships_data.head())

# Add print statements to check the weighted averages calculation
entities_weighted_avg = calculate_weighted_average(entities_data)
print("Entities Weighted Averages:")
print(entities_weighted_avg)

relationships_weighted_avg = calculate_weighted_average(relationships_data)
print("\nRelationships Weighted Averages:")
print(relationships_weighted_avg)

In [None]:
print(filtered_mitre_entities.columns)


In [None]:
# Print the columns of the DataFrame to check if the expected columns are present
print(filtered_mitre_entities.columns)
print(filtered_mitre_relationships.columns)
print(filtered_non_mitre_entities.columns)
print(filtered_non_mitre_relationships.columns)
