In [1]:
import pandas as pd
import numpy as np

# Step 1: Load the CSV files
# Load the Entities CSV with specified encoding
entities_df = pd.read_csv('Evaluation - Entities.csv', encoding='ISO-8859-1', header=[0, 1])

# Load the Relationships CSV with specified encoding
relationships_df = pd.read_csv('Evaluation - Relationships.csv', encoding='ISO-8859-1', header=[0, 1])

# The rest of your code remains unchanged

In [2]:
# Step 3: Process the Entities CSV
entity_columns = entities_df.columns.levels[0][1:]  # Exclude 'OSCTI Name'

print(f"Entities DataFrame columns:\n{entity_columns}")
# Step 4: Process the Relationships CSV
relationship_columns = relationships_df.columns.levels[0][1:]  # Exclude 'OSCTI Name'
print(f"Relationships DataFrame columns:\n{relationship_columns}")

# Print column names to inspect them
print("Entities DataFrame columns:")
print(entities_df.columns)
print("\nRelationships DataFrame columns:")
print(relationships_df.columns)

Entities DataFrame columns:
Index(['Detection Field Name', 'IoC', 'Log Source', 'OSCTI Name', 'Other',
       'Sub-Technique', 'Tactic', 'Technique'],
      dtype='object')
Relationships DataFrame columns:
Index(['API Call, IoC', 'API Call, Log Source', 'API Call, Other',
       'API Call, Sub-technique', 'API Call, Tactic', 'API Call, Technique',
       'Detection Field Name, Detection Entity',
       'Detection Field Name,Detection Entity', 'OSCTI Name'],
      dtype='object')
Entities DataFrame columns:
MultiIndex([(          'OSCTI Name', 'OSCTI Name'),
            (              'Tactic',    'Support'),
            (              'Tactic',  'Precision'),
            (              'Tactic',     'Recall'),
            (              'Tactic',   'F1-score'),
            (           'Technique',    'Support'),
            (           'Technique',  'Precision'),
            (           'Technique',     'Recall'),
            (           'Technique',   'F1-score'),
            (       

In [13]:
# Step 2: Define helper functions for aggregation
def aggregate_metrics(group):
    """
    Aggregate metrics for a group of columns.
    We'll use weighted averages based on the 'Support' column.
    """
    support = group['Support'].sum()
    if support == 0:
        return pd.Series({'Support': 0, 'Precision': 0, 'Recall': 0, 'F1-score': 0})
    precision = (group['Support'] * group['Precision']).sum() / support
    recall = (group['Support'] * group['Recall']).sum() / support
    f1 = (group['Support'] * group['F1-score']).sum() / support
    return pd.Series({
        'Support': support,
        'Precision': precision,
        'Recall': recall,
        'F1-score': f1
    })
# Step 3: Filter and process entities
detection_entities = ['Detection Field Name', 'Log Source', 'API Call', 'IoC', 'Other']
mitre_entities = ['Tactic', 'Technique', 'Sub-Technique']

detection_entities_df = entities_df[['OSCTI Name'] + detection_entities]
mitre_entities_df = entities_df[['OSCTI Name'] + mitre_entities]

# Step 4: Filter and process relationships
detection_relationships = ['Detection Field Name,Detection Entity', 'API Call, API Source', 'API Call, Log Source', 'API Call, IoC', 'API Call, Other']
mitre_relationships = ['API Call, Tactic', 'API Call, Technique', 'API Call, Sub-technique']

detection_relationships_df = relationships_df[['OSCTI Name'] + detection_relationships]
mitre_relationships_df = relationships_df[['OSCTI Name'] + mitre_relationships]

# Add ID column (remember to start from 1, and update both rows of the header)
detection_entities_df.insert(1, ('ID', ''), range(1, len(detection_entities_df) + 1))
mitre_entities_df.insert(1, ('ID', ''), range(1, len(mitre_entities_df) + 1))
detection_relationships_df.insert(1, ('ID', ''), range(1, len(detection_relationships_df) + 1))
mitre_relationships_df.insert(1, ('ID', ''), range(1, len(mitre_relationships_df) + 1))

# Step 5: Save filtered DataFrames to CSV
detection_entities_df.to_csv('detection_entities.csv', index=False)
mitre_entities_df.to_csv('mitre_entities.csv', index=False)
detection_relationships_df.to_csv('detection_relationships.csv', index=False)
mitre_relationships_df.to_csv('mitre_relationships.csv', index=False)