# FCM Scoring Walkthrough

This notebook demonstrates how to use the FCM scoring utility to compare two Fuzzy Cognitive Maps.

## 1. Setup and Imports

In [None]:
import sys
import os
import pandas as pd
import json

# Make the parent directory importable so the project modules resolve.
# The path is made absolute so later imports keep working even if the working
# directory changes, and the membership check stops re-runs of this cell from
# stacking duplicate entries on sys.path.
parent_dir = os.path.abspath('..')
if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)

from score_fcms import score_fcm, load_matrix_from_file, matrix_to_json

print("✓ Imports successful")

## 2. Load and Explore Example Data

In [None]:
# Example FCM files that sit next to this notebook
fcm1_path, fcm2_path = 'fcm1.csv', 'fcm2.csv'

print(f"Loading FCM1 from: {fcm1_path}\nLoading FCM2 from: {fcm2_path}")

# Read both files through the project loader
fcm1_matrix = load_matrix_from_file(fcm1_path)
fcm2_matrix = load_matrix_from_file(fcm2_path)

# Report the dimensions of each loaded matrix
print(f"\nFCM1 shape: {fcm1_matrix.shape}", f"FCM2 shape: {fcm2_matrix.shape}", sep="\n")

In [None]:
# Explore FCM1
print("FCM1 Matrix:")
print(fcm1_matrix)
# An FCM adjacency matrix is directed: cell (i, j) and cell (j, i) are two
# separate causal links, so every non-zero cell counts as one edge. Halving
# the count (as for a symmetric, undirected matrix) would under-report it.
print(f"\nNumber of edges: {(fcm1_matrix != 0).sum().sum()}")
print(f"Number of nodes: {len(fcm1_matrix)}")

In [None]:
# Explore FCM2
print("FCM2 Matrix:")
print(fcm2_matrix)
# An FCM adjacency matrix is directed: cell (i, j) and cell (j, i) are two
# separate causal links, so every non-zero cell counts as one edge. Halving
# the count (as for a symmetric, undirected matrix) would under-report it.
print(f"\nNumber of edges: {(fcm2_matrix != 0).sum().sum()}")
print(f"Number of nodes: {len(fcm2_matrix)}")

In [None]:
# Summarise where each example FCM lives and which input formats are accepted.
# A single print with a newline separator emits the same lines as before.
print(
    "FCM1 (CSV Matrix Format):",
    f"  - File: {fcm1_path}",
    "  - Format: Adjacency matrix (rows/columns = nodes)",
    "\nFCM2 (CSV Matrix Format):",
    f"  - File: {fcm2_path}",
    "  - Format: Adjacency matrix (rows/columns = nodes)",
    "\nNote: Both CSV and JSON formats are supported!",
    "  - CSV: Adjacency matrix format",
    "  - JSON: Edge list format with 'edges' array",
    sep="\n",
)

## 3. Basic Scoring with Default Parameters

In [None]:
# Run the scorer once, leaving every tunable at its default value
print("Scoring with default parameters...\n")

# verbose=True is passed so the scorer reports as it runs
results = score_fcm(fcm1_path=fcm1_path, fcm2_path=fcm2_path, verbose=True)

## 4. Parameter Tuning: Testing Different Thresholds

In [None]:
# Sweep a range of thresholds and keep one scoring result per value
thresholds = [0.5, 0.6, 0.7, 0.8, 0.9]
results_list = []

print("Testing different threshold values...\n")

for threshold in thresholds:
    print(f"Testing threshold={threshold}...")
    # Same file pair every time; only the threshold varies between runs
    result = score_fcm(
        fcm1_path=fcm1_path, fcm2_path=fcm2_path,
        threshold=threshold, verbose=False,
    )
    results_list.append(result)

print("Complete")

In [None]:
# Stack the per-threshold results into one table with a fresh 0..n-1 index
threshold_results = pd.concat(results_list, ignore_index=True)

# Columns shown in the summary table, in display order
display_cols = ['threshold', 'F1', 'Jaccard', 'TP', 'PP', 'FP', 'FN']

print("\nScoring Results for Different Thresholds:", "=" * 80, sep="\n")
print(threshold_results[display_cols].to_string(index=False))

In [None]:
# Locate the row with the highest F1 and read its threshold and score
best_idx = threshold_results['F1'].idxmax()
best_threshold = threshold_results.at[best_idx, 'threshold']
best_f1 = threshold_results.at[best_idx, 'F1']

print(f"\nBest F1 Score: {best_f1:.4f}", f"Achieved at threshold: {best_threshold}", sep="\n")

## 5. Interpreting Results

In [None]:
# Understanding the metrics
# Take the winning row straight from the combined table by its index label.
# Searching `thresholds` for a float equal to `best_threshold` would raise
# ValueError if the stored value differs from the list entry in any way, and
# could pick a different row than the one idxmax() selected.
best_result = threshold_results.loc[[best_idx]]

# Counts are cast to int so the `:3d` format specifiers below accept them
tp = int(best_result['TP'].iloc[0])
pp = int(best_result['PP'].iloc[0])
fp = int(best_result['FP'].iloc[0])
fn = int(best_result['FN'].iloc[0])

print("Understanding the Metrics:")
print("="*50)
print(f"True Positives (TP):      {tp:3d} - Correct edge matches")
print(f"Partial Positives (PP):   {pp:3d} - Edge matches with sign disagreement")
print(f"False Positives (FP):     {fp:3d} - Predicted edges not in reference")
print(f"False Negatives (FN):     {fn:3d} - Reference edges not predicted")
print("="*50)
print(f"\nF1 Score:     {best_result['F1'].iloc[0]:.4f}")
print(f"Jaccard Score: {best_result['Jaccard'].iloc[0]:.4f}")
# NOTE(review): the worked formula below does not include PP; confirm it
# matches how score_fcm actually derives the reported F1 value.
print(f"\nF1 = 2*TP / (2*TP + FP + FN)")
print(f"   = 2*{tp} / (2*{tp} + {fp} + {fn})")
print(f"   = {2*tp} / {2*tp + fp + fn}")
print(f"   = {best_result['F1'].iloc[0]:.4f}")

## 6. Saving Results in Different Formats

In [None]:
# Write the scoring results to disk as CSV and JSON
output_dir = 'results'
os.makedirs(output_dir, exist_ok=True)  # safe to re-run: an existing folder is fine

print(f"Saving results to {output_dir}...\n")

# output_format='both' requests the two file formats in one call
results_both = score_fcm(
    fcm1_path=fcm1_path, fcm2_path=fcm2_path,
    output_dir=output_dir, output_format='both',
    verbose=False,
)

print("Results saved in both CSV and JSON formats")

In [None]:
# List saved files
import glob

print("Saved files:")
# glob makes no ordering guarantee, so sort for a reproducible listing
saved_files = sorted(glob.glob(os.path.join(output_dir, '*_scoring_results*')))
for filepath in saved_files:
    filename = os.path.basename(filepath)
    file_size = os.path.getsize(filepath)
    print(f"  - {filename} ({file_size} bytes)")

# Make an empty result explicit instead of printing a bare header
if not saved_files:
    print("  (no result files found)")

## 7. Working with Custom FCM Data

In [None]:
# Build a small three-node FCM by hand. Each inner list is one row of the
# adjacency matrix; rows and columns share the same node labels.
custom_fcm = pd.DataFrame(
    [
        [0, 0.7, -0.4],
        [0.8, 0, 0.9],
        [-0.5, 0.6, 0],
    ],
    index=['variable_A', 'variable_B', 'variable_C'],
    columns=['variable_A', 'variable_B', 'variable_C'],
)

print("Custom FCM:")
print(custom_fcm)

# Persist the matrix as CSV, with node labels as the header and index column
custom_csv_path = 'custom_fcm.csv'
custom_fcm.to_csv(custom_csv_path)
print(f"\n Saved to {custom_csv_path}")

In [None]:
# Turn the hand-built matrix into the project's JSON representation
custom_json = matrix_to_json(custom_fcm)

print("\nCustom FCM as JSON:")
print(json.dumps(custom_json, indent=2))

# Write the same indented JSON text to disk
custom_json_path = 'custom_fcm.json'
with open(custom_json_path, 'w') as f:
    f.write(json.dumps(custom_json, indent=2))
print(f"\n Saved to {custom_json_path}")

## 8. Comparing FCMs from Two Directories

For batch processing, you can compare all FCMs from two directories that have matching filenames using the `compare_fcm_directories` script.

In [None]:
# Standard-library helpers used to stage the demo folders
import os
import shutil

# Batch-comparison entry points from the project
from compare_fcm_directories import compare_directories, find_matching_files

# Two throwaway folders stand in for real FCM collections
test_dir1, test_dir2 = 'test_fcms_set1', 'test_fcms_set2'
os.makedirs(test_dir1, exist_ok=True)
os.makedirs(test_dir2, exist_ok=True)

# Give each folder a file with the SAME name so the pair can be matched
# (in a real scenario, these directories would already hold different FCMs)
shutil.copy('fcm1.csv', os.path.join(test_dir1, 'sample1.csv'))
shutil.copy('fcm2.csv', os.path.join(test_dir2, 'sample1.csv'))

print(
    "✓ Created test directories with sample FCMs",
    f"  {test_dir1}/sample1.csv",
    f"  {test_dir2}/sample1.csv",
    sep="\n",
)

In [None]:
# Preview which files the two folders have in common before scoring them
matches = find_matching_files(test_dir1, test_dir2)
print(f"Found {len(matches)} matching file pair(s):\n")
# Each match unpacks to the shared stem plus one path from each folder
for stem, path1, path2 in matches:
    print(f"  {stem}:\n    - {path1}\n    - {path2}")

In [None]:
# Score every matched pair across the two folders in a single call
results = compare_directories(
    dir1=test_dir1, dir2=test_dir2,
    output_dir='batch_comparison_results', output_format='both',
    threshold=0.5, verbose=True,
)

# Banner followed by the headline metrics for each file pair
print("\n" + "=" * 60, "COMPARISON RESULTS", "=" * 60, sep="\n")
print(results[['file_pair', 'F1', 'Jaccard', 'TP', 'PP', 'FP', 'FN']])

In [None]:
# View the structure of output files
batch_output_dir = 'batch_comparison_results'

# The walk below prints the root folder itself as its first line, so no
# separate header line is printed here (it used to appear twice).
print("Output structure:\n")
for root, dirs, files in os.walk(batch_output_dir):
    dirs.sort()  # in-place sort makes os.walk visit subfolders in a stable order
    rel_root = os.path.relpath(root, batch_output_dir)
    # Depth below the root: 0 for the root itself, 1 for its children, ...
    level = 0 if rel_root == os.curdir else rel_root.count(os.sep) + 1
    indent = ' ' * 2 * level
    print(f'{indent}{os.path.basename(root)}/')
    subindent = ' ' * 2 * (level + 1)
    for file in sorted(files):
        print(f'{subindent}{file}')

# Clean up test directories; skip any that are already gone so the cell can
# be re-run without raising FileNotFoundError.
for test_dir in (test_dir1, test_dir2):
    if os.path.isdir(test_dir):
        shutil.rmtree(test_dir)
print("\n✓ Cleaned up test directories")

In [None]:
from compare_fcm_directories import compare_directories, find_matching_files

# Optional: run the batch comparison on your own data.
# The folders are resolved relative to the current user's home directory
# rather than one machine's absolute path, so the notebook carries no
# user-specific location. Edit the folder names below to point at your data.
desktop_dir = os.path.join(os.path.expanduser('~'), 'Desktop')
user_dir1 = os.path.join(desktop_dir, 'osw-data')
user_dir2 = os.path.join(desktop_dir, 'iea_adjacency_matrices_AI')
user_output_dir = os.path.join(desktop_dir, 'iea_results')

# Only run when both input folders exist, so "Restart & Run All" still
# completes on machines that do not have this data.
if os.path.isdir(user_dir1) and os.path.isdir(user_dir2):
    user_results = compare_directories(
        dir1=user_dir1,
        dir2=user_dir2,
        output_dir=user_output_dir,
        output_format='both',
        threshold=0.5,
        verbose=True
    )
else:
    user_results = None
    print(f"Skipping user data comparison: '{user_dir1}' and/or '{user_dir2}' not found")

# Last expression of the cell: shows the returned results when the run happened
user_results

## 9. Summary

This walkthrough demonstrated:

1. **Loading FCM data** from CSV adjacency matrices (JSON edge lists are also supported)
2. **Basic scoring** with default parameters
3. **Parameter tuning** by testing different thresholds
4. **Result interpretation** - understanding TP, PP, FP, FN metrics
5. **Flexible output** - saving results in CSV and/or JSON
6. **Format conversion** - working with custom FCM data
7. **Batch processing** - comparing multiple FCM pairs from two directories

### Key Takeaways:
- **Threshold tuning** matters - the best-scoring threshold depends on the FCM pair, so sweep several values instead of relying on the default
- **F1 and Jaccard scores** provide different perspectives on matching quality
- **Edge counts matter** - more edges can lead to more false positives
- **Flexible I/O** - use CSV for matrices, JSON for edge lists
- **Batch comparison** - efficiently process multiple FCM pairs with matching filenames