In [None]:
# Import required libraries
import json
import os
import pickle
from itertools import product

import numpy as np
from sklearn.metrics import precision_recall_curve, auc
from tqdm import tqdm

from run import symbol_detection_pipeline


In [None]:
# Unpickle the (reference_colors, all_templates) pair used by the detector.
# NOTE: pickle.load can execute arbitrary code -- only point this at trusted files.
pickle_path = "data/templates.pkl"

with open(pickle_path, "rb") as template_file:
    reference_colors, all_templates = pickle.load(template_file)

template_count = len(all_templates)
reference_shape = np.array(reference_colors).shape
print(f"Loaded {template_count} templates")
print(f"Reference colors shape: {reference_shape}")


In [None]:
# Read the ground-truth annotations (looked up by image file name later on)
with open("data/groundtruth.json", "r") as ground_truth_file:
    ground_truth = json.load(ground_truth_file)

print(f"Ground truth loaded for {len(ground_truth)} images")

# Show only the first three entries as a sanity check
sample_entries = dict(list(ground_truth.items())[:3])
print("Sample ground truth:", sample_entries)


In [None]:
# Collect the image files to evaluate, in sorted file-name order
image_dir = "data/images/"
test_images = sorted(
    entry
    for entry in os.listdir(image_dir)
    if entry.endswith(('.jpeg', '.jpg', '.png'))  # case-sensitive suffix match
)

print(f"Found {len(test_images)} test images")
print("Test images:", test_images[:5], "...")


In [None]:
# Candidate values for every pipeline parameter except ncc_threshold.
# Each key maps to the list of values the grid search will try.
param_grid = dict(
    rgb_threshold=[0.4],
    black_val_threshold=[50],
    white_sat_threshold=[0.1],
    white_val_threshold=[100],
    min_line_length_percent=[0.05],
    adjacency_radius=[5],
    dedup_thresh=[100],
    scale=[2],
    num_octaves=[4],
    angle_tolerance=[3],
    distance_tolerance=[3],
)

# NCC thresholds swept inside every experiment; the stop value sits just
# past 0.95 so that 0.95 itself is included.
ncc_thresholds = np.arange(0.75, 0.96, 0.05)

print("Parameter grid:")
for name in param_grid:
    print(f"  {name}: {param_grid[name]}")
print(f"\nNCC thresholds: {ncc_thresholds}")

# Size of the grid = product of the number of candidates per parameter
total_combinations = 1
for candidates in param_grid.values():
    total_combinations = total_combinations * len(candidates)

total_experiments = total_combinations * len(ncc_thresholds)
print(f"\nTotal parameter combinations: {total_combinations}")
print(f"Total experiments (with NCC variations): {total_experiments}")


In [None]:
# Helper function to extract template names from results
def extract_template_names(results):
    """Return the ``template_name`` of every entry under ``results['matches']``.

    An empty list is returned when ``results`` has no ``'matches'`` key.
    """
    names = []
    if 'matches' in results:
        for match in results['matches']:
            names.append(match['template_name'])
    return names


In [None]:
# Helper function to compute precision and recall for a single image
def compute_precision_recall(predicted_templates, ground_truth_templates):
    """Return ``(precision, recall)`` for one image, comparing unique names.

    Conventions for empty inputs:
      * nothing predicted, nothing expected -> (1.0, 1.0)
      * nothing predicted, something expected -> (1.0, 0.0)
      * something predicted, nothing expected -> (0.0, 1.0)

    Otherwise both lists are reduced to sets, so repeated names count once.
    """
    if not predicted_templates:
        # No predictions: precision is reported as perfect; recall depends on
        # whether there was anything to find.
        return (1.0, 0.0) if ground_truth_templates else (1.0, 1.0)
    if not ground_truth_templates:
        return 0.0, 1.0

    predicted = set(predicted_templates)
    expected = set(ground_truth_templates)
    hits = len(predicted & expected)

    if predicted:
        precision = hits / len(predicted)
    else:
        precision = 0.0
    if expected:
        recall = hits / len(expected)
    else:
        recall = 0.0

    return precision, recall


In [None]:
# Helper function to evaluate a single experiment across all NCC thresholds
def evaluate_experiment(params, experiment_id):
    """Score one parameter combination over the full NCC-threshold sweep.

    For every value in the module-level ``ncc_thresholds`` the detection
    pipeline is run on each entry of ``test_images``. Per-image precision and
    recall are averaged per threshold, and the resulting (recall, precision)
    points are integrated with ``auc`` to give the AUPRC.

    Args:
        params: Keyword arguments forwarded to ``symbol_detection_pipeline``
            in addition to ``ncc_threshold``.
        experiment_id: Identifier echoed in the log and in the returned record.

    Returns:
        dict with keys ``experiment_id``, ``parameters``, ``ncc_thresholds``,
        ``precision_scores``, ``recall_scores`` and ``auprc``.
    """
    print(f"\n=== Experiment {experiment_id} ===")
    print(f"Parameters: {params}")

    threshold_count = len(ncc_thresholds)
    image_count = len(test_images)

    precision_scores, recall_scores = [], []

    for threshold_pos, ncc_threshold in enumerate(ncc_thresholds, start=1):
        print(f"  NCC threshold {threshold_pos}/{threshold_count}: {ncc_threshold:.2f}")

        per_image_scores = []  # one (precision, recall) pair per test image

        for image_pos, image_name in enumerate(test_images, start=1):
            # Log only every fifth image to keep the output readable
            if (image_pos - 1) % 5 == 0:
                print(f"    Processing image {image_pos}/{image_count}: {image_name}")

            image_path = os.path.join(image_dir, image_name)
            expected_templates = ground_truth.get(image_name, [])

            try:
                detections = symbol_detection_pipeline(
                    image_path=image_path,
                    templates=all_templates,
                    reference_colors=reference_colors,
                    ncc_threshold=ncc_threshold,
                    **params
                )
                precision, recall = compute_precision_recall(
                    extract_template_names(detections), expected_templates
                )
            except Exception as e:
                print(f"    Error processing {image_name}: {e}")
                # A failed image is scored as a complete miss
                precision, recall = 0.0, 0.0

            per_image_scores.append((precision, recall))

        # Average over images for this threshold
        avg_precision = np.mean([p for p, _ in per_image_scores])
        avg_recall = np.mean([r for _, r in per_image_scores])

        precision_scores.append(avg_precision)
        recall_scores.append(avg_recall)

        print(f"    Avg Precision: {avg_precision:.3f}, Avg Recall: {avg_recall:.3f}")

    # Order the points by recall before integrating the precision-recall curve
    recall_order = np.argsort(recall_scores)
    recall_axis = np.array(recall_scores)[recall_order]
    precision_axis = np.array(precision_scores)[recall_order]

    auprc = auc(recall_axis, precision_axis)

    print(f"  AUPRC: {auprc:.4f}")

    return {
        'experiment_id': experiment_id,
        'parameters': params,
        'ncc_thresholds': ncc_thresholds.tolist(),
        'precision_scores': precision_scores,
        'recall_scores': recall_scores,
        'auprc': auprc
    }


In [None]:
# Main grid search execution: run evaluate_experiment once per parameter
# combination and collect the result records in results_list.
print("Starting grid search...")
print(f"Total combinations to evaluate: {total_combinations}")

# Names and candidate lists share the grid's insertion order, so every tuple
# produced by product() can be zipped back onto the parameter names.
param_names = list(param_grid.keys())
param_values = list(param_grid.values())

results_list = []
experiment_id = 0  # stays 0 if the grid yields no combinations

# product() is lazy and has no len(); tqdm takes the size from `total`, so the
# combinations never have to be materialised as a list.
combinations = tqdm(
    product(*param_values),
    total=total_combinations,
    desc="Grid Search Experiments",
)
for experiment_id, combination in enumerate(combinations, start=1):
    params = dict(zip(param_names, combination))
    results_list.append(evaluate_experiment(params, experiment_id))

print(f"\nGrid search completed! Evaluated {len(results_list)} experiments.")


In [None]:
# Analyze results: rank experiments by AUPRC and print summary statistics
print("\n=== RESULTS SUMMARY ===")

# Best AUPRC first
sorted_results = sorted(results_list, key=lambda x: x['auprc'], reverse=True)

print("\nTop 5 experiments by AUPRC:")
for rank, result in enumerate(sorted_results[:5], start=1):
    print(f"{rank}. Experiment {result['experiment_id']}: AUPRC = {result['auprc']:.4f}")
    print(f"   Parameters: {result['parameters']}")

print("\nWorst 5 experiments by AUPRC:")
worst_results = sorted_results[-5:]
# Rank of the first listed entry. Derived from the actual slice length so the
# labels stay positive and correct when fewer than five experiments exist.
first_worst_rank = len(sorted_results) - len(worst_results) + 1
for rank, result in enumerate(worst_results, start=first_worst_rank):
    print(f"{rank}. Experiment {result['experiment_id']}: AUPRC = {result['auprc']:.4f}")
    print(f"   Parameters: {result['parameters']}")

# Statistics
auprc_scores = [r['auprc'] for r in results_list]
print("\nAUPRC Statistics:")
if auprc_scores:
    print(f"  Mean: {np.mean(auprc_scores):.4f}")
    print(f"  Std:  {np.std(auprc_scores):.4f}")
    print(f"  Min:  {np.min(auprc_scores):.4f}")
    print(f"  Max:  {np.max(auprc_scores):.4f}")
else:
    # np.min / np.max raise ValueError on an empty sequence
    print("  No experiments to summarize.")


In [None]:
# Persist every experiment record as indented JSON
output_file = "grid_search_results.json"

with open(output_file, 'w') as results_file:
    json.dump(results_list, results_file, indent=2)

saved_count = len(results_list)
print(f"\nResults saved to {output_file}")
print(f"Total experiments saved: {saved_count}")
