### Find Best Metrics

Groups the diagnostic files into consecutive sets of 15 epochs (one set per cross-validation cycle), scores each epoch as a weighted average of the chosen metrics, and reports the set of epochs whose mean score is highest.

In [16]:
import os
import re
import glob
import numpy as np

In [17]:
# Point this at the folder that holds your diagnostic files
diagnostics_folder = os.path.join(os.getcwd(), 'diagnostics_TRAIN')
# Relative importance of each metric when an epoch is scored
weights = dict(accuracy=0.3, auc=0.7)
# Metrics to read from every diagnostic file
metrics = ['accuracy', 'auc']

In [21]:

def extract_metrics(file_path, required_metrics):
    """
    Extract specified metrics from a diagnostic file.

    For each requested metric, the first "<Label>: <number>" occurrence in the
    file is used. Requested metrics that are not recognised, or whose label
    does not appear in the file, are left out of the result instead of raising.

    :param file_path: Path to the diagnostic file
    :param required_metrics: List of metrics to extract; recognised names are
        'precision', 'recall', 'f1_score', 'accuracy' and 'auc'
    :return: A dictionary mapping each metric that was found to its float value
    """
    # Accept "1", "1.0", "0.9731" and multi-digit values such as "97.31".
    # Requiring exactly one leading digit plus a decimal part would silently
    # drop a metric that was written as a bare integer or as a percentage.
    number = r'(\d+(?:\.\d+)?)'
    metric_patterns = {
        'precision': r'Precision:\s+' + number,
        'recall': r'Recall:\s+' + number,
        'f1_score': r'F1 Score:\s+' + number,
        'accuracy': r'Accuracy:\s+' + number,
        'auc': r'AUC:\s+' + number,
    }

    with open(file_path, 'r') as file:
        content = file.read()

    metrics = {}
    for metric in required_metrics:
        if metric not in metric_patterns:
            # Unknown metric names are ignored rather than treated as errors.
            continue
        match = re.search(metric_patterns[metric], content)
        if match:
            metrics[metric] = float(match.group(1))

    return metrics

def calculate_weighted_average(metrics, weights):
    """
    Combine metric values into a single weighted score.

    Every weighted metric that is present in `metrics` contributes
    value * weight to the numerator. The denominator is the sum of ALL
    weights, so a weighted metric that is missing from `metrics` adds nothing
    to the numerator but still counts in the denominator.

    :param metrics: Dictionary of metric values keyed by metric name
    :param weights: Dictionary of weights keyed by metric name
    :return: Weighted score, or 0 when the weights do not sum to a positive value
    """
    denominator = sum(weights.values())

    # Accumulate with an explicit loop, in the iteration order of `weights`.
    numerator = 0
    for name in weights:
        if name not in metrics:
            continue
        numerator += metrics[name] * weights[name]

    if denominator > 0:
        return numerator / denominator
    return 0

def find_best_model(diagnostics_folder, metrics, weights, group_size=15):
    """
    Find the best group of diagnostic files based on a weighted average of metrics.

    The `diagnostics_*.txt` files in the folder are sorted by path and split
    into consecutive groups of `group_size` files. Each file is scored with
    `calculate_weighted_average`, and the group with the highest mean score is
    returned (the first such group on ties). If the number of files is not a
    multiple of `group_size`, the last group is shorter and its mean is taken
    over fewer files.

    :param diagnostics_folder: Folder containing the diagnostic files
    :param metrics: List of metrics to consider
    :param weights: Dictionary of weights for each metric
    :param group_size: Number of consecutive files per group; defaults to 15
    :return: Tuple `(best_group, best_score, best_metrics_averages)`: the list
        of file paths in the winning group, that group's mean score, and a
        dictionary with the mean of each metric over the group (NaN for a
        metric found in none of its files). When no files match, the result
        is `(None, -inf, None)`.
    :raises ValueError: If `group_size` is smaller than 1
    """
    if group_size < 1:
        raise ValueError(f"group_size must be at least 1, got {group_size}")

    diagnostic_files = sorted(glob.glob(os.path.join(diagnostics_folder, 'diagnostics_*.txt')))
    cross_validation_groups = [
        diagnostic_files[i:i + group_size]
        for i in range(0, len(diagnostic_files), group_size)
    ]

    best_score = -np.inf
    best_group = None
    best_metrics_averages = None

    for group in cross_validation_groups:
        group_scores = []
        group_metrics = {metric: [] for metric in metrics}

        for file in group:
            metrics_data = extract_metrics(file, metrics)
            group_scores.append(calculate_weighted_average(metrics_data, weights))

            # Track raw values per metric so the winner's averages can be reported.
            for metric in metrics:
                if metric in metrics_data:
                    group_metrics[metric].append(metrics_data[metric])

        avg_group_score = np.mean(group_scores)

        # Strict comparison: an equal-scoring later group does not replace the current best.
        if avg_group_score > best_score:
            best_score = avg_group_score
            best_group = group
            # np.mean([]) warns and yields NaN; state the NaN explicitly for a
            # metric that was not found in any file of the group.
            best_metrics_averages = {
                metric: np.mean(values) if values else np.nan
                for metric, values in group_metrics.items()
            }

    return best_group, best_score, best_metrics_averages

In [22]:
# Score every group of diagnostic files and keep the best one. Despite the
# "model" naming, the first value is the list of file paths in that group.
best_model, best_score, best_metric_averages = find_best_model(diagnostics_folder, metrics, weights)
print(f"Best model files: {best_model}")
print(f"Best model score: {best_score}")
print(f"Best model metrics averages: {best_metric_averages}")

Best model files: ['c:\\Users\\u251245\\CVEpilepsy\\diagnostics_TRAIN\\diagnostics_20240621_140118.txt', 'c:\\Users\\u251245\\CVEpilepsy\\diagnostics_TRAIN\\diagnostics_20240621_142726.txt', 'c:\\Users\\u251245\\CVEpilepsy\\diagnostics_TRAIN\\diagnostics_20240621_145356.txt', 'c:\\Users\\u251245\\CVEpilepsy\\diagnostics_TRAIN\\diagnostics_20240621_152020.txt', 'c:\\Users\\u251245\\CVEpilepsy\\diagnostics_TRAIN\\diagnostics_20240621_154658.txt', 'c:\\Users\\u251245\\CVEpilepsy\\diagnostics_TRAIN\\diagnostics_20240621_161323.txt', 'c:\\Users\\u251245\\CVEpilepsy\\diagnostics_TRAIN\\diagnostics_20240621_163956.txt', 'c:\\Users\\u251245\\CVEpilepsy\\diagnostics_TRAIN\\diagnostics_20240621_170649.txt', 'c:\\Users\\u251245\\CVEpilepsy\\diagnostics_TRAIN\\diagnostics_20240621_173312.txt', 'c:\\Users\\u251245\\CVEpilepsy\\diagnostics_TRAIN\\diagnostics_20240621_175919.txt', 'c:\\Users\\u251245\\CVEpilepsy\\diagnostics_TRAIN\\diagnostics_20240621_182529.txt', 'c:\\Users\\u251245\\CVEpilepsy\\di

In [20]:
# find_best_model returns None instead of a list when no diagnostic files
# matched, so check for that rather than failing with a TypeError in the loop.
if best_model is None:
    print("No diagnostic files found.")
else:
    # One path per line is easier to read than the list repr.
    for model in best_model:
        print(model)

c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_20240621_140118.txt
c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_20240621_142726.txt
c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_20240621_145356.txt
c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_20240621_152020.txt
c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_20240621_154658.txt
c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_20240621_161323.txt
c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_20240621_163956.txt
c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_20240621_170649.txt
c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_20240621_173312.txt
c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_20240621_175919.txt
c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_20240621_182529.txt
c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_20240621_185158.txt
c:\Users\u251245\CVEpilepsy\diagnostics_TRAIN\diagnostics_202406

#### Calculate Anthony's Results

In [16]:
def calculate_metrics_averages(file_path):
    """
    Average every "name: value" metric that appears in a text file.

    :param file_path: Path to the text file containing the metrics
    :return: A dictionary mapping each metric name (spaces replaced with
        underscores) to the mean of all values found for that name
    """
    with open(file_path, 'r') as file:
        content = file.read()

    # A metric is one or more space-separated words, a colon, whitespace, then
    # a number written as a single digit followed by a decimal part.
    pattern = re.compile(r'(\w+(?: \w+)*):\s+(\d\.\d+)')

    # metric name -> [running sum of values, number of values seen]
    running = {}
    for found in pattern.finditer(content):
        key = found.group(1).replace(' ', '_')
        entry = running.setdefault(key, [0.0, 0])
        entry[0] += float(found.group(2))
        entry[1] += 1

    return {key: total / count for key, (total, count) in running.items()}


In [17]:
# NOTE(review): these are absolute paths on one specific machine; adjust them
# before running this cell anywhere else.
file_path_uncropped = 'C:/Users/u251245/CVEpilepsy/anthony_results_i3d_uncropped.txt'
file_path_cropped = 'C:/Users/u251245/CVEpilepsy/anthony_results_i3d_cropped.txt'
# Average every metric found in each results file (uncropped first, then cropped).
averages_uc = calculate_metrics_averages(file_path_uncropped)
averages_c = calculate_metrics_averages(file_path_cropped)
print(f'Uncropped Averages:\n{averages_uc}\n')
print(f'Cropped Averages:\n{averages_c}')

Uncropped Averages:
{'roc_auc': 0.9189666666666668, 'top1_acc': 0.8088791666666665, 'modified_acc': 0.7989062499999998, 'modified_auc': 0.9940958333333333}

Cropped Averages:
{'roc_auc': 0.9471333333333333, 'top1_acc': 0.8334583333333332, 'modified_acc': 0.8250708333333333, 'modified_auc': 0.9983416666666667}
