In [1]:
import os


In [63]:
import glob

# Define the pattern to match log files
pattern = "../results/baselines/*/*_baseline.log"
#pattern = "../results/baselines/*/*_lgbm.log"

# Use glob to find all matching log files
log_files = glob.glob(pattern)

# Print the list of log files
for log_file in log_files:
    print(log_file)



../results/baselines/rel-f1/rel-f1-qualifying_baseline.log
../results/baselines/rel-f1/rel-f1-dnf_baseline.log
../results/baselines/rel-f1/rel-f1-position_baseline.log
../results/baselines/rel-stackex/rel-stackex-badges_baseline.log
../results/baselines/rel-stackex/rel-stackex-engage_baseline.log
../results/baselines/rel-stackex/rel-stackex-votes_baseline.log
../results/baselines/rel-amazon/rel-amazon-ltv_baseline.log
../results/baselines/rel-amazon/rel-amazon-product-churn_baseline.log
../results/baselines/rel-amazon/rel-amazon-churn_baseline.log
../results/baselines/rel-amazon/rel-amazon-product-ltv_baseline.log


In [69]:
import pandas as pd
import re
from statistics import mean, stdev


def whole_file():
    # Initialize lists to store the computed statistics for each file
    val_metrics_list = []
    test_metrics_list = []
    file_names = []
    files_without_matches = []

    # Iterate through each log file
    for log_file in log_files:
        with open(log_file, 'r') as f:
            content = f.read()

        # Use regular expressions to extract metrics for all occurrences
        val_matches = re.findall(r'majority:\nVal: (.+)', content)
        test_matches = re.findall(r'Test: (.+)', content)

        # Check if there are any matches
        if not val_matches and not test_matches:
            files_without_matches.append(log_file)
            continue


        val_metrics = []
        test_metrics = []

        # Parse and append metrics for validation
        for match in val_matches:
            val_metrics.append(eval(match))

        # Parse and append metrics for test
        for match in test_matches:
            test_metrics.append(eval(match))

        # Compute mean and standard deviation for val metrics
        if val_metrics:
            val_means = {k: mean([d[k] for d in val_metrics]) for k in val_metrics[0]}
            val_stdevs = {k: stdev([d[k] for d in val_metrics]) for k in val_metrics[0]}
            val_metrics_list.append((val_means, val_stdevs))

        # Compute mean and standard deviation for test metrics
        if test_metrics:
            test_means = {k: mean([d[k] for d in test_metrics]) for k in test_metrics[0]}
            test_stdevs = {k: stdev([d[k] for d in test_metrics]) for k in test_metrics[0]}
            test_metrics_list.append((test_means, test_stdevs))

        # Store the filename
        file_names.append(os.path.basename(log_file))

    # Print files without matches
    if files_without_matches:
        print("Log files without exact matches:")
        for file in files_without_matches:
            print(file)


    # Create dataframes to store the computed statistics for each file
    df_val = pd.DataFrame(val_metrics_list, columns=['mean', 'std'])
    df_val['file_name'] = file_names
    df_test = pd.DataFrame(test_metrics_list, columns=['mean', 'std'])
    df_test['file_name'] = file_names
    
    return df_test, df_val


In [62]:
# Display the entire dataframe as a string
idx=0
print(df_val['file_name'].iloc[idx])
print("\nTest Metrics:")
print(df_test.iloc[idx]['mean'])
print(df_test.iloc[idx]['std'])

rel-f1-dnf_lgbm.log

Test Metrics:
{'average_precision': 0.4284579341878245, 'accuracy': 0.7051282051282052, 'f1': 0.0, 'roc_auc': 0.6753828136436832}
{'average_precision': 0.03549766656635324, 'accuracy': 0.0, 'f1': 0.0, 'roc_auc': 0.04028106749475347}


In [118]:
import re
from collections import defaultdict
from statistics import mean

def collect_average_performance(log_file, methods):
    performance = defaultdict(list)
    current_method = None

    with open(log_file, 'r') as f:
        for line in f:
            line = line.strip()
            if line in methods:
                current_method = line[:-1]  # Remove the colon
            elif line.startswith("Test:"):
                match = re.search(r'{.*}', line)
                if match:
                    test_metrics = eval(match.group())
                    performance[current_method].append(test_metrics)
            ##elif line.startswith("Val:"):
            #    match = re.search(r'{.*}', line)
            #    if match:
            #        val_metrics = eval(match.group())
            #        performance[current_method].append(val_metrics)

    average_performance = {}
    for method, metrics_list in performance.items():
        if metrics_list:
            num_seeds = len(metrics_list)
            average_metrics = {k: mean([d[k] for d in metrics_list]) for k in metrics_list[0]}
            std_metrics = {k: stdev([d[k] for d in metrics_list]) for k in metrics_list[0]}
            average_performance[method] = {"average_metrics": average_metrics, "std_metrics": std_metrics, "num_seeds": num_seeds}

    return average_performance


def collect_avg(log_files, methods):
    for log_file in log_files:
        print(f"File: {log_file}")
        average_performance = collect_average_performance(log_file, methods)


        for method, data in average_performance.items():
            print(f"Method: {method}")
            print(f"Average Metrics over {data['num_seeds']} seeds:")
            for metric, value in data['average_metrics'].items():
                print(f"{metric}: {value}")
            print(f"Std Metrics over {data['num_seeds']} seeds:")

            for metric, value in data['std_metrics'].items():
                print(f"{metric}: {value}")
        
            print()


In [119]:
log_files

['../results/baselines/rel-f1/rel-f1-qualifying_baseline.log',
 '../results/baselines/rel-f1/rel-f1-dnf_baseline.log',
 '../results/baselines/rel-f1/rel-f1-position_baseline.log',
 '../results/baselines/rel-stackex/rel-stackex-badges_baseline.log',
 '../results/baselines/rel-stackex/rel-stackex-engage_baseline.log',
 '../results/baselines/rel-stackex/rel-stackex-votes_baseline.log',
 '../results/baselines/rel-amazon/rel-amazon-ltv_baseline.log',
 '../results/baselines/rel-amazon/rel-amazon-product-churn_baseline.log',
 '../results/baselines/rel-amazon/rel-amazon-churn_baseline.log',
 '../results/baselines/rel-amazon/rel-amazon-product-ltv_baseline.log']

In [120]:
classif_idx = [2,5,6,9]


#methods =  ("random:", "majority:")
methods = ("entity_median:", "entity_mean:", "global_mean:", "global_median:", "global_zero:")
collect_avg([log_files[i] for i in classif_idx], methods)

File: ../results/baselines/rel-f1/rel-f1-position_baseline.log
Method: global_zero
Average Metrics over 5 seeds:
mae: 11.926206140350878
rmse: 13.014703602787927
Std Metrics over 5 seeds:
mae: 0.0
rmse: 0.0

Method: global_mean
Average Metrics over 5 seeds:
mae: 4.512880528786756
rmse: 5.512014024135418
Std Metrics over 5 seeds:
mae: 0.0
rmse: 0.0

Method: global_median
Average Metrics over 5 seeds:
mae: 4.399100877192982
rmse: 5.319882538110992
Std Metrics over 5 seeds:
mae: 0.0
rmse: 0.0

Method: entity_mean
Average Metrics over 5 seeds:
mae: 8.501357509542604
rmse: 10.211046868024601
Std Metrics over 5 seeds:
mae: 0.0
rmse: 0.0

Method: entity_median
Average Metrics over 5 seeds:
mae: 8.518508771929824
rmse: 10.22265948741393
Std Metrics over 5 seeds:
mae: 0.0
rmse: 0.0

File: ../results/baselines/rel-stackex/rel-stackex-votes_baseline.log
Method: global_zero
Average Metrics over 5 seeds:
mae: 0.4222357569467319
rmse: 1.7800068692241051
Std Metrics over 5 seeds:
mae: 0.0
rmse: 0.0

