In [7]:
import pandas as pd

# Evaluation summary table; a later cell iterates over its rows to pick candidates.
RESULTS_CSV = '../Solutii/rezultate.csv'
eval_df = pd.read_csv(RESULTS_CSV)

In [8]:
import pandas as pd
import os
from pathlib import Path
from typing import List, Tuple

def _mean_absolute_error(ground_truth_csv: str, predictions: pd.DataFrame) -> float:
    """Return the mean absolute error of the predicted ``Calories`` column.

    Args:
        ground_truth_csv: Path to a CSV file that has a ``Calories`` column.
        predictions: Candidate predictions; must have a ``Calories`` column.

    Returns:
        Sum of absolute row-wise differences divided by the number of
        ground-truth rows.
    """
    ground_truth = pd.read_csv(ground_truth_csv)
    # NOTE(review): the subtraction aligns rows by index and ``sum`` skips NaN,
    # so a prediction file with fewer rows than the ground truth is not
    # penalised for the missing rows. Confirm this is acceptable for grading.
    absolute_errors = (ground_truth["Calories"] - predictions["Calories"]).abs()
    return absolute_errors.sum() / len(ground_truth)


def evaluate_candidate_output(ground_truth_path: str,
                              candidate_name: str,
                              candidate_output_path: str) -> Tuple[float, float, float]:
    """Score a candidate's ``output_0.csv`` and compute the MAE of its two prediction files.

    Args:
        ground_truth_path: Directory containing ``task1_dataset_eval_t.csv``
            and ``task2_dataset_eval_t.csv``.
        candidate_name: Name used only in the message printed when files
            cannot be loaded.
        candidate_output_path: Directory containing the candidate's
            ``output_0.csv``, ``output_1.csv`` and ``output_2.csv``.

    Returns:
        A ``(score_output_0, mae_task1, mae_task2)`` tuple.

        * ``score_output_0`` is 10 points for every value in the first row of
          ``output_0.csv`` that equals the expected statistic, or 0 when the
          column names do not match or the file has no rows.
        * ``mae_task1`` / ``mae_task2`` are the mean absolute errors of
          ``output_1.csv`` / ``output_2.csv`` against the ground truth, or NaN
          when the file's only column is not ``Calories``.

        If any of the three candidate files cannot be read, a message is
        printed and ``(0, nan, nan)`` is returned, so the result always
        unpacks into three values.
    """
    not_available = float("nan")

    # Load answer files
    try:
        output_0 = pd.read_csv(os.path.join(candidate_output_path, 'output_0.csv'))
        output_1 = pd.read_csv(os.path.join(candidate_output_path, 'output_1.csv'))
        output_2 = pd.read_csv(os.path.join(candidate_output_path, 'output_2.csv'))
    except Exception:
        # Missing, empty or unparsable files all count as "no submission";
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        print("Candidat nu are toate fisierele: ", candidate_name)
        return 0, not_available, not_available

    #### Evaluate the output_0 file ####
    # Expected statistics, keyed by column name in the required column order.
    expected_output_0 = {
        'Samples': 9000,
        'No. Males': 4443,
        'Average Duration': 15.51,
        'SeniorUsers': 412,
    }
    points_per_value = 10
    score_output_0 = 0
    has_expected_columns = output_0.columns.tolist() == list(expected_output_0)
    # A header-only file has no first row to compare, so it scores 0.
    if has_expected_columns and len(output_0) > 0:
        score_output_0 = sum(
            points_per_value
            for column, expected in expected_output_0.items()
            if output_0[column].iloc[0] == expected
        )

    #### Evaluate the output_1 file ####
    mae_task1 = not_available
    if output_1.columns.tolist() == ["Calories"]:
        mae_task1 = _mean_absolute_error(
            os.path.join(ground_truth_path, 'task1_dataset_eval_t.csv'), output_1
        )

    #### Evaluate the output_2 file ####
    mae_task2 = not_available
    if output_2.columns.tolist() == ["Calories"]:
        mae_task2 = _mean_absolute_error(
            os.path.join(ground_truth_path, 'task2_dataset_eval_t.csv'), output_2
        )

    return score_output_0, mae_task1, mae_task2

In [None]:
import json
import shutil

# Build an in-depth report for every candidate whose total score exceeds the
# threshold: re-evaluate its outputs to get the two MAE values, attach the
# hyper-parameters from its config.json, and write everything to a CSV.

candidates_path = Path("../Solutii")
GROUND_TRUTH_PATH = "Dataset"

# Columns read from eval_df (defined in an earlier cell).
NAME_COLUMN = 'Name'
TOTAL_COLUMN = 'Total'
MIN_TOTAL_SCORE = 80  # only rows with a total strictly above this are kept

# Candidates for which config.json is not read; their settings are reported as 'NA'.
# NOTE(review): presumably reference solutions that ship without a config — confirm.
CANDIDATES_WITHOUT_CONFIG = ['Candidat_avansat', 'Candidat_incepator', 'Candiadat_ChatGpt']

# Keys copied from config.json into the report, in output column order.
CONFIG_KEYS = ['candidate_no', 'DROP_COLUMNS', 'NORMALIZATIONS', 'HIDDEN_LAYERS',
               'iterations', 'solver', 'lr_init', 'momentum', 'lr']
RESULT_COLUMNS = ['Name', 'total_score', 'mae_task1', 'mae_task2'] + CONFIG_KEYS

# Fail early with an actionable message instead of a bare KeyError mid-loop.
missing_columns = [column for column in (NAME_COLUMN, TOTAL_COLUMN)
                   if column not in eval_df.columns]
if missing_columns:
    raise KeyError(
        f"eval_df is missing required column(s) {missing_columns}; "
        f"available columns: {eval_df.columns.tolist()}"
    )

candidate_results = []

for index, row in eval_df.iterrows():
    if not row[TOTAL_COLUMN] > MIN_TOTAL_SCORE:
        continue

    candidat_name = row[NAME_COLUMN]
    candidat_path = candidates_path / candidat_name

    score_output_0, mae_task1, mae_task2 = evaluate_candidate_output(
        ground_truth_path=GROUND_TRUTH_PATH,
        candidate_name=candidat_name,
        candidate_output_path=str(candidat_path),
    )

    if candidat_name not in CANDIDATES_WITHOUT_CONFIG:
        json_path = os.path.join(candidat_path, 'config.json')
        with open(json_path, "r") as file:
            experiment_config = json.load(file)

        # Keep a copy of the config named after the candidate next to the original.
        new_json_path = os.path.join(candidat_path, candidat_name + '.json')
        shutil.copy(json_path, new_json_path)

        # 'momentum' is only read when the solver is 'sgd'; otherwise it is None.
        config_values = {key: experiment_config[key]
                         for key in CONFIG_KEYS if key != 'momentum'}
        config_values['momentum'] = (
            experiment_config['momentum'] if config_values['solver'] == 'sgd' else None
        )
    else:
        config_values = {key: 'NA' for key in CONFIG_KEYS}

    candidate_results.append({
        'Name': candidat_name,
        'total_score': row[TOTAL_COLUMN],
        'mae_task1': mae_task1,
        'mae_task2': mae_task2,
        **config_values,
    })

# Passing the columns explicitly fixes their order and keeps sort_values from
# failing on a missing 'total_score' column when no candidate qualified.
df = pd.DataFrame(candidate_results, columns=RESULT_COLUMNS)
df = df.sort_values(by="total_score", ascending=False)  # highest score first
df.to_csv("rezultate_indepth.csv", index=False)
        

KeyError: 'Total'