In [34]:
import pandas as pd
import os
from pathlib import Path
from typing import List, Tuple

def evaluate_candidate_output(ground_truth_path:str, 
                              candidate_name: str, 
                              candidate_output_path: str) -> Tuple[float, List]:
    """Score one candidate's ``output_1.csv`` and ``output_2.csv`` files.

    ``output_1.csv`` must have exactly the columns ``Samples``, ``Avg_Debt``,
    ``Unique_Months`` and ``SSN_Count`` (in that order). The first row of each
    column is compared with a hard-coded expected value; every match is worth
    10 points.

    ``output_2.csv`` must have exactly the columns ``ID`` and ``Credit_Score``.
    Its ``Credit_Score`` column is compared, row by row, with the
    ``Credit_Score`` column of ``dataset_eval_t.csv``; the resulting accuracy
    percentage is mapped to points through fixed tiers.

    Args:
        ground_truth_path: Directory containing ``dataset_eval_t.csv``.
        candidate_name: Label used only in the progress messages.
        candidate_output_path: Directory containing the candidate's
            ``output_1.csv`` and ``output_2.csv``.

    Returns:
        ``(total_score, detailed_results)``. ``detailed_results`` always has
        five entries: the points for the four ``output_1`` checks (in column
        order) followed by the points for ``output_2``. A failed check is
        recorded as ``0`` so positions stay comparable between candidates.

    Raises:
        FileNotFoundError: propagated from ``pd.read_csv`` when one of the
            candidate files (or the ground truth file, once it is needed)
            does not exist.
        KeyError: if ``dataset_eval_t.csv`` has no ``Credit_Score`` column.
    """
    import bisect  # kept function-local, as in the original cell

    points_per_check = 10
    # (column, expected first-row value, message printed on success)
    output_1_checks = [
        ("Samples", 44207, "Samples column is correct"),
        ("Avg_Debt", 4106, "Average Debt column is correct"),
        ("Unique_Months", 8, "Unique Months column is correct"),
        ("SSN_Count", 109, "SSN Count column is correct"),
    ]
    # Accuracy tiers: an accuracy <= accuracy_bounds[i] (and above the
    # previous bound) earns accuracy_scores[i].
    accuracy_bounds = [50, 60, 70, 80, 90]
    accuracy_scores = [10, 20, 50, 60, 70]

    detailed_results = []

    # Load both candidate files up front so a missing file fails early.
    output_1 = pd.read_csv(os.path.join(candidate_output_path, 'output_1.csv'))
    output_2 = pd.read_csv(os.path.join(candidate_output_path, 'output_2.csv'))

    #### Evaluate the output_1 file ####
    print(f"Evaluating candidate: {candidate_name}")

    score_output_1 = 0
    expected_columns = [column for column, _, _ in output_1_checks]
    if output_1.columns.tolist() == expected_columns:
        print("output_1 columns are correct")
        has_row = len(output_1) > 0  # a header-only file has nothing to compare
        for column, expected, success_message in output_1_checks:
            is_match = has_row and bool(output_1[column].iloc[0] == expected)
            earned = points_per_check if is_match else 0
            if is_match:
                print(success_message)
            # Always record an entry, so index i always refers to check i.
            detailed_results.append(earned)
            score_output_1 += earned
    else:
        detailed_results.extend([0] * len(output_1_checks))

    #### Evaluate the output_2 file ####
    score_output_2 = 0
    if output_2.columns.tolist() == ["ID", "Credit_Score"]:
        print("output_2 columns are correct")

        # The ground truth is only read when the candidate file is well-formed.
        dataset_eval_t = pd.read_csv(os.path.join(ground_truth_path, 'dataset_eval_t.csv'))
        truth = dataset_eval_t["Credit_Score"]
        # Align the predictions on the ground-truth index: missing rows become
        # NaN (never equal, so they count as wrong) and surplus rows are
        # ignored. Comparing Series of different lengths directly would raise.
        predicted = output_2["Credit_Score"].reindex(truth.index)

        matching_rows = (predicted == truth).sum()
        acc = (matching_rows / len(truth)) * 100 if len(truth) else 0.0
        print(f'Accuracy: {acc}%')
        print(f'Matching rows: {matching_rows}')

        # Map the accuracy to its tier. An accuracy above the last bound used
        # to fall off the end of the table and earn 0 points; clamp it to the
        # top tier instead.
        # NOTE(review): confirm whether accuracies above 90% were meant to
        # earn more than the current top score.
        tier = min(bisect.bisect_left(accuracy_bounds, acc), len(accuracy_scores) - 1)
        score_output_2 = accuracy_scores[tier]

        print("Took the score: ", score_output_2)

    # Appended in both branches: 0 when the output_2 header is wrong.
    detailed_results.append(score_output_2)

    # Calculate the total score
    total_score = score_output_1 + score_output_2
    print("Total score: ", total_score)

    return total_score, detailed_results

In [36]:
# Evaluate every candidate folder and write a ranking CSV next to them.
from pathlib import Path


# Each sub-directory of candidates_path is treated as one candidate.
# Path.iterdir() yields entries in arbitrary order, so sort the names to make
# the processing order (and the order among tied scores) reproducible.
candidates_path = Path("../Solutii")
folder_candidati = sorted(f.name for f in candidates_path.iterdir() if f.is_dir())
print(folder_candidati)

# Run the evaluator on each candidate and collect one record per candidate.
candidate_results = []
for candidate_name in folder_candidati:
    # Skip hidden folders such as .ipynb_checkpoints.
    if candidate_name.startswith('.'):
        continue

    candidat_path = candidates_path / candidate_name

    relative_path = str(candidat_path)
    total_score, detailed_results = evaluate_candidate_output(ground_truth_path="Dataset",
                              candidate_name=candidate_name,
                              candidate_output_path=relative_path)

    candidate_results.append({'Name' : candidate_name, 'Detailed': detailed_results, 'Total' : total_score})

# Explicit columns keep the frame well-formed when no candidate folder was
# found; without them the sort below fails because "Total" does not exist.
df = pd.DataFrame(candidate_results, columns=["Name", "Detailed", "Total"])
# Highest score first; the stable sort keeps alphabetical order among ties.
df = df.sort_values(by="Total", ascending=False, kind="stable")
df.to_csv(candidates_path / "rezultate.csv", index=False)

df

['.ipynb_checkpoints', 'Candidat_incepator']
Evaluating candidate: Candidat_incepator
output_1 columns are correct
Samples column is correct
Average Debt column is correct
Average Owners column is correct
Unique Genres column is correct
output_2 columns are correct
Accuracy: 57.10892432982694%
Matching rows: 5049
Took the score:  20
Total score:  60


Unnamed: 0,Name,Detailed,Total
0,Candidat_incepator,"[10, 10, 10, 10, 20]",60
