In [26]:
import pandas as pd
import json

# Prepare the result data

# Helper function to parse the answer
def _extract_hs_code(ans, key):
    """Pull the HS code stored under ``key`` out of a raw model answer.

    The answer is expected to be a JSON object, optionally wrapped in a
    Markdown code fence (three backticks, optionally followed by ``json``).

    Args:
        ans: Raw answer text.
        key: JSON key whose value holds the HS code.

    Returns:
        The code as a string, left-padded with "0" to at least 6 characters.
        A missing key (or a JSON ``null``) yields "000000".

    Raises:
        json.JSONDecodeError: If the unwrapped text is not valid JSON.
    """
    # Trim surrounding whitespace *before* removing the fence; otherwise a
    # trailing newline after the closing backticks leaves the fence in place.
    text = ans.strip().strip("`").strip()
    # Drop the language tag as a prefix. str.lstrip("json") would instead strip
    # any leading run of the characters j/s/o/n, which is not what is meant.
    if text.startswith("json"):
        text = text[len("json"):]
    # As before, every newline and space is removed prior to parsing (this also
    # removes spaces inside keys and string values).
    text = text.replace("\n", "").replace(" ", "")
    code = json.loads(text).get(key, "")
    if code is None:
        code = ""
    # The value may be a JSON number rather than a string; normalise to str so
    # that padding works either way.
    return str(code).rjust(6, "0")


def parse_answer_kor(ans):
    """Return the 6-character HS code from a Korean answer (key "가장_적합한_HS_코드")."""
    return _extract_hs_code(ans, "가장_적합한_HS_코드")


def parse_answer_eng(ans):
    """Return the 6-character HS code from an English answer (key "BestHSCode")."""
    return _extract_hs_code(ans, "BestHSCode")

# Paths of the English and Korean result files.
RESULT_ENG_PATH = "../results/12072329_amazon_result_eng.csv"
RESULT_KOR_PATH = "../results/12072329_amazon_result_kor.csv"

# Load each file into a DataFrame. The paths keep their own names so that
# `result_eng` / `result_kor` only ever refer to DataFrames.
result_eng = pd.read_csv(RESULT_ENG_PATH)
result_kor = pd.read_csv(RESULT_KOR_PATH)

# Overwrite each raw answer with the zero-padded HS code parsed from it.
result_eng['Answer'] = result_eng['Answer'].map(parse_answer_eng)
result_kor['정답'] = result_kor['정답'].map(parse_answer_kor)

In [34]:
# Load the reference HS codes.
ground_truth = pd.read_csv("amazon_gt.csv")

# Render every code as text and left-pad it with "0" to at least 6 characters,
# so codes that lost a leading zero line up with the parsed answers.
gt_codes_as_text = ground_truth['Ground Truth'].map(str)
ground_truth['Ground Truth'] = gt_codes_as_text.str.rjust(6, fillchar="0")
ground_truth

Unnamed: 0,PRODUCT_NAME,Ground Truth
0,Nutraj 100% Natural Dried Premium California W...,80232
1,"Cadbury Bournvita 5 Star Magic Health Drink, 7...",220210
2,Sugar Free Green Natural Stevia Jar(200 g),210690
3,"Daawat Pulav, Long Grains, Fluffy Basmati for ...",100620
4,Tata Tea Gold | Assam teas with Gently Rolled ...,90230
5,"Nescafe Classic Coffee Jar, 200 g with Free Re...",90122


In [27]:
# Positions of the result rows to keep.
# NOTE(review): presumably these are the rows that correspond to the six
# ground-truth products -- confirm against the result files.
kept_positions = [0, 1, 2, 3, 4, 7]

# NOTE: this cell overwrites its own inputs, so it is not idempotent; reload the
# result files before running it a second time.
result_eng = (
    result_eng
    .iloc[kept_positions]
    .drop(columns=["CleanDescription", "Best3", "ProductName"])
    .reset_index(drop=True)
)
result_kor = (
    result_kor
    .iloc[kept_positions]
    .drop(columns=["CleanDescription", "Best3", "품목명"])
    .reset_index(drop=True)
)

In [35]:
# Place ground truth, English results and Korean results side by side.
# pd.concat(axis=1) aligns rows on the index and silently pads any frame that is
# shorter with NaN, so check the row counts explicitly before concatenating.
frames_to_merge = [ground_truth, result_eng, result_kor]
row_counts = [len(frame) for frame in frames_to_merge]
if len(set(row_counts)) != 1:
    raise ValueError(
        f"Cannot merge frames with different row counts: {row_counts}"
    )
merged_df = pd.concat(frames_to_merge, axis=1)

In [37]:
# Compare each predicted HS code with the ground truth at three levels:
#   *_Match_2 -> first 2 characters agree
#   *_Match_4 -> first 4 characters agree
#   *_Match_6 -> the whole code agrees
# The comparisons are vectorised over the whole column, so each new column is a
# proper boolean column (the previous row-by-row .loc writes produced object
# columns). Column names and their creation order are unchanged.
gt_codes = merged_df["Ground Truth"]
for prefix, answer_column in (("eng", "Answer"), ("kor", "정답")):
    predicted_codes = merged_df[answer_column]
    merged_df[f"{prefix}_Match_2"] = predicted_codes.str[:2] == gt_codes.str[:2]
    merged_df[f"{prefix}_Match_4"] = predicted_codes.str[:4] == gt_codes.str[:4]
    merged_df[f"{prefix}_Match_6"] = predicted_codes == gt_codes

In [40]:
# Write the merged comparison table to disk; the row index is not saved.
merged_df.to_csv(path_or_buf="amazon_merged.csv", index=False)

In [41]:
# Report the share of True values for every match column, one section per
# language.
eng_match_columns = ["eng_Match_2", "eng_Match_4", "eng_Match_6"]
kor_match_columns = ["kor_Match_2", "kor_Match_4", "kor_Match_6"]

for header, match_columns in (
    ("Amazon eng vs. gt", eng_match_columns),
    ("Amazon kor vs. gt", kor_match_columns),
):
    print(header)
    for match_type in match_columns:
        true_ratio = merged_df[match_type].mean()
        print(f"{match_type} True 비율: {true_ratio:.2f}")

# Overall share of True values across all English match columns.
eng_total_true_ratio = merged_df[eng_match_columns].values.sum() / (len(eng_match_columns) * len(merged_df))
print(f"Eng 전체 True 비율: {eng_total_true_ratio:.2f}")

# Overall share of True values across all Korean match columns.
kor_total_true_ratio = merged_df[kor_match_columns].values.sum() / (len(kor_match_columns) * len(merged_df))
print(f"Kor 전체 True 비율: {kor_total_true_ratio:.2f}")

Amaznon eng vs. gt
eng_Match_2 True 비율: 0.50
eng_Match_4 True 비율: 0.50
eng_Match_6 True 비율: 0.33
Amaznon kor vs. gt
kor_Match_2 True 비율: 0.83
kor_Match_4 True 비율: 0.67
kor_Match_6 True 비율: 0.33
Eng 전체 True 비율: 0.44
Kor 전체 True 비율: 0.61
