In [4]:
import pandas as pd
import re

# Step 1: Load the file
file_path = "data/how2sign/models/baseline_6_3_dp03_wd_2/generates/cvpr23.fairseq.i3d.test.how2sign/checkpoint_best.out"  # Update this if your file is in another folder
with open(file_path, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Step 2: Parse the lines
# A record line is "<prefix>-<sample id>\t<payload>". The payload group uses
# `.*` (not `.+`) so that a record with an empty payload is kept as an empty
# string instead of being silently dropped.
RECORD_PATTERN = re.compile(r"([THDP])-(\d+)\t(.*)")

# Prefixes whose payload is "<score>\t<text>", mapped to (score column, text column).
SCORED_COLUMNS = {
    "H": ("hypothesis_score", "hypothesis"),
    "D": ("detokenized_score", "detokenized"),
}

# Prefixes whose payload is stored as-is (stripped), mapped to their column.
PLAIN_COLUMNS = {
    "T": "target",
    "P": "log_probs",
}


def parse_generate_lines(lines):
    """Group T-/H-/D-/P- record lines by sample id.

    Args:
        lines: Iterable of text lines. Lines that do not start with
            ``T-<digits>``, ``H-<digits>``, ``D-<digits>`` or ``P-<digits>``
            followed by a tab are ignored.

    Returns:
        dict mapping the sample id (kept as the string found in the file) to a
        dict with whichever of these keys were present for that id:
        ``target``, ``hypothesis_score``, ``hypothesis``,
        ``detokenized_score``, ``detokenized``, ``log_probs``.
        If an id/prefix pair occurs more than once, the last occurrence wins.

    Raises:
        ValueError: If the score field of an H-/D- line is not a valid float.
    """
    records = {}
    for line in lines:
        match = RECORD_PATTERN.match(line)
        if match is None:
            continue
        prefix, idx, content = match.groups()

        if prefix in SCORED_COLUMNS:
            if not content:
                # No score to parse at all; treat it like any other
                # unrecognised line rather than failing the whole run.
                continue
            score_column, text_column = SCORED_COLUMNS[prefix]
            # partition() never fails: a line that carries a score but no
            # second tab (empty hypothesis with trailing whitespace removed)
            # yields an empty text instead of an unpacking error.
            score, _, text = content.partition("\t")
            entry = records.setdefault(idx, {})
            entry[score_column] = float(score)
            entry[text_column] = text.strip()
        else:
            records.setdefault(idx, {})[PLAIN_COLUMNS[prefix]] = content.strip()
    return records


data = parse_generate_lines(lines)

# Step 3: Convert to DataFrame
# Chained rename instead of inplace=True, so re-running this step always
# starts from `data` and never mutates an already-displayed frame.
df = (
    pd.DataFrame.from_dict(data, orient="index")
    .reset_index()
    .rename(columns={"index": "id"})
)

# Step 4: View the DataFrame
df.head()


Unnamed: 0,id,target,hypothesis_score,hypothesis,detokenized_score,detokenized,log_probs
0,1007,"depending on the focus, if it's a high intense...",-2.045285,"▁if ▁it ' s ▁a ▁high ▁stance , ▁it ' s ▁a ▁har...",-2.045285,"if it's a high stance, it's a hard level of lo...",-0.1075 -1.6243 -1.3291 -0.1094 -4.0136 -0.332...
1,284,so now we have established the hole there and ...,-1.444661,"▁now ▁that ▁we ' ve ▁set ▁the ▁hole , ▁you ▁wa...",-1.444661,"now that we've set the hole, you want to want ...",-0.4107 -0.8630 -1.0612 -1.8549 -0.1124 -1.288...
2,1006,"depending on the focus, if it's a high intense...",-1.620174,"▁also , ▁if ▁he ' s ▁really ▁strong , ▁he ▁can...",-1.620174,"also, if he's really strong, he can only work,...",-2.4092 -2.0466 -0.0952 -1.5975 -1.7126 -0.063...
3,653,thereâs also a few pieces of whatever trash ...,-1.252291,▁also ▁a ▁couple ▁of ▁pieces ▁of ▁whatever ▁it...,-1.252291,"also a couple of pieces of whatever it's here,...",-0.7730 -0.2688 -0.6594 -0.6839 -0.3124 -0.490...
4,347,"you would align both the same way, and what yo...",-1.578244,▁so ▁we ' re ▁going ▁to ▁drill ▁it ▁with ▁a ▁c...,-1.578244,so we're going to drill it with a camera and i...,-1.7456 -0.3773 -0.6059 -0.1138 -0.2297 -0.093...


In [5]:
# Write the parsed table to a CSV file; index=False leaves the row index out of the file.
df.to_csv(path_or_buf="parsed_how2sign.csv", index=False)

### Main BLEU Score Overview

| Metric | BLEU | N-gram Precision | BP | Ratio | Hyp Length | Ref Length |
|--------|------|------------------|-----|-------|------------|------------|
| Score  | 7.96 | 35.1/11.3/5.0/2.3 | 0.972 | 0.972 | 43,914 | 45,171 |

### N-gram BLEU Score Breakdown

| BLEU-1 | BLEU-2 | BLEU-3 | BLEU-4 |
|--------|--------|--------|--------|
| 34.10  | 19.37  | 12.19  | 7.96   |

### Regular vs Reduced Metrics Comparison

| Metric Type | BLEU | N-gram Precision | BP | Ratio | Hyp Length | Ref Length | chrF2++ |
|-------------|------|------------------|-----|-------|------------|------------|---------|
| Regular     | 7.96 | 35.1/11.3/5.0/2.3 | 0.972 | 0.972 | 43,914 | 45,171 | 26.62 |
| Reduced     | 2.20 | 25.4/6.1/1.5/0.5 | 0.685 | 0.725 | 11,141 | 15,363 | 18.78 |