In [1]:
import argparse
import re
from collections import defaultdict
from typing import List
import numpy as np
import nltk
from nltk import edit_distance
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from tqdm import tqdm
import json

# Download NLTK WordNet resource (if not already downloaded)
try:
    nltk.data.find('corpora/wordnet.zip')
except LookupError:
    print("Downloading NLTK WordNet resource...")
    nltk.download('wordnet')

def extract_text_from_latex(latex_file):
    """
    Extract text from a LaTeX file.
    """
    with open(latex_file, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def compute_metrics(pred, gt, minlen=4):
    metrics = {}
    if len(pred) < minlen or len(gt) < minlen:
        return metrics
    metrics["edit_dist"] = edit_distance(pred, gt) / max(len(pred), len(gt))
    reference = gt.split()
    hypothesis = pred.split()
    metrics["bleu"] = sentence_bleu([reference], hypothesis, smoothing_function=SmoothingFunction().method1)
    try:
        from nltk.translate.meteor_score import meteor_score
        metrics["meteor"] = meteor_score([reference], hypothesis)
    except ImportError:
        metrics["meteor"] = np.nan
    reference = set(reference)
    hypothesis = set(hypothesis)
    metrics["precision"] = len(reference.intersection(hypothesis)) / len(hypothesis)
    metrics["recall"] = len(reference.intersection(hypothesis)) / len(reference)
    metrics["f_measure"] = 2 * metrics["precision"] * metrics["recall"] / (metrics["precision"] + metrics["recall"])
    return metrics

def get_input():
    predicted_text_path = input("Enter path to predicted LaTeX file: ")
    ground_truth_text_path = input("Enter path to ground truth LaTeX file: ")
    
    # Extract text from LaTeX files
    predicted_text = extract_text_from_latex(predicted_text_path)
    ground_truth_text = extract_text_from_latex(ground_truth_text_path)
    
    return predicted_text, ground_truth_text

if __name__ == "__main__":
    predicted_text, ground_truth_text = get_input()
    
    # Calculate metrics
    metrics = compute_metrics(predicted_text, ground_truth_text)
    
    # Display results
    print("Metrics:")
    for metric, value in metrics.items():
        print(f"{metric}: {value}")
    
    # Organize data into a JSON structure
    data = {
        "predictions": [predicted_text],
        "ground_truths": [ground_truth_text]
    }
    
    # Save the data as a JSON file
    with open("data.json", "w") as json_file:
        json.dump(data, json_file, indent=4)
    
    print("Data saved as data.json")


Enter path to predicted LaTeX file: C:\Users\Acer\Desktop\docs\nagykiss24.txt
Enter path to ground truth LaTeX file: C:\Users\Acer\Desktop\docs\naggykiss_detected.txt


MemoryError: 

In [2]:
import argparse
import re
from collections import defaultdict
from typing import List
import numpy as np
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from tqdm import tqdm
import json
import Levenshtein

# Download NLTK WordNet resource (if not already downloaded)
try:
    nltk.data.find('corpora/wordnet.zip')
except LookupError:
    print("Downloading NLTK WordNet resource...")
    nltk.download('wordnet')

def extract_text_from_latex(latex_file):
    """
    Extract text from a LaTeX file.
    """
    with open(latex_file, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def compute_metrics(pred, gt, minlen=4):
    metrics = {}
    if len(pred) < minlen or len(gt) < minlen:
        return metrics
    metrics["edit_dist"] = Levenshtein.distance(pred, gt) / max(len(pred), len(gt))
    reference = gt.split()
    hypothesis = pred.split()
    metrics["bleu"] = sentence_bleu([reference], hypothesis, smoothing_function=SmoothingFunction().method1)
    try:
        from nltk.translate.meteor_score import meteor_score
        metrics["meteor"] = meteor_score([reference], hypothesis)
    except ImportError:
        metrics["meteor"] = np.nan
    reference = set(reference)
    hypothesis = set(hypothesis)
    metrics["precision"] = len(reference.intersection(hypothesis)) / len(hypothesis)
    metrics["recall"] = len(reference.intersection(hypothesis)) / len(reference)
    metrics["f_measure"] = 2 * metrics["precision"] * metrics["recall"] / (metrics["precision"] + metrics["recall"])
    return metrics

def get_input():
    predicted_text_path = input("Enter path to predicted LaTeX file: ")
    ground_truth_text_path = input("Enter path to ground truth LaTeX file: ")
    
    # Extract text from LaTeX files
    predicted_text = extract_text_from_latex(predicted_text_path)
    ground_truth_text = extract_text_from_latex(ground_truth_text_path)
    
    return predicted_text, ground_truth_text

if __name__ == "__main__":
    predicted_text, ground_truth_text = get_input()
    
    # Calculate metrics
    metrics = compute_metrics(predicted_text, ground_truth_text)
    
    # Display results
    print("Metrics:")
    for metric, value in metrics.items():
        print(f"{metric}: {value}")
    
    # Organize data into a JSON structure
    data = {
        "predictions": [predicted_text],
        "ground_truths": [ground_truth_text]
    }
    
    # Save the data as a JSON file
    with open("data.json", "w") as json_file:
        json.dump(data, json_file, indent=4)
    
    print("Data saved as data.json")


Enter path to predicted LaTeX file: C:\Users\Acer\Desktop\docs\nagykiss24.txt
Enter path to ground truth LaTeX file: C:\Users\Acer\Desktop\docs\naggykiss_detected.txt
Metrics:
edit_dist: 0.8172923883531504
bleu: 0.05015996542225005
meteor: 0.19024693108291338
precision: 0.5539305301645339
recall: 0.5832531280076997
f_measure: 0.5682137834036569
Data saved as data.json


In [3]:
import re
import numpy as np
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import json
import Levenshtein

# Download NLTK WordNet resource (if not already downloaded)
try:
    nltk.data.find('corpora/wordnet.zip')
except LookupError:
    print("Downloading NLTK WordNet resource...")
    nltk.download('wordnet')

def extract_text_from_latex(latex_file):
    """
    Extract text from a LaTeX file.
    """
    with open(latex_file, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def separate_text_and_math(latex_content):
    """
    Separate plain text and math expressions in a LaTeX document.
    """
    text_parts = []
    math_parts = []
    parts = re.split(r'(\$.*?\$|\$\$.*?\$\$|\\\(.*?\\\)|\\\[.*?\\\])', latex_content)
    for part in parts:
        if re.match(r'^\$.*\$$|^\$\$.*\$\$$|^\\\(.*\\\)$|^\\\[.*\\\]$', part):
            math_parts.append(part)
        else:
            text_parts.append(part)
    return ' '.join(text_parts), ' '.join(math_parts)

def compute_metrics(pred, gt, minlen=4):
    metrics = {}
    if len(pred) < minlen or len(gt) < minlen:
        return metrics
    metrics["edit_dist"] = Levenshtein.distance(pred, gt) / max(len(pred), len(gt))
    reference = gt.split()
    hypothesis = pred.split()
    metrics["bleu"] = sentence_bleu([reference], hypothesis, smoothing_function=SmoothingFunction().method1)
    try:
        from nltk.translate.meteor_score import meteor_score
        metrics["meteor"] = meteor_score([reference], hypothesis)
    except ImportError:
        metrics["meteor"] = np.nan
    reference = set(reference)
    hypothesis = set(hypothesis)
    metrics["precision"] = len(reference.intersection(hypothesis)) / len(hypothesis)
    metrics["recall"] = len(reference.intersection(hypothesis)) / len(reference)
    metrics["f_measure"] = 2 * metrics["precision"] * metrics["recall"] / (metrics["precision"] + metrics["recall"])
    return metrics

def get_input():
    predicted_text_path = input("Enter path to predicted LaTeX file: ")
    ground_truth_text_path = input("Enter path to ground truth LaTeX file: ")
    
    # Extract text from LaTeX files
    predicted_text = extract_text_from_latex(predicted_text_path)
    ground_truth_text = extract_text_from_latex(ground_truth_text_path)
    
    return predicted_text, ground_truth_text

if __name__ == "__main__":
    predicted_text, ground_truth_text = get_input()
    
    # Separate text and math
    pred_text, pred_math = separate_text_and_math(predicted_text)
    gt_text, gt_math = separate_text_and_math(ground_truth_text)
    
    # Calculate metrics for text
    text_metrics = compute_metrics(pred_text, gt_text)
    
    # Calculate metrics for math
    math_metrics = compute_metrics(pred_math, gt_math)
    
    # Combine metrics
    metrics = {"text": text_metrics, "math": math_metrics}
    
    # Display results
    print("Metrics:")
    for category, category_metrics in metrics.items():
        print(f"{category} metrics:")
        for metric, value in category_metrics.items():
            print(f"  {metric}: {value}")
    
    # Organize data into a JSON structure
    data = {
        "predictions": [predicted_text],
        "ground_truths": [ground_truth_text],
        "metrics": metrics
    }
    
    # Save the data as a JSON file
    with open("data.json", "w") as json_file:
        json.dump(data, json_file, indent=4)
    
    print("Data saved as data.json")


Enter path to predicted LaTeX file: C:\Users\Acer\Desktop\docs\nagykiss24.txt
Enter path to ground truth LaTeX file: C:\Users\Acer\Desktop\docs\naggykiss_detected.txt
Metrics:
text metrics:
  edit_dist: 0.818915207689667
  bleu: 0.045156766063108485
  meteor: 0.1989619951492028
  precision: 0.6677704194260485
  recall: 0.5822906641000962
  f_measure: 0.6221079691516709
math metrics:
Data saved as data.json


In [4]:
import re
import numpy as np
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import json
import Levenshtein

# Download NLTK WordNet resource (if not already downloaded)
try:
    nltk.data.find('corpora/wordnet.zip')
except LookupError:
    print("Downloading NLTK WordNet resource...")
    nltk.download('wordnet')

def extract_text_from_latex(latex_file):
    """
    Extract text from a LaTeX file.
    """
    with open(latex_file, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def separate_text_and_math(latex_content):
    """
    Separate plain text and math expressions in a LaTeX document.
    """
    text_parts = []
    math_parts = []
    parts = re.split(r'(\$.*?\$|\$\$.*?\$\$|\\\(.*?\\\)|\\\[.*?\\\])', latex_content)
    for part in parts:
        if re.match(r'^\$.*\$$|^\$\$.*\$\$$|^\\\(.*\\\)$|^\\\[.*\\\]$', part):
            math_parts.append(part)
        else:
            text_parts.append(part)
    print(f"Text parts: {text_parts}")
    print(f"Math parts: {math_parts}")
    return ' '.join(text_parts), ' '.join(math_parts)

def compute_metrics(pred, gt, minlen=4):
    metrics = {}
    if len(pred) < minlen or len(gt) < minlen:
        return metrics
    metrics["edit_dist"] = Levenshtein.distance(pred, gt) / max(len(pred), len(gt))
    reference = gt.split()
    hypothesis = pred.split()
    metrics["bleu"] = sentence_bleu([reference], hypothesis, smoothing_function=SmoothingFunction().method1)
    try:
        from nltk.translate.meteor_score import meteor_score
        metrics["meteor"] = meteor_score([reference], hypothesis)
    except ImportError:
        metrics["meteor"] = np.nan
    reference = set(reference)
    hypothesis = set(hypothesis)
    metrics["precision"] = len(reference.intersection(hypothesis)) / len(hypothesis)
    metrics["recall"] = len(reference.intersection(hypothesis)) / len(reference)
    metrics["f_measure"] = 2 * metrics["precision"] * metrics["recall"] / (metrics["precision"] + metrics["recall"])
    return metrics

def get_input():
    predicted_text_path = input("Enter path to predicted LaTeX file: ")
    ground_truth_text_path = input("Enter path to ground truth LaTeX file: ")
    
    # Extract text from LaTeX files
    predicted_text = extract_text_from_latex(predicted_text_path)
    ground_truth_text = extract_text_from_latex(ground_truth_text_path)
    
    return predicted_text, ground_truth_text

if __name__ == "__main__":
    predicted_text, ground_truth_text = get_input()
    
    # Separate text and math
    pred_text, pred_math = separate_text_and_math(predicted_text)
    gt_text, gt_math = separate_text_and_math(ground_truth_text)
    
    # Calculate metrics for text
    text_metrics = compute_metrics(pred_text, gt_text)
    
    # Calculate metrics for math
    math_metrics = {}
    if pred_math and gt_math:
        math_metrics = compute_metrics(pred_math, gt_math)
    
    # Combine metrics
    metrics = {"text": text_metrics, "math": math_metrics}
    
    # Display results
    print("Metrics:")
    for category, category_metrics in metrics.items():
        print(f"{category} metrics:")
        for metric, value in category_metrics.items():
            print(f"  {metric}: {value}")
    
    # Organize data into a JSON structure
    data = {
        "predictions": [predicted_text],
        "ground_truths": [ground_truth_text],
        "metrics": metrics
    }
    
    # Save the data as a JSON file
    with open("data.json", "w") as json_file:
        json.dump(data, json_file, indent=4)
    
    print("Data saved as data.json")


Enter path to predicted LaTeX file: C:\Users\Acer\Desktop\docs\nagykiss24.txt
Enter path to ground truth LaTeX file: C:\Users\Acer\Desktop\docs\naggykiss_detected.txt
Text parts: ['\n\n\\begin{abstract}\nThe aim of this paper is to characterize the so-called ', '-balancing property in the class of generalized quasi-arithmetic means. In general, the \nquestion is \nwhether those elements of a given family of means that possess this property are quasi-arithmetic.\n\nThe first result in the latter direction is due to G. Aumann who showed that a balanced complex mean is necessariliy quasi-arithmetic provided that it \nis analytic. Then Aumann characterized quasi-arithmetic means among Cauchy means in terms of the balancing property. These results date back to the \n1930s. In 2015, Lucio R. Berrone, generalizing balancedness, concluded that a mean having that more general property is quasi-arithmetic if it is \nsymmetric, strict and continuously differentiable. A common feature of these res

Metrics:
text metrics:
  edit_dist: 0.818915207689667
  bleu: 0.045156766063108485
  meteor: 0.1989619951492028
  precision: 0.6677704194260485
  recall: 0.5822906641000962
  f_measure: 0.6221079691516709
math metrics:
Data saved as data.json


In [5]:
import re
import numpy as np
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import json
import Levenshtein

# Download NLTK WordNet resource (if not already downloaded)
try:
    nltk.data.find('corpora/wordnet.zip')
except LookupError:
    print("Downloading NLTK WordNet resource...")
    nltk.download('wordnet')

def extract_text_from_latex(latex_file):
    """
    Extract text from a LaTeX file.
    """
    with open(latex_file, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def separate_text_and_math(latex_content):
    """
    Separate plain text and math expressions in a LaTeX document.
    """
    text_parts = []
    math_parts = []
    # Extended regex to include more LaTeX math environments
    parts = re.split(r'(\$.*?\$|\$\$.*?\$\$|\\\(.*?\\\)|\\\[.*?\\\]|\\begin\{.*?\}.*?\\end\{.*?\})', latex_content)
    for part in parts:
        if re.match(r'^\$.*\$$|^\$\$.*\$\$$|^\\\(.*\\\)$|^\\\[.*\\\]$|^\\begin\{.*\}.*\\end\{.*\}$', part):
            math_parts.append(part)
        else:
            text_parts.append(part)
    return ' '.join(text_parts), ' '.join(math_parts)

def compute_metrics(pred, gt, minlen=4):
    metrics = {}
    if len(pred) < minlen or len(gt) < minlen:
        return metrics
    metrics["edit_dist"] = Levenshtein.distance(pred, gt) / max(len(pred), len(gt))
    reference = gt.split()
    hypothesis = pred.split()
    metrics["bleu"] = sentence_bleu([reference], hypothesis, smoothing_function=SmoothingFunction().method1)
    try:
        from nltk.translate.meteor_score import meteor_score
        metrics["meteor"] = meteor_score([reference], hypothesis)
    except ImportError:
        metrics["meteor"] = np.nan
    reference = set(reference)
    hypothesis = set(hypothesis)
    metrics["precision"] = len(reference.intersection(hypothesis)) / len(hypothesis)
    metrics["recall"] = len(reference.intersection(hypothesis)) / len(reference)
    metrics["f_measure"] = 2 * metrics["precision"] * metrics["recall"] / (metrics["precision"] + metrics["recall"])
    return metrics

def get_input():
    predicted_text_path = input("Enter path to predicted LaTeX file: ")
    ground_truth_text_path = input("Enter path to ground truth LaTeX file: ")
    
    # Extract text from LaTeX files
    predicted_text = extract_text_from_latex(predicted_text_path)
    ground_truth_text = extract_text_from_latex(ground_truth_text_path)
    
    return predicted_text, ground_truth_text

if __name__ == "__main__":
    predicted_text, ground_truth_text = get_input()
    
    # Separate text and math
    pred_text, pred_math = separate_text_and_math(predicted_text)
    gt_text, gt_math = separate_text_and_math(ground_truth_text)
    
    # Calculate metrics for text
    text_metrics = compute_metrics(pred_text, gt_text)
    
    # Calculate metrics for math
    math_metrics = {}
    if pred_math and gt_math:
        math_metrics = compute_metrics(pred_math, gt_math)
    
    # Combine metrics
    metrics = {"text": text_metrics, "math": math_metrics}
    
    # Display results
    print("Metrics:")
    for category, category_metrics in metrics.items():
        print(f"{category} metrics:")
        for metric, value in category_metrics.items():
            print(f"  {metric}: {value}")
    
    # Organize data into a JSON structure
    data = {
        "predictions": [predicted_text],
        "ground_truths": [ground_truth_text],
        "metrics": metrics
    }
    
    # Save the data as a JSON file
    with open("data.json", "w") as json_file:
        json.dump(data, json_file, indent=4)
    
    print("Data saved as data.json")


Enter path to predicted LaTeX file: C:\Users\Acer\Desktop\docs\nagykiss24.txt
Enter path to ground truth LaTeX file: C:\Users\Acer\Desktop\docs\naggykiss_detected.txt
Metrics:
text metrics:
  edit_dist: 0.742186088527552
  bleu: 0.09243182388628177
  meteor: 0.23667745186079855
  precision: 0.6512141280353201
  recall: 0.612668743509865
  f_measure: 0.6313536650615302
math metrics:
  edit_dist: 0.931149996058958
  bleu: 2.1193259951803503e-05
  meteor: 0.00572974124962391
  precision: 0.05714285714285714
  recall: 0.08823529411764706
  f_measure: 0.06936416184971098
Data saved as data.json


In [6]:
import re
import numpy as np
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import json
import Levenshtein

# Download NLTK WordNet resource (if not already downloaded)
try:
    nltk.data.find('corpora/wordnet.zip')
except LookupError:
    print("Downloading NLTK WordNet resource...")
    nltk.download('wordnet')

def extract_text_from_latex(latex_file):
    """
    Extract text from a LaTeX file.
    """
    with open(latex_file, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def separate_text_and_math(latex_content):
    """
    Separate plain text and math expressions in a LaTeX document.
    """
    text_parts = []
    math_parts = []
    # Extended regex to include more LaTeX math environments
    parts = re.split(r'(\$.*?\$|\$\$.*?\$\$|\\\(.*?\\\)|\\\[.*?\\\]|\\begin\{.*?\}.*?\\end\{.*?\})', latex_content, flags=re.DOTALL)
    for part in parts:
        if re.match(r'^\$.*\$$|^\$\$.*\$\$$|^\\\(.*\\\)$|^\\\[.*\\\]$|^\\begin\{.*\}.*\\end\{.*\}$', part, flags=re.DOTALL):
            math_parts.append(part)
        else:
            text_parts.append(part)
    return ' '.join(text_parts), ' '.join(math_parts)

def compute_metrics(pred, gt, minlen=4):
    metrics = {}
    if len(pred) < minlen or len(gt) < minlen:
        return metrics
    metrics["edit_dist"] = Levenshtein.distance(pred, gt) / max(len(pred), len(gt))
    reference = gt.split()
    hypothesis = pred.split()
    metrics["bleu"] = sentence_bleu([reference], hypothesis, smoothing_function=SmoothingFunction().method1)
    try:
        from nltk.translate.meteor_score import meteor_score
        metrics["meteor"] = meteor_score([reference], hypothesis)
    except ImportError:
        metrics["meteor"] = np.nan
    reference = set(reference)
    hypothesis = set(hypothesis)
    metrics["precision"] = len(reference.intersection(hypothesis)) / len(hypothesis)
    metrics["recall"] = len(reference.intersection(hypothesis)) / len(reference)
    metrics["f_measure"] = 2 * metrics["precision"] * metrics["recall"] / (metrics["precision"] + metrics["recall"])
    return metrics

def get_input():
    predicted_text_path = input("Enter path to predicted LaTeX file: ")
    ground_truth_text_path = input("Enter path to ground truth LaTeX file: ")
    
    # Extract text from LaTeX files
    predicted_text = extract_text_from_latex(predicted_text_path)
    ground_truth_text = extract_text_from_latex(ground_truth_text_path)
    
    return predicted_text, ground_truth_text

if __name__ == "__main__":
    predicted_text, ground_truth_text = get_input()
    
    # Separate text and math
    pred_text, pred_math = separate_text_and_math(predicted_text)
    gt_text, gt_math = separate_text_and_math(ground_truth_text)
    
    # Calculate metrics for text
    text_metrics = compute_metrics(pred_text, gt_text)
    
    # Calculate metrics for math
    math_metrics = {}
    if pred_math and gt_math:
        math_metrics = compute_metrics(pred_math, gt_math)
    
    # Combine metrics
    metrics = {"text": text_metrics, "math": math_metrics}
    
    # Display results
    print("Metrics:")
    for category, category_metrics in metrics.items():
        print(f"{category} metrics:")
        for metric, value in category_metrics.items():
            print(f"  {metric}: {value}")
    
    # Organize data into a JSON structure
    data = {
        "predictions": [predicted_text],
        "ground_truths": [ground_truth_text],
        "metrics": metrics
    }
    
    # Save the data as a JSON file
    with open("data.json", "w") as json_file:
        json.dump(data, json_file, indent=4)
    
    print("Data saved as data.json")


Enter path to predicted LaTeX file: C:\Users\Acer\Desktop\docs\nagykiss24.txt
Enter path to ground truth LaTeX file: C:\Users\Acer\Desktop\docs\naggykiss_detected.txt
Metrics:
text metrics:
  edit_dist: 0.8388179119886437
  bleu: 0.011515274755890304
  meteor: 0.12179153160734989
  precision: 0.7476635514018691
  recall: 0.4153686396677051
  f_measure: 0.534045393858478
math metrics:
  edit_dist: 0.837195554504611
  bleu: 0.007973970767794413
  meteor: 0.022347510351536613
  precision: 0.0607661822985469
  recall: 0.3382352941176471
  f_measure: 0.10302351623740202
Data saved as data.json


In [None]:
import re
import numpy as np
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import json
import Levenshtein

# Download NLTK WordNet resource (if not already downloaded)
try:
    nltk.data.find('corpora/wordnet.zip')
except LookupError:
    print("Downloading NLTK WordNet resource...")
    nltk.download('wordnet')

def extract_text_from_latex(latex_file):
    """
    Extract text from a LaTeX file.
    """
    with open(latex_file, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def separate_text_and_math(latex_content):
    """
    Separate plain text and math expressions in a LaTeX document.
    """
    text_parts = []
    math_parts = []
    # Extended regex to include more LaTeX math environments and specific macros
    parts = re.split(r'(\$.*?\$|\$\$.*?\$\$|\\\(.*?\\\)|\\\[.*?\\\]|\\begin\{.*?\}.*?\\end\{.*?\}|\\mathrm\{.*?\}|\\mathbb\{.*?\}|\\mathcal\{.*?\})', latex_content, flags=re.DOTALL)
    for part in parts:
        if re.match(r'^\$.*\$$|^\$\$.*\$\$$|^\\\(.*\\\)$|^\\\[.*\\\]$|^\\begin\{.*\}.*\\end\{.*\}$|^\\mathrm\{.*\}$|^\\mathbb\{.*\}$|^\\mathcal\{.*\}$', part, flags=re.DOTALL):
            math_parts.append(part)
        else:
            text_parts.append(part)
    return ' '.join(text_parts), ' '.join(math_parts)

def compute_metrics(pred, gt, minlen=4):
    metrics = {}
    if len(pred) < minlen or len(gt) < minlen:
        return metrics
    metrics["edit_dist"] = Levenshtein.distance(pred, gt) / max(len(pred), len(gt))
    reference = gt.split()
    hypothesis = pred.split()
    metrics["bleu"] = sentence_bleu([reference], hypothesis, smoothing_function=SmoothingFunction().method1)
    try:
        from nltk.translate.meteor_score import meteor_score
        metrics["meteor"] = meteor_score([reference], hypothesis)
    except ImportError:
        metrics["meteor"] = np.nan
    reference = set(reference)
    hypothesis = set(hypothesis)
    metrics["precision"] = len(reference.intersection(hypothesis)) / len(hypothesis)
    metrics["recall"] = len(reference.intersection(hypothesis)) / len(reference)
    metrics["f_measure"] = 2 * metrics["precision"] * metrics["recall"] / (metrics["precision"] + metrics["recall"])
    return metrics

def get_input():
    predicted_text_path = input("Enter path to predicted LaTeX file: ")
    ground_truth_text_path = input("Enter path to ground truth LaTeX file: ")
    
    # Extract text from LaTeX files
    predicted_text = extract_text_from_latex(predicted_text_path)
    ground_truth_text = extract_text_from_latex(ground_truth_text_path)
    
    return predicted_text, ground_truth_text

if __name__ == "__main__":
    predicted_text, ground_truth_text = get_input()
    
    # Separate text and math
    pred_text, pred_math = separate_text_and_math(predicted_text)
    gt_text, gt_math = separate_text_and_math(ground_truth_text)
    
    # Calculate metrics for text
    text_metrics = compute_metrics(pred_text, gt_text)
    
    # Calculate metrics for math
    math_metrics = {}
    if pred_math and gt_math:
        math_metrics = compute_metrics(pred_math, gt_math)
    
    # Combine metrics
    metrics = {"text": text_metrics, "math": math_metrics}
    
    # Display results
    print("Metrics:")
    for category, category_metrics in metrics.items():
        print(f"{category} metrics:")
        for metric, value in category_metrics.items():
            print(f"  {metric}: {value}")
    
    # Organize data into a JSON structure
    data = {
        "predictions": [predicted_text],
        "ground_truths": [ground_truth_text],
        "metrics": metrics
    }
    
    # Save the data as a JSON file
    with open("data.json", "w") as json_file:
        json.dump(data, json_file, indent=4)
    
    print("Data saved as data.json")


In [26]:
import re
import numpy as np
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import json
import Levenshtein

# Download NLTK WordNet resource (if not already downloaded)
try:
    nltk.data.find('corpora/wordnet.zip')
except LookupError:
    print("Downloading NLTK WordNet resource...")
    nltk.download('wordnet')

def extract_text_from_latex(latex_file):
    """
    Extract text from a LaTeX file.
    """
    with open(latex_file, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def separate_text_and_math(latex_content):
    """
    Separate plain text and math expressions in a LaTeX document.
    """
    text_parts = []
    math_parts = []
    # Extended regex to include more LaTeX math environments and specific macros
    parts = re.split(r'(\$.*?\$|\$\$.*?\$\$|\\\(.*?\\\)|\\\[.*?\\\]|\\begin\{.*?\}.*?\\end\{.*?\}|\\mathrm\{.*?\}|\\mathbb\{.*?\}|\\mathcal\{.*?\}|\\mathsf\{.*?\}|\\mathit\{.*?\}|\\mathfrak\{.*?\}|\\mathbf\{.*?\})', latex_content, flags=re.DOTALL)
    for part in parts:
        if re.match(r'^\$.*\$$|^\$\$.*\$\$$|^\\\(.*\\\)$|^\\\[.*\\\]$|^\\begin\{.*\}.*\\end\{.*\}$|^\\mathrm\{.*\}$|^\\mathbb\{.*\}$|^\\mathcal\{.*\}$|^\\mathsf\{.*\}$|^\\mathit\{.*\}$|^\\mathfrak\{.*\}$|^\\mathbf\{.*\}$', part, flags=re.DOTALL):
            math_parts.append(part)
        else:
            text_parts.append(part)
    return ' '.join(text_parts), ' '.join(math_parts)

def compute_metrics(pred, gt, minlen=4):
    metrics = {}
    if len(pred) < minlen or len(gt) < minlen:
        return metrics
    metrics["edit_dist"] = Levenshtein.distance(pred, gt) / max(len(pred), len(gt))
    reference = gt.split()
    hypothesis = pred.split()
    metrics["bleu"] = sentence_bleu([reference], hypothesis, smoothing_function=SmoothingFunction().method1)
    try:
        from nltk.translate.meteor_score import meteor_score
        metrics["meteor"] = meteor_score([reference], hypothesis)
    except ImportError:
        metrics["meteor"] = np.nan
    reference = set(reference)
    hypothesis = set(hypothesis)
    metrics["precision"] = len(reference.intersection(hypothesis)) / len(hypothesis)
    metrics["recall"] = len(reference.intersection(hypothesis)) / len(reference)
    metrics["f_measure"] = 2 * metrics["precision"] * metrics["recall"] / (metrics["precision"] + metrics["recall"])
    return metrics

def get_input():
    predicted_text_path = input("Enter path to predicted LaTeX file: ")
    ground_truth_text_path = input("Enter path to ground truth LaTeX file: ")
    
    # Extract text from LaTeX files
    predicted_text = extract_text_from_latex(predicted_text_path)
    ground_truth_text = extract_text_from_latex(ground_truth_text_path)
    
    return predicted_text, ground_truth_text

if __name__ == "__main__":
    predicted_text, ground_truth_text = get_input()
    
    # Separate text and math
    pred_text, pred_math = separate_text_and_math(predicted_text)
    gt_text, gt_math = separate_text_and_math(ground_truth_text)
    
    # Calculate metrics for text
    text_metrics = compute_metrics(pred_text, gt_text)
    
    # Calculate metrics for math
    math_metrics = {}
    if pred_math and gt_math:
        math_metrics = compute_metrics(pred_math, gt_math)
    
    # Combine metrics
    metrics = {"text": text_metrics, "math": math_metrics}
    
    # Display results
    print("Metrics:")
    for category, category_metrics in metrics.items():
        print(f"{category} metrics:")
        for metric, value in category_metrics.items():
            print(f"  {metric}: {value}")
    
    # Organize data into a JSON structure
    data = {
        "predictions": [predicted_text],
        "ground_truths": [ground_truth_text],
        "metrics": metrics
    }
    
    # Save the data as a JSON file
    with open("data.json", "w") as json_file:
        json.dump(data, json_file, indent=4)
    
    print("Data saved as data.json")


Enter path to predicted LaTeX file: C:\Users\Acer\Desktop\docs\naggykiss_detected.txt
Enter path to ground truth LaTeX file: C:\Users\Acer\Desktop\docs\nagykiss24.txt
Metrics:
text metrics:
  edit_dist: 0.7616133866133866
  bleu: 0.1426000920188512
  meteor: 0.43000346051835725
  precision: 0.3949843260188088
  recall: 0.7065420560747664
  f_measure: 0.5067024128686327
math metrics:
  edit_dist: 0.8699402786994028
  bleu: 0.00882686253736031
  meteor: 0.06983548212495301
  precision: 0.3188854489164087
  recall: 0.13606340819022458
  f_measure: 0.19074074074074077
Data saved as data.json


In [1]:
import re
import numpy as np
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import json
import Levenshtein

# Download NLTK WordNet resource (if not already downloaded)
try:
    nltk.data.find('corpora/wordnet.zip')
except LookupError:
    print("Downloading NLTK WordNet resource...")
    nltk.download('wordnet')

def extract_text_from_latex(latex_file):
    """
    Extract text from a LaTeX file.
    """
    with open(latex_file, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def separate_text_and_math(latex_content):
    """
    Separate plain text and math expressions in a LaTeX document.
    """
    text_parts = []
    math_parts = []
    # Extended regex to include more LaTeX math environments and specific macros
    parts = re.split(r'(\$.*?\$|\$\$.*?\$\$|\\\(.*?\\\)|\\\[.*?\\\]|\\begin\{.*?\}.*?\\end\{.*?\}|\\mathrm\{.*?\}|\\mathbb\{.*?\}|\\mathcal\{.*?\})', latex_content, flags=re.DOTALL)
    for part in parts:
        if re.match(r'^\$.*\$$|^\$\$.*\$\$$|^\\\(.*\\\)$|^\\\[.*\\\]$|^\\begin\{.*\}.*\\end\{.*\}$|^\\mathrm\{.*\}$|^\\mathbb\{.*\}$|^\\mathcal\{.*\}$', part, flags=re.DOTALL):
            math_parts.append(part)
        else:
            text_parts.append(part)
    return ' '.join(text_parts), ' '.join(math_parts)

def compute_metrics(pred, gt, minlen=4):
    metrics = {}
    if len(pred) < minlen or len(gt) < minlen:
        return metrics
    metrics["edit_dist"] = Levenshtein.distance(pred, gt) / max(len(pred), len(gt))
    reference = gt.split()
    hypothesis = pred.split()
    metrics["bleu"] = sentence_bleu([reference], hypothesis, smoothing_function=SmoothingFunction().method1)
    try:
        from nltk.translate.meteor_score import meteor_score
        metrics["meteor"] = meteor_score([reference], hypothesis)
    except ImportError:
        metrics["meteor"] = np.nan
    reference = set(reference)
    hypothesis = set(hypothesis)
    metrics["precision"] = len(reference.intersection(hypothesis)) / len(hypothesis)
    metrics["recall"] = len(reference.intersection(hypothesis)) / len(reference)
    metrics["f_measure"] = 2 * metrics["precision"] * metrics["recall"] / (metrics["precision"] + metrics["recall"])
    return metrics

def get_input():
    predicted_text_path = input("Enter path to predicted LaTeX file: ")
    ground_truth_text_path = input("Enter path to ground truth LaTeX file: ")
    
    # Extract text from LaTeX files
    predicted_text = extract_text_from_latex(predicted_text_path)
    ground_truth_text = extract_text_from_latex(ground_truth_text_path)
    
    return predicted_text, ground_truth_text

if __name__ == "__main__":
    predicted_text, ground_truth_text = get_input()
    
    # Separate text and math
    pred_text, pred_math = separate_text_and_math(predicted_text)
    gt_text, gt_math = separate_text_and_math(ground_truth_text)
    
    # Calculate metrics for text
    text_metrics = compute_metrics(pred_text, gt_text)
    
    # Calculate metrics for math
    math_metrics = {}
    if pred_math and gt_math:
        math_metrics = compute_metrics(pred_math, gt_math)
    
    # Combine metrics
    metrics = {"text": text_metrics, "math": math_metrics}
    
    # Display results
    print("Metrics:")
    for category, category_metrics in metrics.items():
        print(f"{category} metrics:")
        for metric, value in category_metrics.items():
            print(f"  {metric}: {value}")
    
    # Organize data into a JSON structure
    data = {
        "predictions": [predicted_text],
        "ground_truths": [ground_truth_text],
        "metrics": metrics
    }
    
    # Save the data as a JSON file
    with open("data.json", "w") as json_file:
        json.dump(data, json_file, indent=4)
    
    print("Data saved as data.json")


Enter path to predicted LaTeX file: C:\Users\Acer\Desktop\docs\naggykiss_detected.txt
Enter path to ground truth LaTeX file: C:\Users\Acer\Desktop\docs\nagykiss24.txt
Metrics:
text metrics:
  edit_dist: 0.7706052101039649
  bleu: 0.14517004801225045
  meteor: 0.43084868409361504
  precision: 0.39831401475237094
  recall: 0.7065420560747664
  f_measure: 0.509433962264151
math metrics:
  edit_dist: 0.8684747145187602
  bleu: 0.008987610443987923
  meteor: 0.07018551007077323
  precision: 0.33881578947368424
  recall: 0.13606340819022458
  f_measure: 0.1941564561734213
Data saved as data.json


In [2]:
import re
import numpy as np
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import json
import Levenshtein

# Download NLTK WordNet resource (if not already downloaded)
try:
    nltk.data.find('corpora/wordnet.zip')
except LookupError:
    print("Downloading NLTK WordNet resource...")
    nltk.download('wordnet')

def extract_text_from_latex(latex_file):
    """
    Extract text from a LaTeX file.
    """
    with open(latex_file, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def separate_text_and_math(latex_content):
    """
    Separate plain text and math expressions in a LaTeX document.
    """
    text_parts = []
    math_parts = []
    # Extended regex to include more LaTeX math environments and specific macros
    parts = re.split(
        r'(\$.*?\$|\$\$.*?\$\$|\\\(.*?\\\)|\\\[.*?\\\]|\\begin\{.*?\}.*?\\end\{.*?\}|\\mathrm\{.*?\}|'
        r'\\mathbb\{.*?\}|\\mathcal\{.*?\}|\\mathsf\{.*?\}|\\mathit\{.*?\}|\\mathfrak\{.*?\}|\\mathbf\{.*?\}|'
        r'\\left\{.*?\\right\}|\\left\(.*?\\right\)|\\left\[.*?\\right\]|\\frac\{.*?\}\{.*?\}|\\sqrt\{.*?\}|'
        r'\\sum|\\prod|\\int|\\lim|\\sin|\\cos|\\tan|\\log|\\ln|\\exp|\\alpha|\\beta|\\gamma|\\delta|'
        r'\\epsilon|\\zeta|\\eta|\\theta|\\iota|\\kappa|\\lambda|\\mu|\\nu|\\xi|\\pi|\\rho|\\sigma|\\tau|'
        r'\\upsilon|\\phi|\\chi|\\psi|\\omega|\\Gamma|\\Delta|\\Theta|\\Lambda|\\Xi|\\Pi|\\Sigma|\\Upsilon|'
        r'\\Phi|\\Psi|\\Omega)', latex_content, flags=re.DOTALL)
    
    for part in parts:
        if re.match(
            r'^\$.*\$$|^\$\$.*\$\$$|^\\\(.*\\\)$|^\\\[.*\\\]$|^\\begin\{.*\}.*\\end\{.*\}$|^\\mathrm\{.*\}$|'
            r'^\\mathbb\{.*\}$|^\\mathcal\{.*\}$|^\\mathsf\{.*\}$|^\\mathit\{.*\}$|^\\mathfrak\{.*\}$|^\\mathbf\{.*\}$|'
            r'^\\left\{.*\\right\}$|^\\left\(.*\\right\}$|^\\left\[.*\\right\]$|^\\frac\{.*\}\{.*\}$|^\\sqrt\{.*\}$|'
            r'^\\sum$|^\\prod$|^\\int$|^\\lim$|^\\sin$|^\\cos$|^\\tan$|^\\log$|^\\ln$|^\\exp$|^\\alpha$|^\\beta$|'
            r'^\\gamma$|^\\delta$|^\\epsilon$|^\\zeta$|^\\eta$|^\\theta$|^\\iota$|^\\kappa$|^\\lambda$|^\\mu$|'
            r'^\\nu$|^\\xi$|^\\pi$|^\\rho$|^\\sigma$|^\\tau$|^\\upsilon$|^\\phi$|^\\chi$|^\\psi$|^\\omega$|'
            r'^\\Gamma$|^\\Delta$|^\\Theta$|^\\Lambda$|^\\Xi$|^\\Pi$|^\\Sigma$|^\\Upsilon$|^\\Phi$|^\\Psi$|^\\Omega$', 
            part, flags=re.DOTALL):
            math_parts.append(part)
        else:
            text_parts.append(part)
    return ' '.join(text_parts), ' '.join(math_parts)

def compute_metrics(pred, gt, minlen=4):
    metrics = {}
    if len(pred) < minlen or len(gt) < minlen:
        return metrics
    metrics["edit_dist"] = Levenshtein.distance(pred, gt) / max(len(pred), len(gt))
    reference = gt.split()
    hypothesis = pred.split()
    metrics["bleu"] = sentence_bleu([reference], hypothesis, smoothing_function=SmoothingFunction().method1)
    try:
        from nltk.translate.meteor_score import meteor_score
        metrics["meteor"] = meteor_score([reference], hypothesis)
    except ImportError:
        metrics["meteor"] = np.nan
    reference = set(reference)
    hypothesis = set(hypothesis)
    metrics["precision"] = len(reference.intersection(hypothesis)) / len(hypothesis)
    metrics["recall"] = len(reference.intersection(hypothesis)) / len(reference)
    metrics["f_measure"] = 2 * metrics["precision"] * metrics["recall"] / (metrics["precision"] + metrics["recall"])
    return metrics

def get_input():
    predicted_text_path = input("Enter path to predicted LaTeX file: ")
    ground_truth_text_path = input("Enter path to ground truth LaTeX file: ")
    
    # Extract text from LaTeX files
    predicted_text = extract_text_from_latex(predicted_text_path)
    ground_truth_text = extract_text_from_latex(ground_truth_text_path)
    
    return predicted_text, ground_truth_text

if __name__ == "__main__":
    predicted_text, ground_truth_text = get_input()
    
    # Separate text and math
    pred_text, pred_math = separate_text_and_math(predicted_text)
    gt_text, gt_math = separate_text_and_math(ground_truth_text)
    
    # Calculate metrics for text
    text_metrics = compute_metrics(pred_text, gt_text)
    
    # Calculate metrics for math
    math_metrics = {}
    if pred_math and gt_math:
        math_metrics = compute_metrics(pred_math, gt_math)
    
    # Combine metrics
    metrics = {"text": text_metrics, "math": math_metrics}
    
    # Display results
    print("Metrics:")
    for category, category_metrics in metrics.items():
        print(f"{category} metrics:")
        for metric, value in category_metrics.items():
            print(f"  {metric}: {value}")
    
    # Organize data into a JSON structure
    data = {
        "predictions": [predicted_text],
        "ground_truths": [ground_truth_text],
        "metrics": metrics
    }
    
    # Save the data as a JSON file
    with open("data.json", "w") as json_file:
        json.dump(data, json_file, indent=4)
    
    print("Data saved as data.json")


Enter path to predicted LaTeX file: C:\Users\Acer\Desktop\docs\naggykiss_detected.txt
Enter path to ground truth LaTeX file: C:\Users\Acer\Desktop\docs\nagykiss24.txt
Metrics:
text metrics:
  edit_dist: 0.7576174309166702
  bleu: 0.13862747755069255
  meteor: 0.4243304308878094
  precision: 0.3927461139896373
  recall: 0.6903460837887068
  f_measure: 0.500660501981506
math metrics:
  edit_dist: 0.869309450633884
  bleu: 0.00884649122346797
  meteor: 0.07135679397605077
  precision: 0.3202416918429003
  recall: 0.1392904073587385
  f_measure: 0.19413919413919414
Data saved as data.json


In [4]:
import re
import numpy as np
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import json
import Levenshtein

# Download NLTK WordNet resource (if not already downloaded)
try:
    nltk.data.find('corpora/wordnet.zip')
except LookupError:
    print("Downloading NLTK WordNet resource...")
    nltk.download('wordnet')

def extract_text_from_latex(latex_file):
    """
    Extract text from a LaTeX file.
    """
    with open(latex_file, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

def separate_text_and_math(latex_content):
    """
    Separate plain text and math expressions in a LaTeX document.
    """
    text_parts = []
    math_parts = []
    # Regex to include more LaTeX math environments and macros
    parts = re.split(
        r'(\$.*?\$|\$\$.*?\$\$|\\\(.*?\\\)|\\\[.*?\\\]|\\begin\{.*?\}.*?\\end\{.*?\}|\\mathrm\{.*?\}|'
        r'\\mathbb\{.*?\}|\\mathcal\{.*?\}|\\mathsf\{.*?\}|\\mathit\{.*?\}|\\mathfrak\{.*?\}|\\mathbf\{.*?\}|'
        r'\\left\{.*?\\right\}|\\left\(.*?\\right\)|\\left\[.*?\\right\]|\\frac\{.*?\}\{.*?\}|\\sqrt\{.*?\}|'
        r'\\sum|\\prod|\\int|\\lim|\\sin|\\cos|\\tan|\\log|\\ln|\\exp|\\alpha|\\beta|\\gamma|\\delta|'
        r'\\epsilon|\\zeta|\\eta|\\theta|\\iota|\\kappa|\\lambda|\\mu|\\nu|\\xi|\\pi|\\rho|\\sigma|\\tau|'
        r'\\upsilon|\\phi|\\chi|\\psi|\\omega|\\Gamma|\\Delta|\\Theta|\\Lambda|\\Xi|\\Pi|\\Sigma|\\Upsilon|'
        r'\\Phi|\\Psi|\\Omega)', latex_content, flags=re.DOTALL)
    
    for part in parts:
        if re.match(
            r'^\$.*\$$|^\$\$.*\$\$$|^\\\(.*\\\)$|^\\\[.*\\\]$|^\\begin\{.*\}.*\\end\{.*\}$|^\\mathrm\{.*\}$|'
            r'^\\mathbb\{.*\}$|^\\mathcal\{.*\}$|^\\mathsf\{.*\}$|^\\mathit\{.*\}$|^\\mathfrak\{.*\}$|^\\mathbf\{.*\}$|'
            r'^\\left\{.*\\right\}$|^\\left\(.*\\right\}$|^\\left\[.*\\right\]$|^\\frac\{.*\}\{.*\}$|^\\sqrt\{.*\}$|'
            r'^\\sum$|^\\prod$|^\\int$|^\\lim$|^\\sin$|^\\cos$|^\\tan$|^\\log$|^\\ln$|^\\exp$|^\\alpha$|^\\beta$|'
            r'^\\gamma$|^\\delta$|^\\epsilon$|^\\zeta$|^\\eta$|^\\theta$|^\\iota$|^\\kappa$|^\\lambda$|^\\mu$|'
            r'^\\nu$|^\\xi$|^\\pi$|^\\rho$|^\\sigma$|^\\tau$|^\\upsilon$|^\\phi$|^\\chi$|^\\psi$|^\\omega$|'
            r'^\\Gamma$|^\\Delta$|^\\Theta$|^\\Lambda$|^\\Xi$|^\\Pi$|^\\Sigma$|^\\Upsilon$|^\\Phi$|^\\Psi$|^\\Omega$', 
            part, flags=re.DOTALL):
            math_parts.append(part)
        else:
            text_parts.append(part)
    return ' '.join(text_parts), ' '.join(math_parts)

def compute_metrics(pred, gt, minlen=4): 
    metrics = {}
    if len(pred) < minlen or len(gt) < minlen:
        return metrics
    metrics["edit_dist"] = Levenshtein.distance(pred, gt) / max(len(pred), len(gt))
    reference = gt.split()
    hypothesis = pred.split()
    metrics["bleu"] = sentence_bleu([reference], hypothesis, smoothing_function=SmoothingFunction().method1)
    try:
        from nltk.translate.meteor_score import meteor_score
        metrics["meteor"] = meteor_score([reference], hypothesis)
    except ImportError:
        metrics["meteor"] = np.nan
    reference = set(reference)
    hypothesis = set(hypothesis)
    metrics["precision"] = len(reference.intersection(hypothesis)) / len(hypothesis)
    metrics["recall"] = len(reference.intersection(hypothesis)) / len(reference)
    metrics["f_measure"] = 2 * metrics["precision"] * metrics["recall"] / (metrics["precision"] + metrics["recall"])
    return metrics

def get_input():
    predicted_text_path = input("Enter path to predicted LaTeX file: ")
    ground_truth_text_path = input("Enter path to ground truth LaTeX file: ")
    
    # Extract text from LaTeX files
    predicted_text = extract_text_from_latex(predicted_text_path)
    ground_truth_text = extract_text_from_latex(ground_truth_text_path)
    
    return predicted_text, ground_truth_text

if __name__ == "__main__":
    predicted_text, ground_truth_text = get_input()
    
    # Separate text and math
    pred_text, pred_math = separate_text_and_math(predicted_text)
    gt_text, gt_math = separate_text_and_math(ground_truth_text)
    
    # Metrics for text
    text_metrics = compute_metrics(pred_text, gt_text)
    
    # Metrics for math
    math_metrics = {}
    if pred_math and gt_math:
        math_metrics = compute_metrics(pred_math, gt_math)
    
    # Metrics for combined content
    combined_pred = pred_text + ' ' + pred_math
    combined_gt = gt_text + ' ' + gt_math
    combined_metrics = compute_metrics(combined_pred, combined_gt)
    
    # Combine metrics
    metrics = {"text": text_metrics, "math": math_metrics, "combined": combined_metrics}
    
    # Display results
    print("Metrics:")
    for category, category_metrics in metrics.items():
        print(f"{category} metrics:")
        for metric, value in category_metrics.items():
            print(f"  {metric}: {value}")
    
 


Enter path to predicted LaTeX file: C:\Users\Acer\Desktop\docs\naggykiss_detected.txt
Enter path to ground truth LaTeX file: C:\Users\Acer\Desktop\docs\nagykiss24.txt
Metrics:
text metrics:
  edit_dist: 0.7576174309166702
  bleu: 0.13862747755069255
  meteor: 0.4243304308878094
  precision: 0.3927461139896373
  recall: 0.6903460837887068
  f_measure: 0.500660501981506
math metrics:
  edit_dist: 0.869309450633884
  bleu: 0.00884649122346797
  meteor: 0.07135679397605077
  precision: 0.3202416918429003
  recall: 0.1392904073587385
  f_measure: 0.19413919413919414
combined metrics:
  edit_dist: 0.8258880433777996
  bleu: 0.09761393425286502
  meteor: 0.3535532376625064
  precision: 0.5234842015371477
  recall: 0.5487914055505819
  f_measure: 0.5358391608391607


In [None]:
edit_dist: 0.818915207689667
  bleu: 0.045156766063108485
  meteor: 0.1989619951492028
  precision: 0.6677704194260485
  recall: 0.5822906641000962
  f_measure: 0.6221079691516709