In [None]:
pip install nltk



In [None]:
pip install scikit-learn



In [None]:
pip install rouge-score

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=f6b4289263bf419742a211a012eed63e8187decf674e78518428591a05f22a57
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [None]:
pip install torch torchvision torchaudio

In [None]:
pip install easyocr

In [None]:
pip install easyocr[english]

In [None]:
pip install transformers timm flash_attn einops

In [None]:
import nltk
from nltk.translate.bleu_score import sentence_bleu
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from rouge_score import rouge_scorer
from nltk.translate.meteor_score import meteor_score

In [None]:
# NLTK data used by the metric helpers in this notebook:
#   punkt / punkt_tab -> tokenizer models behind nltk.word_tokenize. Newer NLTK
#                        releases look up 'punkt_tab' instead of the pickled
#                        'punkt' models, so both are fetched to stay compatible
#                        with whichever version `pip install nltk` resolved to.
#   wordnet, omw-1.4  -> WordNet corpora requested for the METEOR metric.
for resource in ('punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
  nltk.download(resource)

# BLEU score

In [None]:
def calculate_bleu_score(generated: str, ground_truth: str):
  """Sentence-level BLEU of ``generated`` against a single reference.

  Both strings are tokenized with ``nltk.word_tokenize``. ``sentence_bleu`` is
  then called with its default settings, using ``ground_truth`` as the only
  reference.

  Args:
    generated: Hypothesis text (e.g. OCR output).
    ground_truth: Reference text.

  Returns:
    The value returned by ``sentence_bleu``.
  """
  hypothesis_tokens = nltk.word_tokenize(generated)
  reference_tokens = nltk.word_tokenize(ground_truth)
  # sentence_bleu expects a list of references, each a list of tokens.
  references = [reference_tokens]
  return sentence_bleu(references, hypothesis_tokens)

# ROUGE score

In [None]:
def calculate_rouge_score(generated: str, ground_truth: str):
  """Score ``generated`` against ``ground_truth`` with ROUGE-1, ROUGE-2 and ROUGE-L.

  A ``RougeScorer`` with stemming enabled is built on every call.

  Args:
    generated: Hypothesis text.
    ground_truth: Reference text.

  Returns:
    A 3-tuple ``(rouge1, rouge2, rougeL)`` holding the ``fmeasure`` attribute
    of each score.
  """
  rouge_types = ['rouge1', 'rouge2', 'rougeL']
  scorer = rouge_scorer.RougeScorer(rouge_types, use_stemmer=True)

  # The reference is passed first, the hypothesis second.
  results = scorer.score(ground_truth, generated)
  return tuple(results[rouge_type].fmeasure for rouge_type in rouge_types)

# TER score

In [None]:
def calculate_ter_score(generated: str, ground_truth: str):
    """Word-level edit rate of ``generated`` relative to ``ground_truth``.

    The score is ``edit_distance(ground_truth, generated)`` divided by the
    number of whitespace-separated words in ``ground_truth``. Only insertions,
    deletions and substitutions are counted (whatever ``edit_distance``
    reports); no shift operation is modelled.

    Args:
        generated: Hypothesis text.
        ground_truth: Reference text.

    Returns:
        The edit rate as a float, or ``float('inf')`` when the reference
        contains no words.
    """
    reference_word_count = len(ground_truth.split())
    edit_count = edit_distance(ground_truth, generated)

    # An empty reference would mean dividing by zero; report infinity instead.
    if reference_word_count > 0:
        return edit_count / reference_word_count
    return float('inf')


def edit_distance(ref, hyp):
    """Word-level Levenshtein distance between two strings.

    Both inputs are split on whitespace, and the minimum number of word
    insertions, deletions and substitutions (each costing 1) that turns one
    word sequence into the other is returned.

    Args:
        ref: Reference string.
        hyp: Hypothesis string.

    Returns:
        The edit distance as an int.
    """
    ref_tokens = ref.split()
    hyp_tokens = hyp.split()

    # Only two rows of the DP table are alive at any time. `previous` starts as
    # the row for an empty reference prefix: j insertions reach j hyp tokens.
    previous = list(range(len(hyp_tokens) + 1))

    for row, ref_token in enumerate(ref_tokens, start=1):
        # Column 0: deleting `row` reference tokens reaches an empty hypothesis.
        current = [row]
        for col, hyp_token in enumerate(hyp_tokens, start=1):
            deletion = previous[col] + 1
            insertion = current[col - 1] + 1
            substitution = previous[col - 1] + (0 if ref_token == hyp_token else 1)
            current.append(min(deletion, insertion, substitution))
        previous = current

    return previous[-1]

# METEOR score

In [None]:
def calculate_meteor_score(generated: str, ground_truth: str):
  """METEOR score of ``generated`` against a single reference.

  Both strings are tokenized with ``nltk.word_tokenize`` before being handed
  to ``meteor_score``, with ``ground_truth`` as the only reference.

  Args:
    generated: Hypothesis text.
    ground_truth: Reference text.

  Returns:
    The value returned by ``meteor_score``.
  """
  hypothesis_tokens = nltk.word_tokenize(generated)
  reference_tokens = nltk.word_tokenize(ground_truth)
  # meteor_score takes a list of tokenized references, then the tokenized hypothesis.
  references = [reference_tokens]
  return meteor_score(references, hypothesis_tokens)

# easyOCR wrapper

In [None]:
import easyocr

def get_easyOCR(image_path: str, reader=None):
  """Run EasyOCR on an image and return all recognised text as one string.

  Args:
    image_path: Image location, passed unchanged to ``readtext``.
    reader: Optional object exposing ``readtext`` (for example an already
      constructed ``easyocr.Reader``). When omitted, a new English reader is
      built for this call, so pass one in to reuse it across many images.

  Returns:
    The text of every detection, in the order ``readtext`` returned them,
    joined with single spaces. An empty string when nothing was detected.
  """
  if reader is None:
    reader = easyocr.Reader(['en']) # specify the language
  result = reader.readtext(image_path)

  # Each detection is unpacked as a (bbox, text, prob) triple; only the text
  # is kept. Joining all of them fixes the earlier behaviour of returning just
  # the last fragment and failing outright when there were no detections.
  return " ".join(text for (_bbox, text, _prob) in result)

# Florence-2 Wrapper

In [None]:
from transformers import AutoProcessor, AutoModelForCausalLM
from PIL import Image
import requests
import copy
# Checkpoint identifier shared by the processor and the model.
model_id = 'microsoft/Florence-2-large'

# Both loaders are called with trust_remote_code=True.
# NOTE(review): this presumably executes code shipped with the checkpoint - confirm the source is trusted.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
# Switch to eval mode and move the model to the GPU (a CUDA device is required).
model = model.eval().cuda()

def run_example(task_prompt, text_input=None, image=None):
    """Run one Florence-2 task prompt on an image and return the parsed result.

    Uses the notebook-level ``processor`` and ``model``.

    Args:
        task_prompt: Task token handed to the processor, e.g. ``'<OCR>'``.
        text_input: Optional extra text appended directly after the task token.
        image: Image to process; its ``width`` and ``height`` attributes are
            read. When omitted, the notebook-level variable ``image`` is used,
            which keeps existing ``run_example(task_prompt)`` calls working.

    Returns:
        The value produced by ``processor.post_process_generation`` for this
        task (the evaluation loop in this notebook indexes it with the task
        prompt).

    Raises:
        NameError: If no image is passed and no global ``image`` exists.
    """
    if image is None:
        # Backward compatibility: earlier callers set a global `image` before
        # calling instead of passing it explicitly.
        try:
            image = globals()["image"]
        except KeyError:
            raise NameError(
                "run_example() needs an image: pass image=... or define a global `image`"
            ) from None

    prompt = task_prompt if text_input is None else task_prompt + text_input
    inputs = processor(text=prompt, images=image, return_tensors="pt")

    # Follow the model's own device rather than hard-coding .cuda(), so the
    # inputs always end up where the weights are.
    device = model.device
    generated_ids = model.generate(
        input_ids=inputs["input_ids"].to(device),
        pixel_values=inputs["pixel_values"].to(device),
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,
        num_beams=3,
    )

    # Special tokens are kept in the decoded text that is handed to
    # post_process_generation.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height),
    )

    return parsed_answer


# Examples

In [None]:
# Reference (ground-truth) caption text, keyed by the relative path of the
# caption image it belongs to. The evaluation loop opens each key with
# Image.open and scores the OCR output against the value, so the strings are
# used verbatim as references.
# NOTE(review): the unusual spellings are presumably deliberate transcriptions
# of the image text - confirm before "correcting" any of them.
ground_truth_dict = {"./adsc202100082-sup-0001-misc_information_page_14_table_0_caption_0.png" : "Table S4 shows the effect that the catalyst loading (mol %) had on the efficiency and selectivity of the reaction. Using C4 as the catalyst, we observed that lowering the catalyst loading to 1 mol% (entry 2) was deleterious for the effciency of the reaction, albeit the enantiocontrol of the process remained suprisingly unchanged (20% yield, 87:13 er). Furthermore, as raising the loading up to 20% (entry 3) didn't improved the overall reaction performance, we chose to maintain 10 mol% as the best loading for the process",
                "./anie201805732_page_3_figure_0_caption_0.png" : "Scheme 5. Cycloaddition product diversification. Reagents: a) m-CPBA; b) Me3S(O)I, NaH; c) NBS, AIBN, then NaN3; d_Pd(OH)2/C, TBHP, K2CO3; e) CH3NO2, DBU; f) KHMDS, BnBr, HMPA; g) OsO4, NMO; h) Pb(OAc)4, then NaBH3CN, BnNH2, AcOH; i) Me3SiCHN2, nBuLi; j) TsOH; k) DIBAL; i) MeC(OEt)3, PivOH; m) Pd/C, H2; n) RuCl3.3H2O, NaIO4. AIBN =2,2'-azobisisobutyronitrile, DIBAL = di-isobutylaluminium hydride, DBU = 1,8-diazabicyclo[5.4.0]undec-7-ene, HMDS = hexamethyldisilazide, m-CPBA = meta-chloroperbenzoic acid, NBS = N-bromosuccinimide, NMO = N-methylmorpholine N-oxide, Piv = pivaloyl, TBHP = tert-butyl hydrogen peroxide",
                "./cs2c01442_page_3_figure_2_caption_0.png" : "Figure 2. Photocatalyzed sulfinate salt formation and telescoped reaction from a primary sulfonamide. aReaction conditions: (1) p-CF3-benzyl bromide (1.5 equiv), TBAB (20 mol%), 100 oC, 24 h; (2) NFSI (1.5 equiv), K2CO3 (1.1 equiv), solvent switch to THF/H2) (10:1, 0.2 M), rt./ 12 h; (3) acidify using 2 M a1. H3PO4 extract (Na2CO3). bReaction conditions: step (i) sulfonamide 9 (1.0 equiv), p-anisaldehyde (1.0 equiv), Amberlyst 15 (5 mg/mmol), PhMe (0.1 M), Dean-Stark, 12 H; step (iii) MVK (2.5 equiv), 5CzBN (0.5 mol%) TMS3Si-H (1.5 equiv), PhMe (0.05 M), blue LEDs, 15 oC, 4h",
                "./cs2c03805_page_5_figure_0_caption_0.png" : "Figure 4. (a) Photoexcitation of benzophenone entails the hydrogen atom abstraction from silane. (b) Microsecond triplet-triplet differential absorbtion spectra recorded at different times after laster excitation of employed BP I in deoxygenated acetonitrile with a 5 ns laser pulse at 319 nm in the presence of an excess of (TMS)3SiH (8.6 equiv). (c) Comparison of microsecond triplet-triplet differ- ential absorbtion spectra recorded at 50 us after 319 excitation in the presence of (red dash) (TMS)3SiH (8.6 equiv) and (green dash) tetrahydrofuran (8.6 equiv).",
                "./cs3c05150_page_4_figure_0_caption_0.png" : "Figure 3. (A) Possivle pathways suggested by DFT calculations. (B) DFT calculation energy profile and structures of the transition states. (C) Quenching of emission with various substrates (40 mM). Addition of collidine (10 mM) does not enhance the quenching (only additive effect and not cooperative). (D) Fraction of quenching efficiency of Fukuzumiz catalyst emission with two different substrates (40 mM), calculated as 1- Iquenccher/I0, where Iquencher and I0 are the emission intensities in presence and absence of quencher, respectively. The inset shows different areas of contribution of the acridinium and charge transfer states to the fluorescence spectrum of the Fukuzumi catalyst. (E) Transient absorbption spectra with the model alcohol substrate (1o) (40 mM) at 300 ns after laser excitation at 430 nm. (F) Transient absorbption spectra with 4-methylanisole (40nm) at 100 ns after laser excitation at 430 nm. (G) transient absorbtion spectra with the model alcohol substrate (1n) (50 mM)."}

In [None]:
# Run Florence-2 OCR on every annotated caption image and print each metric
# against its reference text.
for key, ground_truth in ground_truth_dict.items():
  # `image` has to be a notebook-level name: run_example() is called without
  # an image argument and picks it up from the global namespace.
  image = Image.open(key)
  task_prompt = '<OCR>'
  ocr_output = run_example(task_prompt)
  generated = ocr_output["<OCR>"].strip()

  print("############")
  print(key)
  print(generated)
  # Printed in a fixed order: BLEU, ROUGE (1 / 2 / L F-measures), TER, METEOR.
  for metric in (calculate_bleu_score, calculate_rouge_score,
                 calculate_ter_score, calculate_meteor_score):
    print(metric(generated, ground_truth))
  print("############")