# In [None]:
import cv2
from pytesseract import pytesseract
import transformers

def extract_text(image_path):
  """Extract lowercase text from an image file using Tesseract OCR.

  The image is loaded with OpenCV, converted to grayscale, and passed to
  ``pytesseract.image_to_string``.

  Args:
    image_path: Path of the image file to read.

  Returns:
    The OCR output as a lowercase string.

  Raises:
    FileNotFoundError: If OpenCV could not load the image (the path does not
      exist, or the file could not be decoded as an image).
  """
  img = cv2.imread(image_path)
  # cv2.imread signals failure by returning None instead of raising; without
  # this check the failure surfaces later as an opaque error inside cvtColor.
  if img is None:
    raise FileNotFoundError(
        f"Could not read image (missing file or unsupported format): {image_path!r}"
    )

  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  text = pytesseract.image_to_string(gray)
  return text.lower()  # Convert to lowercase for consistency

def compare_text(text1, text2, model_name="sentence-transformers/all-mpnet-base-v2"):
  """Return the cosine similarity between sentence embeddings of two texts.

  Both texts are tokenized and run through the pre-trained model named by
  ``model_name``; each text's token embeddings are mean-pooled (ignoring
  padding positions) into one vector, and the two vectors are compared.

  Args:
    text1: First text string.
    text2: Second text string.
    model_name: Hugging Face model identifier or local path, loaded through
      ``transformers.AutoTokenizer`` / ``transformers.AutoModel``.

  Returns:
    The cosine similarity of the two embeddings as a Python float.
  """
  # Imported locally so the function is self-contained; torch is the tensor
  # backend used for the "pt" tensors requested from the tokenizer below.
  import torch
  import torch.nn.functional as F

  # `transformers` has no SentenceTransformer class or `util` module, so the
  # model is loaded through the generic Auto* classes and pooled manually.
  # NOTE: the model is loaded on every call; callers comparing many pairs may
  # want to cache it.
  tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
  model = transformers.AutoModel.from_pretrained(model_name)
  model.eval()

  encoded = tokenizer(
      [text1, text2], padding=True, truncation=True, return_tensors="pt"
  )
  with torch.no_grad():  # inference only, no gradients needed
    output = model(**encoded)

  # First element of the model output holds the per-token embeddings.
  token_embeddings = output[0]

  # Mean pooling weighted by the attention mask so padding tokens do not
  # contribute to the sentence vector.
  mask = encoded["attention_mask"].unsqueeze(-1).expand(token_embeddings.size()).float()
  summed = (token_embeddings * mask).sum(dim=1)
  counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against division by zero
  embeddings = summed / counts

  # Calculate cosine similarity between the two sentence embeddings
  similarity = F.cosine_similarity(
      embeddings[0].unsqueeze(0), embeddings[1].unsqueeze(0)
  ).item()
  return similarity

# Demo: OCR an image, then score the recognized text against a reference.
image_text = extract_text("path/to/your/image.jpg")  # Replace with your image path
reference_text = "This is the text string to compare with."
similarity_score = compare_text(image_text, reference_text)

print(f"Similarity score between image text and reference text: {similarity_score}")

# Scores strictly above 0.5 are reported as "similar".
print(
    "The texts are considered similar."
    if similarity_score > 0.5
    else "The texts are not very similar."
)
