## Import libraries and dataset

In [None]:
import pandas as pd
import numpy as np
import pickle as pkl

In [None]:
!unzip /content/dataset.zip -d /content/dataset

Archive:  /content/dataset.zip
  inflating: /content/dataset/ch_0661a3fb.bang_hieu.112614.jpg  
  inflating: /content/dataset/ch_0c6015c6.bang_hieu.012611.jpg  
  inflating: /content/dataset/ch_0ca438e1.bang_hieu.105209.jpg  
  inflating: /content/dataset/ch_17e509ce.bang_hieu.105926.jpg  
  inflating: /content/dataset/ch_193f1265.bang_hieu.110449.jpg  
  inflating: /content/dataset/CH_1e13a596.bang_hieu.093702.jpg  
  inflating: /content/dataset/ch_2b1276ee.bang_hieu.110236.jpg  
  inflating: /content/dataset/ch_2bac1698.bang_hieu.103907.jpg  
  inflating: /content/dataset/ch_311027db.bang_hieu.101332.jpg  
  inflating: /content/dataset/ch_31271993.bang_hieu.101512.jpg  
  inflating: /content/dataset/ch_3492e45c.bang_hieu.105806.jpg  
  inflating: /content/dataset/ch_3d239014.bang_hieu.112558.jpg  
  inflating: /content/dataset/ch_48c2954d.bang_hieu.111900.jpg  
  inflating: /content/dataset/ch_52e20baa.bang_hieu.102052.jpg  
  inflating: /content/dataset/ch_5705a6b8.bang_hieu.110321.

In [None]:
import gspread
from google.auth import default
from google.colab import auth

# Sign in from Colab, then pass the resulting default credentials to gspread.
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

# Label spreadsheet; only its first worksheet is read.
url = "https://docs.google.com/spreadsheets/d/1M4LiNR_uOGSLUke82OjuuYdhEwYc8wKAkMUZil-zenY"
wb = gc.open_by_url(url)
worksheet = wb.get_worksheet(0)

# get_all_values() gives a list of rows; the first row supplies the column names.
rows = worksheet.get_all_values()
header, records = rows[0], rows[1:]

# Empty cells (in every column) are replaced by the 'No phone number' marker.
df = pd.DataFrame.from_records(records, columns=header).replace('', 'No phone number')

df.head()

In [None]:
from google.colab.patches import cv2_imshow
import shutil
import cv2 as cv
from tqdm import tqdm

# Folder holding the images and the file names listed in the sheet's `Name` column.
# NOTE: `dir` shadows the Python builtin; the name is kept because later cells use it.
dir = "/content/dataset/"
listfile = df.Name

# Preview the first few images together with their labelled phone number.
N_PREVIEW = 5
for i in range(min(N_PREVIEW, len(listfile))):
  img_path = dir + listfile[i]
  img = cv.imread(img_path)
  if img is None:
    # cv.imread returns None (it does not raise) when the file is missing or
    # cannot be decoded; report it instead of crashing inside cv2_imshow.
    print("Could not read image: ", img_path)
    continue
  cv2_imshow(img)
  print("Phone number ground truth: ",df['Numbers ground truth'][i])

## Run OCR

In [None]:
from getpass import getpass

import google.generativeai as genai
import google.ai.generativelanguage as glm

# getpass hides the typed key, so the secret is not echoed into the notebook's
# saved cell output the way input() would.
API_KEY = getpass("Enter your Gemini API key: ")
genai.configure(api_key=API_KEY)

# Model used for all OCR requests below.
model = genai.GenerativeModel(model_name="gemini-1.5-flash")

In [None]:
import PIL.Image
import time

# Throttling: pause before every REQUESTS_PER_BURST-th request.
# NOTE(review): presumably chosen to stay under the API's per-minute quota — confirm.
REQUESTS_PER_BURST = 14
REQUEST_PAUSE_SECONDS = 50
# Answer recorded when the model returns no usable text; same wording the prompt
# asks the model to use when an image has no phone number.
NO_PHONE_ANSWER = "No phone number"

y_gemini = []
prompt = "Extract only the one most prominent phone number and closest to the center of this signboard. Answer should contain 10 or 11 digits and not contain any other text. If theres no phone number in the image, return 'No phone number'."

for i in tqdm(range(len(listfile))):
  if (i+1) % REQUESTS_PER_BURST == 0:
    time.sleep(REQUEST_PAUSE_SECONDS)
  # The context manager closes the image file once the request has completed.
  with PIL.Image.open(dir + listfile[i]) as sample_file:
    response = model.generate_content([prompt, sample_file], stream=True)
    response.resolve()
  try:
    answer = response.text
  except ValueError as err:
    # response.text raises ValueError when the reply has no text part (for
    # example a blocked response). Log it and keep going so one bad image
    # does not throw away the whole rate-limited run.
    print(i, "no usable response:", err)
    answer = NO_PHONE_ANSWER
  y_gemini.append(answer)
  print(i,answer)

In [None]:
import pickle

# Save the collected model answers to disk. Uses the `pickle` name imported in
# this cell rather than the `pkl` alias defined in a different cell.
with open('y_gemini.pkl', 'wb') as f:
  pickle.dump(y_gemini, f)

In [None]:
# Display the list of collected model answers.
y_gemini

## Evaluate

In [None]:
!pip install thefuzz

Collecting thefuzz
  Downloading thefuzz-0.22.1-py3-none-any.whl.metadata (3.9 kB)
Collecting rapidfuzz<4.0.0,>=3.0.0 (from thefuzz)
  Downloading rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading thefuzz-0.22.1-py3-none-any.whl (8.2 kB)
Downloading rapidfuzz-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, thefuzz
Successfully installed rapidfuzz-3.11.0 thefuzz-0.22.1


In [None]:
import pickle as pkl

# Read the saved model answers back from y_gemini.pkl.
with open('y_gemini.pkl', 'rb') as f:
  y_gemini = pkl.loads(f.read())

In [None]:
from thefuzz import fuzz


def _digits_only(text):
  """Return the digit characters of ``str(text)``, in their original order."""
  return ''.join(ch for ch in str(text) if ch.isdigit())


# Number of scores printed per output row.
SCORES_PER_ROW = 10

y = df['Numbers ground truth']
# Digit-only predictions and targets; filled here and reused by later cells.
preds = []
target = []

total = 0
for i in range(0, len(y)):
  n1 = _digits_only(y[i])
  ng = _digits_only(y_gemini[i])
  preds.append(ng)
  target.append(n1)
  # Compute the similarity once and reuse it for printing and for the running sum.
  score = fuzz.ratio(n1, ng)
  print(score, end='\t')
  total += score
  # Break the line after every SCORES_PER_ROW-th score. Counting from i + 1
  # keeps the first row full instead of ending it after a single value.
  if (i + 1) % SCORES_PER_ROW == 0:
    print()
print()
# Guard against an empty label column so the summary line never divides by zero.
average = total / len(y) if len(y) else float('nan')
print('Average: ' + str(average))

100	
100	100	100	100	100	100	100	50	100	100	
100	100	100	100	100	100	100	100	100	100	
100	100	100	100	100	100	100	100	100	100	
100	95	100	100	100	100	100	100	100	100	
100	100	100	100	100	100	100	100	100	100	
100	100	100	100	100	100	100	100	100	100	
100	100	100	100	100	100	100	100	95	100	
100	0	100	100	100	100	100	100	100	40	
100	100	100	100	100	100	100	100	100	100	
100	50	100	100	100	100	100	100	100	100	
100	100	100	100	90	100	100	100	100	60	
40	100	100	100	80	100	60	100	100	100	
90	100	100	100	30	100	100	80	100	100	
40	82	30	100	90	40	100	100	95	30	
100	80	40	40	100	100	100	40	50	100	
100	40	100	100	90	90	90	100	
Average: 92.62264150943396


In [None]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.6.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.11.9-py3-none-any.whl.metadata (5.2 kB)
Downloading torchmetrics-1.6.1-py3-none-any.whl (927 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m927.3/927.3 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.11.9-py3-none-any.whl (28 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.11.9 torchmetrics-1.6.1


In [None]:
from torchmetrics.text import CharErrorRate
# Character error rate between the predicted strings and the target strings.
# `preds` and `target` are the digit-only lists built in the fuzzy-matching cell.
cer = CharErrorRate()
# The bare call on the last line displays the resulting value as the cell output.
cer(preds, target)

tensor(0.0948)