In [6]:
import os
from google.cloud import vision_v1
from google.oauth2 import service_account
from tqdm import tqdm
import openpyxl
import shutil

def batch_ocr(image_folder_path, credentials_path):
    """Run Google Cloud Vision text detection on every image in a folder.

    Args:
        image_folder_path: Directory whose immediate entries are scanned.
            Only names ending in .jpg, .jpeg, .png or .bmp (in any letter
            case) are sent for OCR.
        credentials_path: Path to a Google Cloud service-account JSON key.

    Returns:
        A list of ``(filename, lines)`` tuples in sorted filename order.
        ``lines`` is the description of the last text annotation split on
        newlines. Images with no annotations, or for which the API reported
        an error, are left out of the list.
    """
    import warnings

    # Initialize the client
    credentials = service_account.Credentials.from_service_account_file(credentials_path)
    client = vision_v1.ImageAnnotatorClient(credentials=credentials)

    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp')
    results = []

    # Sorted so the output order is reproducible; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(image_folder_path)):
        # Compare in lower case so files such as "IMG.JPG" are not skipped.
        if not filename.lower().endswith(image_suffixes):
            continue

        image_path = os.path.join(image_folder_path, filename)
        with open(image_path, 'rb') as image_file:
            content = image_file.read()

        # Perform OCR on the image
        image = vision_v1.Image(content=content)
        response = client.text_detection(image=image)

        # A per-image failure is reported inside the response rather than
        # raised, so surface it instead of treating it as "no text found".
        if response.error.message:
            warnings.warn(f"OCR failed for {image_path}: {response.error.message}")
            continue

        texts = response.text_annotations
        if texts:
            # NOTE(review): this keeps the LAST annotation. The Vision docs
            # describe the first annotation as the full detected text and the
            # rest as individual words — confirm that [-1] is intended.
            detected_text = texts[-1].description.split('\n')
            results.append((filename, detected_text))

    return results
# Example usage:
# Replace 'path/to/your/credentials.json' and 'path/to/your/image_folder' with your paths
# detected_texts = batch_ocr('path/to/your/image_folder', 'path/to/your/credentials.json')
# for i, (filename, lines) in enumerate(detected_texts, start=1):
#     print(f"Text from Image {i} ({filename}):")
#     for line in lines:
#         print(line)
# image_folder_path = '../seg_letter/J02'

In [12]:
# OCR every sub-folder of the pilot data directory and write one spreadsheet
# row per recognised image: (sub-folder name, image file name, OCR text).
# image_folder_name = 'seg_letter'
image_folder_path = '../../pilot data/data/'
# Replace with your own Google Cloud credentials JSON file path
credentials_path = '../../credentials.json'
workbook = openpyxl.Workbook()
sheet = workbook.active

# Sorted so the row order is reproducible between runs.
for file_name in tqdm(sorted(os.listdir(image_folder_path))):
    subfolder_path = os.path.join(image_folder_path, file_name)
    # batch_ocr lists its argument as a directory, so a stray file here
    # (e.g. .DS_Store) would otherwise abort the whole run.
    if not os.path.isdir(subfolder_path):
        continue
    result = batch_ocr(subfolder_path, credentials_path)

    for (img_name, ocr_text) in result:
        # The OCR lines are concatenated with no separator between them.
        text = "".join(ocr_text)
        sheet.append((file_name, img_name, text))

workbook.save("../pilotdata_output.xlsx")
workbook.close()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [06:22<00:00, 47.86s/it]


In [6]:
# OCR one folder of segmented letter images, then copy every recognised image
# into a new folder under a name prefixed with the text found in it.
image_folder_name = 'RC05028_00'
# NOTE(review): there is no path separator between 'seg_letter' and the folder
# name, so this resolves to '../../seg_letter/seg_letterRC05028_00' — confirm
# that this is the intended directory.
image_folder_path = '../../seg_letter/seg_letter'+image_folder_name
# Replace with your own Google Cloud credentials JSON file path
credentials_path = '../../credentials.json'
result = batch_ocr(image_folder_path,credentials_path)
# List of (source filename, OCR lines) tuples returned by batch_ocr
file_rename_list = result

# Folder that holds the source images
folder_path = image_folder_path

# Destination directory, expressed relative to the source folder
new_directory_name = "../../seg_letter_new/" + image_folder_name

# Create the destination directory
new_directory_path = os.path.join(folder_path, new_directory_name)
os.makedirs(new_directory_path, exist_ok=True)

# Copy each image under its OCR-prefixed name
for old_filename, ocr_lines in file_rename_list:
    old_path = os.path.join(folder_path, old_filename)

    # splitext handles extensions of any length (".jpeg" as well as ".jpg").
    # The real extension is kept because the bytes are copied unconverted.
    stem, extension = os.path.splitext(old_filename)

    # The prefix is raw OCR output. A path separator in it would make
    # os.path.join leave the destination directory, so neutralise it.
    label = ocr_lines[0].replace('/', '_').replace('\\', '_')
    new_path = os.path.join(new_directory_path, label + stem + extension)

    # Copy (not move) the file, leaving the source folder untouched
    shutil.copy(old_path, new_path)

#     print(f"Copied: {old_filename} to {new_path}")

Copied: 10.jpg to ../../seg_letter_new/RC05117_03/['打']
Copied: 100.jpg to ../../seg_letter_new/RC05117_03/['た']
Copied: 101.jpg to ../../seg_letter_new/RC05117_03/['計']
Copied: 102.jpg to ../../seg_letter_new/RC05117_03/['生']
Copied: 103.jpg to ../../seg_letter_new/RC05117_03/['は']
Copied: 104.jpg to ../../seg_letter_new/RC05117_03/['对']
Copied: 105.jpg to ../../seg_letter_new/RC05117_03/['T']
Copied: 106.jpg to ../../seg_letter_new/RC05117_03/['IR']
Copied: 107.jpg to ../../seg_letter_new/RC05117_03/['H']
Copied: 108.jpg to ../../seg_letter_new/RC05117_03/['の']
Copied: 109.jpg to ../../seg_letter_new/RC05117_03/['は']
Copied: 11.jpg to ../../seg_letter_new/RC05117_03/['天']
Copied: 110.jpg to ../../seg_letter_new/RC05117_03/['た']
Copied: 113.jpg to ../../seg_letter_new/RC05117_03/['5.']
Copied: 116.jpg to ../../seg_letter_new/RC05117_03/['大']
Copied: 117.jpg to ../../seg_letter_new/RC05117_03/['の']
Copied: 118.jpg to ../../seg_letter_new/RC05117_03/['壁']
Copied: 119.jpg to ../../seg_le