In [2]:
import os
import csv
from google.cloud import vision
from google.oauth2 import service_account

def clean_image_name(image_name):
    """Return the portion of ``image_name`` that precedes the first '-'.

    A name without any '-' is returned unchanged; a name that starts with
    '-' yields an empty string.
    """
    prefix, _separator, _remainder = image_name.partition('-')
    return prefix

def extract_text_from_image(image_path, client):
    """Send one image file to the Vision client and return its detected text.

    Args:
        image_path: Path of the image file; it is read in binary mode.
        client: Object exposing ``text_detection(image=...)``; the script
            passes a ``vision.ImageAnnotatorClient``.

    Returns:
        The ``description`` of the first entry in the response's
        ``text_annotations``, or ``''`` when there are no annotations.

    Raises:
        Exception: If the response carries a non-empty ``error.message``;
            the exception text is that message.
    """
    with open(image_path, 'rb') as handle:
        raw_bytes = handle.read()

    response = client.text_detection(image=vision.Image(content=raw_bytes))
    annotations = response.text_annotations

    # The API reports failures inside the response object, so check it
    # before trusting the annotations.
    error_message = response.error.message
    if error_message:
        raise Exception(f'{error_message}')

    return annotations[0].description if annotations else ''

def process_images_and_save_to_csv(source_folder, output_csv, client):
    """Extract text from every image in a folder and write the results to CSV.

    Image files are selected by extension (.png, .jpg, .jpeg, .gif, .bmp,
    .tiff; case-insensitive) and processed in sorted filename order. Each
    CSV row holds the cleaned image name (extension removed, then
    ``clean_image_name`` applied) and the text returned by
    ``extract_text_from_image``.

    Args:
        source_folder: Directory that contains the images.
        output_csv: Path of the CSV file to create (overwritten if present).
        client: Vision client forwarded to ``extract_text_from_image``.

    Returns:
        None. If ``source_folder`` is not an existing directory, a message
        is printed and no CSV is written.

    Raises:
        Exception: Propagated from ``extract_text_from_image`` when an image
            cannot be processed; in that case no CSV is written.
    """
    # Use isdir rather than exists: a path pointing at a regular file would
    # pass an existence check and then crash in os.listdir.
    if not os.path.isdir(source_folder):
        print(f"The folder {source_folder} does not exist or is not a directory.")
        return

    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')

    # os.listdir returns entries in arbitrary order; sorting keeps the CSV
    # rows reproducible between runs. The isfile check skips directories
    # whose names merely look like image files.
    image_files = [
        entry
        for entry in sorted(os.listdir(source_folder))
        if entry.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(source_folder, entry))
    ]

    # Process each image and collect (cleaned name, text) pairs
    extracted_texts = []
    for image in image_files:
        image_path = os.path.join(source_folder, image)
        # Drop the extension, then reduce the name to its leading part
        image_name = os.path.splitext(image)[0]
        cleaned_name = clean_image_name(image_name)
        text = extract_text_from_image(image_path, client)
        extracted_texts.append((cleaned_name, text))

    # newline='' lets the csv module control line endings itself
    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Image Name', 'Extracted Text'])  # Write the header
        writer.writerows(extracted_texts)

    print(f"Processed {len(extracted_texts)} images and saved extracted text to {output_csv}")

# Configuration: adjust these values for your environment
service_account_key = 'myvisionproject-424702-6bdcada1c6f4.json'  # Service account JSON key file
source_folder = 'images'  # Folder holding the images to process
output_csv = 'output2.csv'  # CSV file that receives the extracted text

# Build an authenticated Vision API client from the key file
credentials = service_account.Credentials.from_service_account_file(service_account_key)
client = vision.ImageAnnotatorClient(credentials=credentials)

# Run the extraction over the folder and write the CSV
process_images_and_save_to_csv(
    source_folder=source_folder,
    output_csv=output_csv,
    client=client,
)

Processed 2 images and saved extracted text to output2.csv


In [2]:
# Convert .csv to .xlsx
import pandas as pd
import openpyxl
# Read every column as plain text. With pandas' default type inference,
# image names such as "007" would become the number 7, and empty cells or
# text such as "NA"/"null" would become NaN before being written to Excel.
data = pd.read_csv('output2.csv', dtype=str, keep_default_na=False)
data.to_excel('output2.xlsx', index=False)