# EXTRACT TEXT FROM IMAGES

## Configurations

In [24]:
api_key = "" # Get a free API key by registering at https://ocr.space/ocrapi/freekey
images_folder = "images" # Folder that is scanned for images to process
language = "spa" # Language code sent in the "language" field of the OCR request
version = 5 # API version; check the different API versions and updates at https://ocr.space/ocrapi

## Method definitions

In [19]:
import os, base64, requests, asyncio

# Valid image extensions; file names are lower-cased before being matched against this tuple
image_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff")
# URL that the OCR requests are POSTed to
api_endpoint = "https://api.ocr.space/parse/image"

async def extractText(image_path):
    """Send an image to the OCR endpoint and return the recognized text.

    The image is read from disk, base64-encoded into a ``data:`` URI and
    POSTed to ``api_endpoint`` together with the module-level ``api_key``,
    ``language`` and ``version`` settings.

    Args:
        image_path: Path of the image file to process.

    Returns:
        The ``ParsedText`` of the first entry in the response's
        ``ParsedResults`` list, or the string ``"FAILED"`` if anything goes
        wrong (the error is printed, never raised).
    """
    try:
        # Read the image file and encode it as a base64 string
        with open(image_path, "rb") as f:
            image_data_base64 = base64.b64encode(f.read()).decode("utf-8")

        # Build the MIME subtype from the file extension. Lower-case it and
        # map "jpg" to the registered subtype "jpeg" so the data URI is valid.
        extension = os.path.splitext(image_path)[1][1:].lower()
        if extension == "jpg":
            extension = "jpeg"

        # Create the base64-encoded image string in the correct format
        base64_image = f"data:image/{extension};base64,{image_data_base64}"

        payload = {
            "apikey": api_key,
            "base64Image": base64_image,
            "language": language,
            # Honor the configured version instead of a hard-coded engine
            "OCREngine": version,
        }

        # requests is blocking: run it in the default executor so the event
        # loop stays free and the gathered tasks really overlap. The timeout
        # keeps a stalled connection from hanging the whole run.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: requests.post(api_endpoint, data=payload, timeout=120),
        )
        response.raise_for_status()

        result = response.json()
        parsed_results = result.get("ParsedResults") if isinstance(result, dict) else None
        if not parsed_results:
            # Report whatever the service returned rather than an opaque KeyError
            # NOTE(review): "ErrorMessage" is assumed to be the API's error field - confirm
            details = result.get("ErrorMessage") if isinstance(result, dict) else result
            raise ValueError(f"OCR returned no results for {image_path}: {details}")
        return parsed_results[0]["ParsedText"]
    except Exception as e:
        # Best-effort contract: callers receive the "FAILED" sentinel
        print(e)
        return "FAILED"

def textToFile(text, file, format = "txt", output_folder = "Output"):
    """Write ``text`` to ``<output_folder>/<file>.<ext>`` in the chosen format.

    Args:
        text: Text content to store.
        file: Output file name without extension.
        format: ``"pdf"`` (needs the ``fpdf`` package), ``"word"`` (needs the
            ``docx`` package, writes a ``.docx``) or anything else for a
            plain ``.txt`` file.
        output_folder: Destination directory; created if it does not exist.

    Returns:
        None. Errors are printed, not raised, so one failed write does not
        abort the caller.
    """
    try:
        # Without this every write fails until the folder is created by hand
        os.makedirs(output_folder, exist_ok=True)
        output_base = os.path.join(output_folder, file)

        if format == "pdf":
            # and in a pdf file
            from fpdf import FPDF

            pdf = FPDF()
            pdf.add_page()
            pdf.set_font("Arial", size=12)
            # multi_cell wraps long lines and honors line breaks; a single
            # cell would put the whole text on one overflowing line
            pdf.multi_cell(0, 10, txt=text, align="C")
            pdf.output(f"{output_base}.pdf")

        elif format == "word":
            # and in a word file
            from docx import Document

            document = Document()
            document.add_paragraph(text)
            document.save(f"{output_base}.docx")

        else:
            # and in a txt file; explicit UTF-8 so accented or other
            # non-ASCII text does not depend on the platform default encoding
            with open(f"{output_base}.txt", "w", encoding="utf-8") as f:
                f.write(text)
    except Exception as e:
        print(e)

async def processImage(image_path):
    """Run OCR on one image and save the result as a Word document.

    The document is named after the image file (directory and extension
    stripped). Any exception is printed and swallowed.

    Args:
        image_path: Path of the image to process.
    """
    try:
        print(f"Processing {image_path}...")

        # Obtain the text recognized in the image
        extracted = await extractText(image_path)

        # Derive the output name: drop the directory, then the extension
        base_name = os.path.basename(image_path)
        stem, _ = os.path.splitext(base_name)

        # Persist the extracted text
        textToFile(extracted, stem, format="word")
    except Exception as err:
        print(err)

async def main():
    """Process every image found directly inside ``images_folder``.

    Files whose lower-cased name ends with one of ``image_extensions`` are
    collected, one ``processImage`` task is created per file, and all tasks
    are awaited together. Any exception is printed and then re-raised.
    """
    try:
        # Collect the paths of all entries that look like images
        image_files = []
        for entry in os.listdir(images_folder):
            if entry.lower().endswith(image_extensions):
                image_files.append(os.path.join(images_folder, entry))

        print(f"Nº images to process: {len(image_files)}")
        print(f"Image files: {image_files}")

        # Schedule one task per image, then wait for all of them
        pending = []
        for image in image_files:
            pending.append(asyncio.create_task(processImage(image)))
        await asyncio.gather(*pending)
    except Exception as err:
        print(err)
        raise err

## Execution

In [25]:
# Run the main function. Top-level `await` works here because the notebook
# session already has a running event loop; it is not valid in a plain script.
await main()

Image files: ['images/poesias\\A mi hija Nuria.jpeg', 'images/poesias\\cinco_años_sin_mi_niño.jpeg', 'images/poesias\\Dim dim dom.jpeg', 'images/poesias\\El de los ojitos negros.jpeg', 'images/poesias\\Es otra Navidad.jpeg', 'images/poesias\\Esta casa este lugar.jpeg', 'images/poesias\\Estamos viviendo crisis.jpeg', 'images/poesias\\Jovita soy miguelito.jpeg', 'images/poesias\\Juanjito.jpeg', 'images/poesias\\mañanitas.jpeg', 'images/poesias\\mi niño lindo esta triste.jpeg', 'images/poesias\\miguel .jpeg', 'images/poesias\\miguel 2.jpeg', 'images/poesias\\Miguel ocho años.jpeg', 'images/poesias\\Miguelito .jpeg', 'images/poesias\\Niña preciosa.jpeg', 'images/poesias\\Palomita mensajera (2).jpeg', 'images/poesias\\por un buen familiar.jpeg', 'images/poesias\\querido miguel 1.jpeg', 'images/poesias\\querido miguel 2.jpeg', 'images/poesias\\querido miguel 3.jpeg', 'images/poesias\\Querido Miguel desde el cielo.jpeg', 'images/poesias\\querido miguel.jpeg', 'images/poesias\\te cuento un cue