In [None]:
# Install required libraries
%pip install pdfplumber

In [None]:
import os
import pdfplumber
from tqdm import tqdm

def extract_text_from_pdf(pdf_path, double_column=False):
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_path: Path of the PDF file; passed to ``pdfplumber.open``.
        double_column: When true, each page is split vertically at half its
            width and the left half is emitted before the right half.
            When false, the page is extracted as a whole.

    Returns:
        The concatenated text. In single-column mode each page contributes
        its text followed by a newline; in double-column mode each page
        contributes ``left + "\\n" + right + "\\n"``. Pages (or halves) for
        which no text is extracted contribute an empty string.
    """
    # Collect per-page pieces and join once instead of growing a string.
    pieces = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            if double_column:
                mid_x = page.width / 2

                # Bounding boxes are (x0, top, x1, bottom).
                left_text = page.within_bbox((0, 0, mid_x, page.height)).extract_text()
                right_text = page.within_bbox((mid_x, 0, page.width, page.height)).extract_text()

                pieces.append((left_text or "") + "\n" + (right_text or "") + "\n")
            else:
                # Guard against a falsy result (e.g. None for a page with no
                # extractable text), exactly as the double-column branch does;
                # concatenating None with "\n" would raise TypeError.
                pieces.append((page.extract_text() or "") + "\n")
    return "".join(pieces)

def process_pdfs(directory, double_column=False):
    """Extract text from every PDF in *directory* into sibling ``.txt`` files.

    For each entry of ``directory`` whose name ends in ``.pdf`` (matched
    case-insensitively), the text returned by ``extract_text_from_pdf`` is
    written, UTF-8 encoded, to a file with the same base name and a ``.txt``
    extension in the same directory. An existing ``.txt`` file of that name
    is overwritten.

    Args:
        directory: Directory to scan (not recursive).
        double_column: Forwarded to ``extract_text_from_pdf``.
    """
    # Filter before handing the list to tqdm so the progress bar counts only
    # PDFs, and compare lower-cased names so files such as "SCAN.PDF" are not
    # silently skipped. Sorting makes the processing order deterministic.
    pdf_names = sorted(
        name for name in os.listdir(directory) if name.lower().endswith('.pdf')
    )

    for filename in tqdm(pdf_names):
        filepath = os.path.join(directory, filename)
        extracted_text = extract_text_from_pdf(filepath, double_column)

        # Save the extracted text next to the source PDF.
        txt_filename = os.path.splitext(filename)[0] + '.txt'
        txt_filepath = os.path.join(directory, txt_filename)
        with open(txt_filepath, 'w', encoding='utf-8') as txt_file:
            txt_file.write(extracted_text)

# Process PDFs: extract the text of every PDF in `directory` and write a .txt
# file with the same base name into that directory.
# NOTE(review): the empty string looks like a placeholder; presumably it must
# be set to the folder containing the PDFs before running this cell; confirm.
directory = ""
# double_column=True splits each page at half its width and emits the left
# half's text before the right half's.
process_pdfs(directory, double_column=True)
