<a href="https://colab.research.google.com/github/hitesh582/RL-CSL348-Reinforcement-Learning/blob/main/manavlawmitradataintocsv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary packages in Colab
!pip install pdf2image pytesseract torch transformers pandas spacy tqdm ipywidgets

# Download the spaCy English language model
!python -m spacy download en_core_web_sm

# Enable Jupyter widgets extension (for interactive elements if needed)
!jupyter nbextension enable --py widgetsnbextension --sys-prefix


Collecting pdf2image
  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m53.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytesseract, pdf2image, jedi
Successfully installed jedi-0.19.2 pdf2image-1.17.0 pytesseract-0.3.13
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m57.0 MB/s[0m eta [36m

In [None]:
# Imports and setup
import os
from pdf2image import convert_from_path
import pytesseract
from transformers import pipeline
import pandas as pd
import spacy
from pathlib import Path
import torch
from tqdm import tqdm
from IPython.display import display, HTML

# Install dependencies for Colab
!apt-get install -y poppler-utils
!apt-get install -y tesseract-ocr
!pip install pdf2image pytesseract transformers pandas spacy tqdm

# Set paths (no need to manually set in Colab since Tesseract and Poppler are installed system-wide)
POPPLER_PATH = '/usr/bin'
TESSERACT_PATH = '/usr/bin/tesseract'

# Verify installations
print(f"Checking if Poppler path exists: {os.path.exists(POPPLER_PATH)}")
print(f"Checking if Tesseract exists: {os.path.exists(TESSERACT_PATH)}")

# Check CUDA availability
if torch.cuda.is_available():
    print(f"CUDA available: True")
    print(f"Current GPU: {torch.cuda.get_device_name()}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
else:
    print("CUDA not available")


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  poppler-utils
0 upgraded, 1 newly installed, 0 to remove and 49 not upgraded.
Need to get 186 kB of archives.
After this operation, 696 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 poppler-utils amd64 22.02.0-2ubuntu0.5 [186 kB]
Fetched 186 kB in 1s (152 kB/s)
Selecting previously unselected package poppler-utils.
(Reading database ... 123629 files and directories currently installed.)
Preparing to unpack .../poppler-utils_22.02.0-2ubuntu0.5_amd64.deb ...
Unpacking poppler-utils (22.02.0-2ubuntu0.5) ...
Setting up poppler-utils (22.02.0-2ubuntu0.5) ...
Processing triggers for man-db (2.10.2-1) ...
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  tesseract-ocr-eng tesseract-ocr-osd
T

In [None]:
# Second cell - Class definition
class CourtCaseProcessor:
    """OCR court-case PDFs and derive a keyword-based summary and cited sections.

    Pipeline per PDF: ``pdf_to_text`` (pdf2image + pytesseract) ->
    ``get_sections`` / ``get_summary`` (spaCy tokenisation plus keyword
    heuristics). ``process_folder`` runs this for every ``*.pdf`` in a folder
    and writes one CSV row per successfully processed file.

    Attributes:
        device: ``"cuda"`` when a GPU is available, otherwise ``"cpu"``.
        summarizer: Hugging Face summarization pipeline. It is loaded in
            ``__init__`` but not called by any method of this class; it is
            kept so existing code that accesses the attribute keeps working.
        nlp: spaCy ``en_core_web_sm`` pipeline used for tokens and sentences.
    """

    # Tokens (compared lower-cased) that introduce a section reference.
    _SECTION_MARKERS = ('section', 's.', 'sec.', 'ss.', 'sections')

    # Abbreviation searched for (case-sensitively) after a section marker ->
    # full act name used in the output. Order matters: first match wins.
    _ACTS = {
        'IPC': 'Indian Penal Code',
        'CrPC': 'Code of Criminal Procedure',
        'CPC': 'Code of Civil Procedure',
        'IT Act': 'Information Technology Act',
        'Consumer Protection Act': 'Consumer Protection Act',
        'NDPS': 'Narcotic Drugs and Psychotropic Substances Act',
        'Prevention of Corruption Act': 'Prevention of Corruption Act'
    }

    # Case subject -> keywords whose presence votes for that subject.
    # Keywords containing capitals are matched case-sensitively (see
    # ``_contains_keyword``); all-lowercase ones are matched case-insensitively.
    _CASE_SUBJECTS = {
        'murder': ['murder', 'killed', 'death', 'homicide', '302 IPC', 'deceased'],
        'property dispute': ['property', 'land', 'possession', 'title', 'ownership', 'tenant'],
        'corruption': ['corruption', 'bribe', 'misappropriation', 'prevention of corruption act'],
        'theft': ['theft', 'stolen', 'robbery', 'burglary', '379 IPC'],
        'assault': ['assault', 'attack', 'hurt', 'injury', 'grievous', '324 IPC', '325 IPC'],
        'fraud': ['fraud', 'cheating', 'misrepresentation', 'forgery', '420 IPC'],
        'rape': ['rape', 'sexual assault', '376 IPC'],
        'dowry': ['dowry', 'dowry death', '304B IPC'],
        'drugs': ['drugs', 'NDPS', 'narcotic', 'possession of drugs'],
        'domestic violence': ['domestic violence', 'cruelty', '498A IPC'],
        'cybercrime': ['cyber', 'computer', 'online', 'IT Act'],
        'defamation': ['defamation', 'defame', '499 IPC', '500 IPC'],
        'kidnapping': ['kidnapping', 'abduction', '363 IPC'],
        'accident': ['accident', 'negligence', 'motor vehicle', 'compensation']
    }

    # Phrases (matched lower-cased) that mark a sentence as stating case facts.
    _FACT_INDICATORS = ('facts', 'brief facts', 'prosecution case', 'allegations', 'according to')

    def __init__(self):
        """Select the compute device and load the NLP components."""
        # Check if CUDA is available
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        if self.device == "cuda":
            torch.cuda.set_device(0)
            print(f"Using GPU: {torch.cuda.get_device_name(0)}")

        # Initialize NLP components.
        # NOTE(review): the summarizer is not used by any method below;
        # get_summary is purely keyword-based.
        self.summarizer = pipeline("summarization",
                                   model="sshleifer/distilbart-cnn-12-6",
                                   device=0 if self.device == "cuda" else -1)
        self.nlp = spacy.load("en_core_web_sm")

    def pdf_to_text(self, pdf_path):
        """Convert a PDF to text by rasterising each page and running OCR.

        Args:
            pdf_path: Path of the PDF file, as a string.

        Returns:
            The OCR text of all pages, each page followed by a newline, or
            ``None`` if conversion or OCR raised an exception (the error is
            printed; this is deliberately best-effort so one bad file does
            not stop a batch).
        """
        try:
            print(f"\nAttempting to convert {pdf_path} to text...")
            # No need to specify poppler_path in Colab
            images = convert_from_path(pdf_path)
            print(f"Successfully converted PDF to {len(images)} images")

            pages = []
            for i, image in enumerate(images):
                print(f"Processing page {i+1}/{len(images)}")
                pages.append(pytesseract.image_to_string(image) + "\n")
            text = "".join(pages)

            print(f"Text extraction complete. Extracted {len(text)} characters")
            return text
        except Exception as e:
            print(f"Error processing {pdf_path}: {str(e)}")
            return None

    def get_sections(self, text):
        """Extract legal section references, with their act when recognisable.

        For every token that is a section marker ("section", "s.", "sec.",
        "ss.", "sections"), the following (up to nine) tokens are inspected:
        the digits, '-' and ',' of the first word form the section number,
        and the first act abbreviation found in that window names the act.

        Args:
            text: Full text of the document.

        Returns:
            A comma-separated, alphabetically sorted string of unique
            references such as ``"Section 302 of the Indian Penal Code"``,
            or ``'No sections found'`` when nothing was detected.
        """
        doc = self.nlp(text)
        sections = set()

        for i, token in enumerate(doc):
            if token.text.lower() not in self._SECTION_MARKERS:
                continue
            if i + 1 >= len(doc):
                continue

            next_tokens = doc[i+1:i+10].text
            words = next_tokens.split()
            if not words:
                # The look-ahead window held only whitespace; indexing
                # words[0] here would raise IndexError.
                continue

            section_num = ''.join(
                char for char in words[0] if char.isdigit() or char in '-,'
            )
            # Drop dangling separators ("302," -> "302"; a bare "-" -> "").
            section_num = section_num.strip('-,')
            if not section_num:
                continue

            act_name = ''
            for act_key, full_name in self._ACTS.items():
                if act_key in next_tokens:
                    act_name = full_name
                    break

            if act_name:
                sections.add(f"Section {section_num} of the {act_name}")
            else:
                sections.add(f"Section {section_num}")

        # Sort so the output is stable across runs (set order is not).
        return ', '.join(sorted(sections)) if sections else 'No sections found'

    @staticmethod
    def _contains_keyword(keyword, text, text_lower):
        """Return True if ``keyword`` occurs in the text.

        All-lowercase keywords are matched case-insensitively against
        ``text_lower``. Keywords written with capitals (e.g. '302 IPC',
        'NDPS', 'IT Act') are matched exactly against ``text``; comparing
        them with lower-cased text could never succeed, and lower-casing
        them would make short ones like 'IT Act' match unrelated phrases.
        """
        if keyword == keyword.lower():
            return keyword in text_lower
        return keyword in text

    def get_summary(self, text):
        """Generate a short summary focused on the case subject matter.

        The subject is the entry of ``_CASE_SUBJECTS`` with the most distinct
        keywords present in the text (first one wins on ties). Up to two
        supporting sentences are appended: sentences containing a fact
        indicator, or, if none are found, sentences containing a keyword
        of the chosen subject. Only sentences longer than five words count.

        Args:
            text: Full text of the document.

        Returns:
            ``"This is a case regarding <subject>."`` optionally followed by
            the supporting sentences; ``"Unable to determine the main subject
            of the case."`` when no keyword matched; or ``"Error generating
            summary"`` if an exception occurred (the error is printed).
        """
        try:
            # Clean the text
            text = text.replace('\n', ' ').strip()

            # Find main subject of the case
            text_lower = text.lower()
            case_subject = None
            max_matches = 0

            for subject, keywords in self._CASE_SUBJECTS.items():
                matches = sum(
                    1 for keyword in keywords
                    if self._contains_keyword(keyword, text, text_lower)
                )
                if matches > max_matches:
                    max_matches = matches
                    case_subject = subject

            # Get context around the subject
            doc = self.nlp(text)
            relevant_sentences = []

            # First, try to get facts about the case
            for sent in doc.sents:
                sent_text = sent.text.lower()
                if any(indicator in sent_text for indicator in self._FACT_INDICATORS):
                    if len(sent_text.split()) > 5:
                        relevant_sentences.append(sent.text)
                        if len(relevant_sentences) >= 2:
                            break

            # If no facts found, get sentences containing the case subject
            if not relevant_sentences and case_subject:
                subject_keywords = self._CASE_SUBJECTS[case_subject]
                for sent in doc.sents:
                    sent_text = sent.text.lower()
                    if any(self._contains_keyword(keyword, sent.text, sent_text)
                           for keyword in subject_keywords):
                        if len(sent_text.split()) > 5:
                            relevant_sentences.append(sent.text)
                            if len(relevant_sentences) >= 2:
                                break

            # Construct summary
            if case_subject:
                summary_parts = [f"This is a case regarding {case_subject}."]
                if relevant_sentences:
                    summary_parts.append(" ".join(relevant_sentences))
                return " ".join(summary_parts)
            else:
                return "Unable to determine the main subject of the case."

        except Exception as e:
            print(f"Error generating summary: {str(e)}")
            return "Error generating summary"

    def process_folder(self, folder_path, output_csv):
        """Process all PDFs in a folder and save the results to a CSV file.

        Files whose text extraction fails, or whose analysis raises, are
        reported and skipped. The CSV has the columns ``Summary`` and
        ``Sections``, one row per successfully processed PDF.

        Args:
            folder_path: Directory searched (non-recursively) for ``*.pdf``.
            output_csv: Destination path of the CSV file.

        Returns:
            None. Progress and errors are printed.
        """
        results = []
        folder = Path(folder_path)

        if not folder.exists():
            print(f"Error: Folder {folder_path} does not exist!")
            return

        # Sorted so the processing order, and hence the CSV row order, is
        # reproducible; glob itself yields files in arbitrary order.
        pdf_files = sorted(folder.glob('*.pdf'))
        if not pdf_files:
            print(f"No PDF files found in {folder_path}")
            return

        print(f"Found {len(pdf_files)} PDF files to process")

        for pdf_file in tqdm(pdf_files, desc="Processing PDFs"):
            print(f"\nProcessing: {pdf_file.name}")

            text = self.pdf_to_text(str(pdf_file))
            if not text:
                print(f"Skipping {pdf_file.name} due to text extraction failure")
                continue

            try:
                sections = self.get_sections(text)
                summary = self.get_summary(text)

                result = {
                    'Summary': summary,
                    'Sections': sections,
                }
                results.append(result)

            except Exception as e:
                print(f"Error processing {pdf_file.name}: {str(e)}")

        if results:
            df = pd.DataFrame(results)
            df.to_csv(output_csv, index=False)
            print(f"Results saved to {output_csv}")
        else:
            print("No results to save to CSV")


In [None]:
# Third cell - Run the processor
# Where the PDFs live and where the resulting CSV is written
pdf_folder = r"Dataset"  # Update this path
output_csv = "court_cases_summary.csv"

# Build the processor (this loads the NLP models), then OCR and analyse
# every PDF in the folder and write the results to the CSV
processor = CourtCaseProcessor()
processor.process_folder(folder_path=pdf_folder, output_csv=output_csv)

Using device: cuda
Using GPU: Tesla T4


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Found 109 PDF files to process


Processing PDFs:   0%|          | 0/109 [00:00<?, ?it/s]


Processing: TP_2004_8_scc_644_652_johri67_yahoocom_20240927_123744_1_9.pdf

Attempting to convert Dataset/TP_2004_8_scc_644_652_johri67_yahoocom_20240927_123744_1_9.pdf to text...
Successfully converted PDF to 9 images
Processing page 1/9
Processing page 2/9
Processing page 3/9
Processing page 4/9
Processing page 5/9
Processing page 6/9
Processing page 7/9
Processing page 8/9
Processing page 9/9
Text extraction complete. Extracted 31494 characters


Processing PDFs:   1%|          | 1/109 [01:45<3:10:20, 105.75s/it]


Processing: TP_2022_15_scc_593_595_johri67_yahoocom_20240917_151346_1_3.pdf

Attempting to convert Dataset/TP_2022_15_scc_593_595_johri67_yahoocom_20240917_151346_1_3.pdf to text...
Successfully converted PDF to 3 images
Processing page 1/3
Processing page 2/3
Processing page 3/3
Text extraction complete. Extracted 9598 characters


Processing PDFs:   2%|▏         | 2/109 [02:18<1:51:42, 62.64s/it] 


Processing: J_2020_SCC_OnLine_Ker_11961_johri67_yahoocom_20240917_142949_1_1.pdf

Attempting to convert Dataset/J_2020_SCC_OnLine_Ker_11961_johri67_yahoocom_20240917_142949_1_1.pdf to text...
Successfully converted PDF to 1 images
Processing page 1/1


Processing PDFs:   3%|▎         | 3/109 [02:28<1:08:08, 38.57s/it]

Text extraction complete. Extracted 2437 characters

Processing: TP_2018_9_scc_798_800_johri67_yahoocom_20240927_123728_1_3.pdf

Attempting to convert Dataset/TP_2018_9_scc_798_800_johri67_yahoocom_20240927_123728_1_3.pdf to text...
Successfully converted PDF to 3 images
Processing page 1/3
Processing page 2/3
Processing page 3/3
Text extraction complete. Extracted 9343 characters


Processing PDFs:   4%|▎         | 4/109 [02:57<1:01:24, 35.09s/it]


Processing: TP_2015_3_scc_779_799_johri67_yahoocom_20240927_121140_1_21.pdf

Attempting to convert Dataset/TP_2015_3_scc_779_799_johri67_yahoocom_20240927_121140_1_21.pdf to text...
Successfully converted PDF to 21 images
Processing page 1/21
Processing page 2/21
Processing page 3/21
Processing page 4/21
Processing page 5/21
Processing page 6/21
Processing page 7/21
Processing page 8/21
Processing page 9/21
Processing page 10/21
Processing page 11/21
Processing page 12/21
Processing page 13/21
Processing page 14/21
Processing page 15/21
Processing page 16/21
Processing page 17/21
Processing page 18/21
Processing page 19/21
Processing page 20/21
Processing page 21/21
Text extraction complete. Extracted 76054 characters


Processing PDFs:   5%|▍         | 5/109 [07:01<3:11:15, 110.34s/it]


Processing: J_2001_SCC_OnLine_Bom_875_johri67_yahoocom_20240927_120755_1_2.pdf

Attempting to convert Dataset/J_2001_SCC_OnLine_Bom_875_johri67_yahoocom_20240927_120755_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 4173 characters


Processing PDFs:   6%|▌         | 6/109 [07:18<2:15:02, 78.66s/it] 


Processing: TP_2023_6_scc_76_91_johri67_yahoocom_20240917_142156_1_16.pdf

Attempting to convert Dataset/TP_2023_6_scc_76_91_johri67_yahoocom_20240917_142156_1_16.pdf to text...
Successfully converted PDF to 16 images
Processing page 1/16
Processing page 2/16
Processing page 3/16
Processing page 4/16
Processing page 5/16
Processing page 6/16
Processing page 7/16
Processing page 8/16
Processing page 9/16
Processing page 10/16
Processing page 11/16
Processing page 12/16
Processing page 13/16
Processing page 14/16
Processing page 15/16
Processing page 16/16
Text extraction complete. Extracted 52920 characters


Processing PDFs:   6%|▋         | 7/109 [10:09<3:04:38, 108.61s/it]


Processing: TP_2004_13_scc_681_682_johri67_yahoocom_20240927_121808_1_2.pdf

Attempting to convert Dataset/TP_2004_13_scc_681_682_johri67_yahoocom_20240927_121808_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 5870 characters


Processing PDFs:   7%|▋         | 8/109 [10:29<2:15:34, 80.54s/it] 


Processing: TP_2007_11_scc_265_269_johri67_yahoocom_20240927_125356_1_5.pdf

Attempting to convert Dataset/TP_2007_11_scc_265_269_johri67_yahoocom_20240927_125356_1_5.pdf to text...
Successfully converted PDF to 5 images
Processing page 1/5
Processing page 2/5
Processing page 3/5
Processing page 4/5
Processing page 5/5
Text extraction complete. Extracted 16225 characters


Processing PDFs:   8%|▊         | 9/109 [11:22<2:00:03, 72.04s/it]


Processing: TP_2006_5_scc_228_239_johri67_yahoocom_20240927_122631_1_12.pdf

Attempting to convert Dataset/TP_2006_5_scc_228_239_johri67_yahoocom_20240927_122631_1_12.pdf to text...
Successfully converted PDF to 12 images
Processing page 1/12
Processing page 2/12
Processing page 3/12
Processing page 4/12
Processing page 5/12
Processing page 6/12
Processing page 7/12
Processing page 8/12
Processing page 9/12
Processing page 10/12
Processing page 11/12
Processing page 12/12
Text extraction complete. Extracted 41247 characters


Processing PDFs:   9%|▉         | 10/109 [13:34<2:28:57, 90.28s/it]


Processing: TP_2001_9_scc_602_604_johri67_yahoocom_20240927_123936_1_3.pdf

Attempting to convert Dataset/TP_2001_9_scc_602_604_johri67_yahoocom_20240927_123936_1_3.pdf to text...
Successfully converted PDF to 3 images
Processing page 1/3
Processing page 2/3
Processing page 3/3
Text extraction complete. Extracted 10095 characters


Processing PDFs:  10%|█         | 11/109 [14:05<1:57:54, 72.19s/it]


Processing: TP_2007_1_scc_222_227_johri67_yahoocom_20240927_123806_1_6.pdf

Attempting to convert Dataset/TP_2007_1_scc_222_227_johri67_yahoocom_20240927_123806_1_6.pdf to text...
Successfully converted PDF to 6 images
Processing page 1/6
Processing page 2/6
Processing page 3/6
Processing page 4/6
Processing page 5/6
Processing page 6/6
Text extraction complete. Extracted 20982 characters


Processing PDFs:  11%|█         | 12/109 [15:10<1:53:22, 70.13s/it]


Processing: TP_2015_16_scc_253_257_johri67_yahoocom_20240927_121431_1_5.pdf

Attempting to convert Dataset/TP_2015_16_scc_253_257_johri67_yahoocom_20240927_121431_1_5.pdf to text...
Successfully converted PDF to 5 images
Processing page 1/5
Processing page 2/5
Processing page 3/5
Processing page 4/5
Processing page 5/5
Text extraction complete. Extracted 15625 characters


Processing PDFs:  12%|█▏        | 13/109 [16:00<1:42:10, 63.86s/it]


Processing: TP_2006_10_scc_92_96_johri67_yahoocom_20240927_125323_1_5.pdf

Attempting to convert Dataset/TP_2006_10_scc_92_96_johri67_yahoocom_20240927_125323_1_5.pdf to text...
Successfully converted PDF to 5 images
Processing page 1/5
Processing page 2/5
Processing page 3/5
Processing page 4/5
Processing page 5/5
Text extraction complete. Extracted 17704 characters


Processing PDFs:  13%|█▎        | 14/109 [16:55<1:37:06, 61.34s/it]


Processing: J_2024_SCC_OnLine_SC_2433_johri67_yahoocom_20240917_143154_1_6.pdf

Attempting to convert Dataset/J_2024_SCC_OnLine_SC_2433_johri67_yahoocom_20240917_143154_1_6.pdf to text...
Successfully converted PDF to 6 images
Processing page 1/6
Processing page 2/6
Processing page 3/6
Processing page 4/6
Processing page 5/6
Processing page 6/6
Text extraction complete. Extracted 14744 characters


Processing PDFs:  14%|█▍        | 15/109 [17:56<1:35:57, 61.25s/it]


Processing: TP_2006_12_scc_114_116_johri67_yahoocom_20240927_123907_1_3.pdf

Attempting to convert Dataset/TP_2006_12_scc_114_116_johri67_yahoocom_20240927_123907_1_3.pdf to text...
Successfully converted PDF to 3 images
Processing page 1/3
Processing page 2/3
Processing page 3/3
Text extraction complete. Extracted 9491 characters


Processing PDFs:  15%|█▍        | 16/109 [18:25<1:19:38, 51.38s/it]


Processing: J_2019_SCC_OnLine_Del_8140_2020_Cri_LJ_NOC_93_31_johri67_yahoocom_20240917_143300_1_12.pdf

Attempting to convert Dataset/J_2019_SCC_OnLine_Del_8140_2020_Cri_LJ_NOC_93_31_johri67_yahoocom_20240917_143300_1_12.pdf to text...
Successfully converted PDF to 12 images
Processing page 1/12
Processing page 2/12
Processing page 3/12
Processing page 4/12
Processing page 5/12
Processing page 6/12
Processing page 7/12
Processing page 8/12
Processing page 9/12
Processing page 10/12
Processing page 11/12
Processing page 12/12
Text extraction complete. Extracted 33342 characters


Processing PDFs:  16%|█▌        | 17/109 [20:51<2:02:31, 79.91s/it]


Processing: TP_2022_2_scc_567_568_johri67_yahoocom_20240917_144532_1_2.pdf

Attempting to convert Dataset/TP_2022_2_scc_567_568_johri67_yahoocom_20240917_144532_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 4294 characters


Processing PDFs:  17%|█▋        | 18/109 [21:05<1:31:17, 60.19s/it]


Processing: J_2022_SCC_OnLine_NCLT_1027_johri67_yahoocom_20240917_145238_1_1.pdf

Attempting to convert Dataset/J_2022_SCC_OnLine_NCLT_1027_johri67_yahoocom_20240917_145238_1_1.pdf to text...
Successfully converted PDF to 1 images
Processing page 1/1


Processing PDFs:  17%|█▋        | 19/109 [21:12<1:06:17, 44.19s/it]

Text extraction complete. Extracted 2084 characters

Processing: TP_2011_6_scc_376_381_johri67_yahoocom_20240927_121540_1_6.pdf

Attempting to convert Dataset/TP_2011_6_scc_376_381_johri67_yahoocom_20240927_121540_1_6.pdf to text...
Successfully converted PDF to 6 images
Processing page 1/6
Processing page 2/6
Processing page 3/6
Processing page 4/6
Processing page 5/6
Processing page 6/6
Text extraction complete. Extracted 20640 characters


Processing PDFs:  18%|█▊        | 20/109 [22:22<1:17:04, 51.96s/it]


Processing: TP_2022_12_scc_1_41_johri67_yahoocom_20240917_150404_1_41.pdf

Attempting to convert Dataset/TP_2022_12_scc_1_41_johri67_yahoocom_20240917_150404_1_41.pdf to text...
Successfully converted PDF to 41 images
Processing page 1/41
Processing page 2/41
Processing page 3/41
Processing page 4/41
Processing page 5/41
Processing page 6/41
Processing page 7/41
Processing page 8/41
Processing page 9/41
Processing page 10/41
Processing page 11/41
Processing page 12/41
Processing page 13/41
Processing page 14/41
Processing page 15/41
Processing page 16/41
Processing page 17/41
Processing page 18/41
Processing page 19/41
Processing page 20/41
Processing page 21/41
Processing page 22/41
Processing page 23/41
Processing page 24/41
Processing page 25/41
Processing page 26/41
Processing page 27/41
Processing page 28/41
Processing page 29/41
Processing page 30/41
Processing page 31/41
Processing page 32/41
Processing page 33/41
Processing page 34/41
Processing page 35/41
Processing page 36/4

Processing PDFs:  19%|█▉        | 21/109 [29:37<4:04:44, 166.87s/it]


Processing: J_2022_SCC_OnLine_SC_2155_johri67_yahoocom_20240917_144954_1_29.pdf

Attempting to convert Dataset/J_2022_SCC_OnLine_SC_2155_johri67_yahoocom_20240917_144954_1_29.pdf to text...
Successfully converted PDF to 29 images
Processing page 1/29
Processing page 2/29
Processing page 3/29
Processing page 4/29
Processing page 5/29
Processing page 6/29
Processing page 7/29
Processing page 8/29
Processing page 9/29
Processing page 10/29
Processing page 11/29
Processing page 12/29
Processing page 13/29
Processing page 14/29
Processing page 15/29
Processing page 16/29
Processing page 17/29
Processing page 18/29
Processing page 19/29
Processing page 20/29
Processing page 21/29
Processing page 22/29
Processing page 23/29
Processing page 24/29
Processing page 25/29
Processing page 26/29
Processing page 27/29
Processing page 28/29
Processing page 29/29
Text extraction complete. Extracted 64676 characters


Processing PDFs:  20%|██        | 22/109 [33:49<4:39:04, 192.47s/it]


Processing: J_2020_SCC_OnLine_Del_497_johri67_yahoocom_20240927_122413_1_2.pdf

Attempting to convert Dataset/J_2020_SCC_OnLine_Del_497_johri67_yahoocom_20240927_122413_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 6790 characters


Processing PDFs:  21%|██        | 23/109 [34:18<3:25:28, 143.35s/it]


Processing: TP_2021_17_scc_519_520_johri67_yahoocom_20240917_142621_1_2.pdf

Attempting to convert Dataset/TP_2021_17_scc_519_520_johri67_yahoocom_20240917_142621_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 5922 characters


Processing PDFs:  22%|██▏       | 24/109 [34:35<2:29:22, 105.44s/it]


Processing: TP_2018_18_scc_388_391_johri67_yahoocom_20240917_145828_1_4.pdf

Attempting to convert Dataset/TP_2018_18_scc_388_391_johri67_yahoocom_20240917_145828_1_4.pdf to text...
Successfully converted PDF to 4 images
Processing page 1/4
Processing page 2/4
Processing page 3/4
Processing page 4/4
Text extraction complete. Extracted 13492 characters


Processing PDFs:  23%|██▎       | 25/109 [35:18<2:01:16, 86.63s/it] 


Processing: TP_2017_2_scc_115_124_johri67_yahoocom_20240927_125737_1_10.pdf

Attempting to convert Dataset/TP_2017_2_scc_115_124_johri67_yahoocom_20240927_125737_1_10.pdf to text...
Successfully converted PDF to 10 images
Processing page 1/10
Processing page 2/10
Processing page 3/10
Processing page 4/10
Processing page 5/10
Processing page 6/10
Processing page 7/10
Processing page 8/10
Processing page 9/10
Processing page 10/10
Text extraction complete. Extracted 35349 characters


Processing PDFs:  24%|██▍       | 26/109 [37:09<2:10:05, 94.05s/it]


Processing: J_2024_SCC_OnLine_SC_425_johri67_yahoocom_20240917_151238_1_6.pdf

Attempting to convert Dataset/J_2024_SCC_OnLine_SC_425_johri67_yahoocom_20240917_151238_1_6.pdf to text...
Successfully converted PDF to 6 images
Processing page 1/6
Processing page 2/6
Processing page 3/6
Processing page 4/6
Processing page 5/6
Processing page 6/6
Text extraction complete. Extracted 14950 characters


Processing PDFs:  25%|██▍       | 27/109 [38:18<1:58:14, 86.51s/it]


Processing: J_2021_SCC_OnLine_Mad_13750_johri67_yahoocom_20240917_150948_1_1.pdf

Attempting to convert Dataset/J_2021_SCC_OnLine_Mad_13750_johri67_yahoocom_20240917_150948_1_1.pdf to text...
Successfully converted PDF to 1 images
Processing page 1/1


Processing PDFs:  26%|██▌       | 28/109 [38:27<1:25:33, 63.37s/it]

Text extraction complete. Extracted 2844 characters

Processing: TP_2010_11_scc_120_125_johri67_yahoocom_20240927_121855_1_6.pdf

Attempting to convert Dataset/TP_2010_11_scc_120_125_johri67_yahoocom_20240927_121855_1_6.pdf to text...
Successfully converted PDF to 6 images
Processing page 1/6
Processing page 2/6
Processing page 3/6
Processing page 4/6
Processing page 5/6
Processing page 6/6
Text extraction complete. Extracted 20217 characters


Processing PDFs:  27%|██▋       | 29/109 [39:33<1:25:36, 64.21s/it]


Processing: J_2019_SCC_OnLine_SC_1761_2019_309_CTR_168_johri67_yahoocom_20240917_150447_1_1.pdf

Attempting to convert Dataset/J_2019_SCC_OnLine_SC_1761_2019_309_CTR_168_johri67_yahoocom_20240917_150447_1_1.pdf to text...
Successfully converted PDF to 1 images
Processing page 1/1


Processing PDFs:  28%|██▊       | 30/109 [39:43<1:03:04, 47.91s/it]

Text extraction complete. Extracted 2612 characters

Processing: TP_2023_8_scc_175_180_johri67_yahoocom_20240917_141904_1_6.pdf

Attempting to convert Dataset/TP_2023_8_scc_175_180_johri67_yahoocom_20240917_141904_1_6.pdf to text...
Successfully converted PDF to 6 images
Processing page 1/6
Processing page 2/6
Processing page 3/6
Processing page 4/6
Processing page 5/6
Processing page 6/6
Text extraction complete. Extracted 20901 characters


Processing PDFs:  28%|██▊       | 31/109 [40:49<1:09:18, 53.31s/it]


Processing: TP_2001_7_scc_417_425_johri67_yahoocom_20240927_123845_1_9.pdf

Attempting to convert Dataset/TP_2001_7_scc_417_425_johri67_yahoocom_20240927_123845_1_9.pdf to text...
Successfully converted PDF to 9 images
Processing page 1/9
Processing page 2/9
Processing page 3/9
Processing page 4/9
Processing page 5/9
Processing page 6/9
Processing page 7/9
Processing page 8/9
Processing page 9/9
Text extraction complete. Extracted 32310 characters


Processing PDFs:  29%|██▉       | 32/109 [42:35<1:28:34, 69.02s/it]


Processing: J_2021_SCC_OnLine_Bom_609_2021_3_AIR_Bom_R_Cri_23_20_johri67_yahoocom_20240917_151631_1_4.pdf

Attempting to convert Dataset/J_2021_SCC_OnLine_Bom_609_2021_3_AIR_Bom_R_Cri_23_20_johri67_yahoocom_20240917_151631_1_4.pdf to text...
Successfully converted PDF to 4 images
Processing page 1/4
Processing page 2/4
Processing page 3/4
Processing page 4/4
Text extraction complete. Extracted 9086 characters


Processing PDFs:  30%|███       | 33/109 [43:16<1:16:41, 60.55s/it]


Processing: TP_1988_4_scc_54_59_johri67_yahoocom_20240927_125833_1_6.pdf

Attempting to convert Dataset/TP_1988_4_scc_54_59_johri67_yahoocom_20240927_125833_1_6.pdf to text...
Successfully converted PDF to 6 images
Processing page 1/6
Processing page 2/6
Processing page 3/6
Processing page 4/6
Processing page 5/6
Processing page 6/6
Text extraction complete. Extracted 18237 characters


Processing PDFs:  31%|███       | 34/109 [44:14<1:14:55, 59.94s/it]


Processing: TP_2022_8_scc_499_501_johri67_yahoocom_20240917_150321_1_3.pdf

Attempting to convert Dataset/TP_2022_8_scc_499_501_johri67_yahoocom_20240917_150321_1_3.pdf to text...
Successfully converted PDF to 3 images
Processing page 1/3
Processing page 2/3
Processing page 3/3
Text extraction complete. Extracted 8626 characters


Processing PDFs:  32%|███▏      | 35/109 [44:43<1:02:34, 50.74s/it]


Processing: J_2022_SCC_OnLine_SC_2178_2023_240_COMP_CAS_985_johri67_yahoocom_20240917_150913_1_1.pdf

Attempting to convert Dataset/J_2022_SCC_OnLine_SC_2178_2023_240_COMP_CAS_985_johri67_yahoocom_20240917_150913_1_1.pdf to text...
Successfully converted PDF to 1 images
Processing page 1/1
Text extraction complete. Extracted 2180 characters

Processing: J_2020_SCC_OnLine_All_267_2020_112_ACC_574_johri67_yahoocom_20240917_150645_1_7.pdf

Attempting to convert Dataset/J_2020_SCC_OnLine_All_267_2020_112_ACC_574_johri67_yahoocom_20240917_150645_1_7.pdf to text...
Successfully converted PDF to 7 images
Processing page 1/7
Processing page 2/7
Processing page 3/7
Processing page 4/7
Processing page 5/7
Processing page 6/7
Processing page 7/7
Text extraction complete. Extracted 18339 characters


Processing PDFs:  34%|███▍      | 37/109 [46:13<1:01:04, 50.90s/it]


Processing: J_2021_SCC_OnLine_Ker_14313_johri67_yahoocom_20240917_143023_1_2.pdf

Attempting to convert Dataset/J_2021_SCC_OnLine_Ker_14313_johri67_yahoocom_20240917_143023_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2


Processing PDFs:  35%|███▍      | 38/109 [46:29<47:50, 40.43s/it]  

Text extraction complete. Extracted 3715 characters

Processing: TP_2008_1_scc_716_719_johri67_yahoocom_20240927_121830_1_4.pdf

Attempting to convert Dataset/TP_2008_1_scc_716_719_johri67_yahoocom_20240927_121830_1_4.pdf to text...
Successfully converted PDF to 4 images
Processing page 1/4
Processing page 2/4
Processing page 3/4
Processing page 4/4
Text extraction complete. Extracted 13323 characters


Processing PDFs:  36%|███▌      | 39/109 [47:12<48:01, 41.17s/it]


Processing: TP_2012_9_scc_235_240_johri67_yahoocom_20240927_121915_1_6.pdf

Attempting to convert Dataset/TP_2012_9_scc_235_240_johri67_yahoocom_20240927_121915_1_6.pdf to text...
Successfully converted PDF to 6 images
Processing page 1/6
Processing page 2/6
Processing page 3/6
Processing page 4/6
Processing page 5/6
Processing page 6/6
Text extraction complete. Extracted 19632 characters


Processing PDFs:  37%|███▋      | 40/109 [48:17<55:40, 48.41s/it]


Processing: J_2009_SCC_OnLine_Chh_433_2009_3_CGLJ_307_johri67_yahoocom_20240927_125522_1_16.pdf

Attempting to convert Dataset/J_2009_SCC_OnLine_Chh_433_2009_3_CGLJ_307_johri67_yahoocom_20240927_125522_1_16.pdf to text...
Successfully converted PDF to 16 images
Processing page 1/16
Processing page 2/16
Processing page 3/16
Processing page 4/16
Processing page 5/16
Processing page 6/16
Processing page 7/16
Processing page 8/16
Processing page 9/16
Processing page 10/16
Processing page 11/16
Processing page 12/16
Processing page 13/16
Processing page 14/16
Processing page 15/16
Processing page 16/16
Text extraction complete. Extracted 41662 characters


Processing PDFs:  38%|███▊      | 41/109 [51:27<1:43:01, 90.91s/it]


Processing: J_2020_SCC_OnLine_Del_497_johri67_yahoocom_20240927_122444_1_2.pdf

Attempting to convert Dataset/J_2020_SCC_OnLine_Del_497_johri67_yahoocom_20240927_122444_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 6790 characters


Processing PDFs:  39%|███▊      | 42/109 [51:59<1:21:31, 73.01s/it]


Processing: J_2006_SCC_OnLine_Chh_86_2006_46_AIC_169_2006_1_CGLJ_johri67_yahoocom_20240927_125605_1_5.pdf

Attempting to convert Dataset/J_2006_SCC_OnLine_Chh_86_2006_46_AIC_169_2006_1_CGLJ_johri67_yahoocom_20240927_125605_1_5.pdf to text...
Successfully converted PDF to 5 images
Processing page 1/5
Processing page 2/5
Processing page 3/5
Processing page 4/5
Processing page 5/5
Text extraction complete. Extracted 16719 characters


Processing PDFs:  39%|███▉      | 43/109 [53:14<1:21:04, 73.71s/it]


Processing: TP_2008_15_scc_212_215_johri67_yahoocom_20240927_123704_1_4.pdf

Attempting to convert Dataset/TP_2008_15_scc_212_215_johri67_yahoocom_20240927_123704_1_4.pdf to text...
Successfully converted PDF to 4 images
Processing page 1/4
Processing page 2/4
Processing page 3/4
Processing page 4/4
Text extraction complete. Extracted 11961 characters


Processing PDFs:  40%|████      | 44/109 [53:51<1:07:52, 62.66s/it]


Processing: TP_2006_8_scc_629_637_johri67_yahoocom_20240927_123827_1_9.pdf

Attempting to convert Dataset/TP_2006_8_scc_629_637_johri67_yahoocom_20240927_123827_1_9.pdf to text...
Successfully converted PDF to 9 images
Processing page 1/9
Processing page 2/9
Processing page 3/9
Processing page 4/9
Processing page 5/9
Processing page 6/9
Processing page 7/9
Processing page 8/9
Processing page 9/9
Text extraction complete. Extracted 32141 characters


Processing PDFs:  41%|████▏     | 45/109 [55:30<1:18:38, 73.72s/it]


Processing: TP_1994_supp_3_scc_104_109_johri67_yahoocom_20240927_125908_1_6.pdf

Attempting to convert Dataset/TP_1994_supp_3_scc_104_109_johri67_yahoocom_20240927_125908_1_6.pdf to text...
Successfully converted PDF to 6 images
Processing page 1/6
Processing page 2/6
Processing page 3/6
Processing page 4/6
Processing page 5/6
Processing page 6/6
Text extraction complete. Extracted 21226 characters


Processing PDFs:  42%|████▏     | 46/109 [56:34<1:14:12, 70.67s/it]


Processing: J_2023_SCC_OnLine_All_2502_2024_162_ALR_819_2024_256_johri67_yahoocom_20240917_144605_1_8.pdf

Attempting to convert Dataset/J_2023_SCC_OnLine_All_2502_2024_162_ALR_819_2024_256_johri67_yahoocom_20240917_144605_1_8.pdf to text...
Successfully converted PDF to 8 images
Processing page 1/8
Processing page 2/8
Processing page 3/8
Processing page 4/8
Processing page 5/8
Processing page 6/8
Processing page 7/8
Processing page 8/8
Text extraction complete. Extracted 21100 characters


Processing PDFs:  43%|████▎     | 47/109 [58:03<1:18:42, 76.17s/it]


Processing: J_2017_SCC_OnLine_All_3919_2018_130_ALR_48_2018_190_johri67_yahoocom_20240917_144638_1_5.pdf

Attempting to convert Dataset/J_2017_SCC_OnLine_All_3919_2018_130_ALR_48_2018_190_johri67_yahoocom_20240917_144638_1_5.pdf to text...
Successfully converted PDF to 5 images
Processing page 1/5
Processing page 2/5
Processing page 3/5
Processing page 4/5
Processing page 5/5
Text extraction complete. Extracted 10898 characters


Processing PDFs:  44%|████▍     | 48/109 [58:46<1:07:16, 66.18s/it]


Processing: J_2024_SCC_OnLine_Del_2366_johri67_yahoocom_20240917_143224_1_7.pdf

Attempting to convert Dataset/J_2024_SCC_OnLine_Del_2366_johri67_yahoocom_20240917_143224_1_7.pdf to text...
Successfully converted PDF to 7 images
Processing page 1/7
Processing page 2/7
Processing page 3/7
Processing page 4/7
Processing page 5/7
Processing page 6/7
Processing page 7/7
Text extraction complete. Extracted 18929 characters


Processing PDFs:  45%|████▍     | 49/109 [1:00:05<1:10:03, 70.06s/it]


Processing: J_2010_SCC_OnLine_Del_2525_johri67_yahoocom_20240927_120908_1_7.pdf

Attempting to convert Dataset/J_2010_SCC_OnLine_Del_2525_johri67_yahoocom_20240927_120908_1_7.pdf to text...
Successfully converted PDF to 7 images
Processing page 1/7
Processing page 2/7
Processing page 3/7
Processing page 4/7
Processing page 5/7
Processing page 6/7
Processing page 7/7
Text extraction complete. Extracted 24449 characters


Processing PDFs:  46%|████▌     | 50/109 [1:01:50<1:19:09, 80.51s/it]


Processing: J_2019_SCC_OnLine_Bom_714_johri67_yahoocom_20240917_152251_1_8.pdf

Attempting to convert Dataset/J_2019_SCC_OnLine_Bom_714_johri67_yahoocom_20240917_152251_1_8.pdf to text...
Successfully converted PDF to 8 images
Processing page 1/8
Processing page 2/8
Processing page 3/8
Processing page 4/8
Processing page 5/8
Processing page 6/8
Processing page 7/8
Processing page 8/8
Text extraction complete. Extracted 30359 characters


Processing PDFs:  47%|████▋     | 51/109 [1:03:59<1:32:04, 95.26s/it]


Processing: TP_2010_1_scc_322_335_johri67_yahoocom_20240927_121942_1_14.pdf

Attempting to convert Dataset/TP_2010_1_scc_322_335_johri67_yahoocom_20240927_121942_1_14.pdf to text...
Successfully converted PDF to 14 images
Processing page 1/14
Processing page 2/14
Processing page 3/14
Processing page 4/14
Processing page 5/14
Processing page 6/14
Processing page 7/14
Processing page 8/14
Processing page 9/14
Processing page 10/14
Processing page 11/14
Processing page 12/14
Processing page 13/14
Processing page 14/14
Text extraction complete. Extracted 49564 characters


Processing PDFs:  48%|████▊     | 52/109 [1:06:34<1:47:28, 113.13s/it]


Processing: J_2016_SCC_OnLine_SC_1804_johri67_yahoocom_20240917_150122_1_1.pdf

Attempting to convert Dataset/J_2016_SCC_OnLine_SC_1804_johri67_yahoocom_20240917_150122_1_1.pdf to text...
Successfully converted PDF to 1 images
Processing page 1/1


Processing PDFs:  49%|████▊     | 53/109 [1:06:42<1:16:00, 81.43s/it] 

Text extraction complete. Extracted 1762 characters

Processing: J_2023_SCC_OnLine_Utt_918_johri67_yahoocom_20240917_145059_1_2.pdf

Attempting to convert Dataset/J_2023_SCC_OnLine_Utt_918_johri67_yahoocom_20240917_145059_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 4429 characters


Processing PDFs:  50%|████▉     | 54/109 [1:06:58<56:46, 61.93s/it]  


Processing: TP_2010_9_scc_618_629_johri67_yahoocom_20240927_121457_1_12.pdf

Attempting to convert Dataset/TP_2010_9_scc_618_629_johri67_yahoocom_20240927_121457_1_12.pdf to text...
Successfully converted PDF to 12 images
Processing page 1/12
Processing page 2/12
Processing page 3/12
Processing page 4/12
Processing page 5/12
Processing page 6/12
Processing page 7/12
Processing page 8/12
Processing page 9/12
Processing page 10/12
Processing page 11/12
Processing page 12/12
Text extraction complete. Extracted 40354 characters


Processing PDFs:  50%|█████     | 55/109 [1:09:07<1:13:40, 81.86s/it]


Processing: TP_2013_14_scc_331_340_johri67_yahoocom_20240927_125431_1_10.pdf

Attempting to convert Dataset/TP_2013_14_scc_331_340_johri67_yahoocom_20240927_125431_1_10.pdf to text...
Successfully converted PDF to 10 images
Processing page 1/10
Processing page 2/10
Processing page 3/10
Processing page 4/10
Processing page 5/10
Processing page 6/10
Processing page 7/10
Processing page 8/10
Processing page 9/10
Processing page 10/10
Text extraction complete. Extracted 32638 characters


Processing PDFs:  51%|█████▏    | 56/109 [1:10:49<1:17:43, 87.99s/it]


Processing: TP_2004_4_scc_684_697_johri67_yahoocom_20240927_122803_1_14.pdf

Attempting to convert Dataset/TP_2004_4_scc_684_697_johri67_yahoocom_20240927_122803_1_14.pdf to text...
Successfully converted PDF to 14 images
Processing page 1/14
Processing page 2/14
Processing page 3/14
Processing page 4/14
Processing page 5/14
Processing page 6/14
Processing page 7/14
Processing page 8/14
Processing page 9/14
Processing page 10/14
Processing page 11/14
Processing page 12/14
Processing page 13/14
Processing page 14/14
Text extraction complete. Extracted 51259 characters


Processing PDFs:  52%|█████▏    | 57/109 [1:13:33<1:36:09, 110.96s/it]


Processing: TP_2022_13_scc_134_135_johri67_yahoocom_20240917_145029_1_2.pdf

Attempting to convert Dataset/TP_2022_13_scc_134_135_johri67_yahoocom_20240917_145029_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 4762 characters


Processing PDFs:  53%|█████▎    | 58/109 [1:13:48<1:09:49, 82.15s/it] 


Processing: N_2011_9_SCC_272_2011_3_SCC_Cri_689_2011_SCC_OnLin_johri67_yahoocom_20240927_121242_1_1.pdf

Attempting to convert Dataset/N_2011_9_SCC_272_2011_3_SCC_Cri_689_2011_SCC_OnLin_johri67_yahoocom_20240927_121242_1_1.pdf to text...
Successfully converted PDF to 1 images
Processing page 1/1
Text extraction complete. Extracted 4462 characters


Processing PDFs:  54%|█████▍    | 59/109 [1:14:02<51:27, 61.76s/it]  


Processing: TP_2005_1_mah_lj_334_338_johri67_yahoocom_20240927_120620_1_5.pdf

Attempting to convert Dataset/TP_2005_1_mah_lj_334_338_johri67_yahoocom_20240927_120620_1_5.pdf to text...
Successfully converted PDF to 5 images
Processing page 1/5
Processing page 2/5
Processing page 3/5
Processing page 4/5
Processing page 5/5
Text extraction complete. Extracted 17550 characters


Processing PDFs:  55%|█████▌    | 60/109 [1:14:56<48:27, 59.33s/it]


Processing: TP_2004_3_lln_748_752_johri67_yahoocom_20240927_120843_1_5.pdf

Attempting to convert Dataset/TP_2004_3_lln_748_752_johri67_yahoocom_20240927_120843_1_5.pdf to text...
Successfully converted PDF to 5 images
Processing page 1/5
Processing page 2/5
Processing page 3/5
Processing page 4/5
Processing page 5/5
Text extraction complete. Extracted 20791 characters


Processing PDFs:  56%|█████▌    | 61/109 [1:15:57<47:48, 59.75s/it]


Processing: J_2023_SCC_OnLine_Bom_273_2023_1_AIR_Bom_R_Cri_842_johri67_yahoocom_20240917_144006_1_7.pdf

Attempting to convert Dataset/J_2023_SCC_OnLine_Bom_273_2023_1_AIR_Bom_R_Cri_842_johri67_yahoocom_20240917_144006_1_7.pdf to text...
Successfully converted PDF to 7 images
Processing page 1/7
Processing page 2/7
Processing page 3/7
Processing page 4/7
Processing page 5/7
Processing page 6/7
Processing page 7/7
Text extraction complete. Extracted 19253 characters


Processing PDFs:  57%|█████▋    | 62/109 [1:17:17<51:34, 65.84s/it]


Processing: N_2012_10_SCC_561_2013_1_SCC_Cri_105_2012_SCC_OnLi_johri67_yahoocom_20240927_121352_1_3.pdf

Attempting to convert Dataset/N_2012_10_SCC_561_2013_1_SCC_Cri_105_2012_SCC_OnLi_johri67_yahoocom_20240927_121352_1_3.pdf to text...
Successfully converted PDF to 3 images
Processing page 1/3
Processing page 2/3
Processing page 3/3
Text extraction complete. Extracted 9553 characters


Processing PDFs:  58%|█████▊    | 63/109 [1:17:49<42:48, 55.83s/it]


Processing: TP_2021_10_scc_706_716_johri67_yahoocom_20240917_141627_1_11.pdf

Attempting to convert Dataset/TP_2021_10_scc_706_716_johri67_yahoocom_20240917_141627_1_11.pdf to text...
Successfully converted PDF to 11 images
Processing page 1/11
Processing page 2/11
Processing page 3/11
Processing page 4/11
Processing page 5/11
Processing page 6/11
Processing page 7/11
Processing page 8/11
Processing page 9/11
Processing page 10/11
Processing page 11/11
Text extraction complete. Extracted 37794 characters


Processing PDFs:  59%|█████▊    | 64/109 [1:19:47<55:51, 74.47s/it]


Processing: TP_2005_5_scc_181_194_johri67_yahoocom_20240927_122545_1_14.pdf

Attempting to convert Dataset/TP_2005_5_scc_181_194_johri67_yahoocom_20240927_122545_1_14.pdf to text...
Successfully converted PDF to 14 images
Processing page 1/14
Processing page 2/14
Processing page 3/14
Processing page 4/14
Processing page 5/14
Processing page 6/14
Processing page 7/14
Processing page 8/14
Processing page 9/14
Processing page 10/14
Processing page 11/14
Processing page 12/14
Processing page 13/14
Processing page 14/14
Text extraction complete. Extracted 49977 characters


Processing PDFs:  60%|█████▉    | 65/109 [1:22:25<1:12:59, 99.54s/it]


Processing: TP_2003_8_scc_250_263_johri67_yahoocom_20240927_125926_1_14.pdf

Attempting to convert Dataset/TP_2003_8_scc_250_263_johri67_yahoocom_20240927_125926_1_14.pdf to text...
Successfully converted PDF to 14 images
Processing page 1/14
Processing page 2/14
Processing page 3/14
Processing page 4/14
Processing page 5/14
Processing page 6/14
Processing page 7/14
Processing page 8/14
Processing page 9/14
Processing page 10/14
Processing page 11/14
Processing page 12/14
Processing page 13/14
Processing page 14/14
Text extraction complete. Extracted 50044 characters


Processing PDFs:  61%|██████    | 66/109 [1:25:04<1:24:06, 117.37s/it]


Processing: J_2022_SCC_OnLine_NCLT_11945_johri67_yahoocom_20240917_145132_1_2.pdf

Attempting to convert Dataset/J_2022_SCC_OnLine_NCLT_11945_johri67_yahoocom_20240917_145132_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2


Processing PDFs:  61%|██████▏   | 67/109 [1:25:16<59:51, 85.52s/it]   

Text extraction complete. Extracted 2673 characters

Processing: J_2020_SCC_OnLine_Bom_3417_johri67_yahoocom_20240917_144306_1_1.pdf

Attempting to convert Dataset/J_2020_SCC_OnLine_Bom_3417_johri67_yahoocom_20240917_144306_1_1.pdf to text...
Successfully converted PDF to 1 images
Processing page 1/1


Processing PDFs:  62%|██████▏   | 68/109 [1:25:28<43:31, 63.71s/it]

Text extraction complete. Extracted 3225 characters

Processing: J_2019_SCC_OnLine_SC_2137_johri67_yahoocom_20240917_144453_1_1.pdf

Attempting to convert Dataset/J_2019_SCC_OnLine_SC_2137_johri67_yahoocom_20240917_144453_1_1.pdf to text...
Successfully converted PDF to 1 images
Processing page 1/1


Processing PDFs:  63%|██████▎   | 69/109 [1:25:39<31:47, 47.69s/it]

Text extraction complete. Extracted 2614 characters

Processing: TP_2010_9_scc_701_712_johri67_yahoocom_20240927_121603_1_12.pdf

Attempting to convert Dataset/TP_2010_9_scc_701_712_johri67_yahoocom_20240927_121603_1_12.pdf to text...
Successfully converted PDF to 12 images
Processing page 1/12
Processing page 2/12
Processing page 3/12
Processing page 4/12
Processing page 5/12
Processing page 6/12
Processing page 7/12
Processing page 8/12
Processing page 9/12
Processing page 10/12
Processing page 11/12
Processing page 12/12
Text extraction complete. Extracted 42940 characters


Processing PDFs:  64%|██████▍   | 70/109 [1:27:57<48:38, 74.83s/it]


Processing: J_2024_SCC_OnLine_SC_2398_johri67_yahoocom_20240917_150055_1_2.pdf

Attempting to convert Dataset/J_2024_SCC_OnLine_SC_2398_johri67_yahoocom_20240917_150055_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2


Processing PDFs:  65%|██████▌   | 71/109 [1:28:07<35:10, 55.54s/it]

Text extraction complete. Extracted 2651 characters

Processing: TP_2013_6_scc_333_347_johri67_yahoocom_20240927_125816_1_15.pdf

Attempting to convert Dataset/TP_2013_6_scc_333_347_johri67_yahoocom_20240927_125816_1_15.pdf to text...
Successfully converted PDF to 15 images
Processing page 1/15
Processing page 2/15
Processing page 3/15
Processing page 4/15
Processing page 5/15
Processing page 6/15
Processing page 7/15
Processing page 8/15
Processing page 9/15
Processing page 10/15
Processing page 11/15
Processing page 12/15
Processing page 13/15
Processing page 14/15
Processing page 15/15
Text extraction complete. Extracted 53035 characters


Processing PDFs:  66%|██████▌   | 72/109 [1:30:52<54:27, 88.31s/it]


Processing: J_2024_SCC_OnLine_All_2396_johri67_yahoocom_20240917_150612_1_4.pdf

Attempting to convert Dataset/J_2024_SCC_OnLine_All_2396_johri67_yahoocom_20240917_150612_1_4.pdf to text...
Successfully converted PDF to 4 images
Processing page 1/4
Processing page 2/4
Processing page 3/4
Processing page 4/4
Text extraction complete. Extracted 9144 characters


Processing PDFs:  67%|██████▋   | 73/109 [1:31:31<44:00, 73.34s/it]


Processing: J_2005_SCC_OnLine_Bom_861_johri67_yahoocom_20240927_120454_1_7.pdf

Attempting to convert Dataset/J_2005_SCC_OnLine_Bom_861_johri67_yahoocom_20240927_120454_1_7.pdf to text...
Successfully converted PDF to 7 images
Processing page 1/7
Processing page 2/7
Processing page 3/7
Processing page 4/7
Processing page 5/7
Processing page 6/7
Processing page 7/7
Text extraction complete. Extracted 23803 characters


Processing PDFs:  68%|██████▊   | 74/109 [1:33:15<48:14, 82.69s/it]


Processing: J_2021_SCC_OnLine_Bom_842_2021_5_Bom_CR_471_johri67_yahoocom_20240917_151454_1_4.pdf

Attempting to convert Dataset/J_2021_SCC_OnLine_Bom_842_2021_5_Bom_CR_471_johri67_yahoocom_20240917_151454_1_4.pdf to text...
Successfully converted PDF to 4 images
Processing page 1/4
Processing page 2/4
Processing page 3/4
Processing page 4/4
Text extraction complete. Extracted 9188 characters


Processing PDFs:  69%|██████▉   | 75/109 [1:33:53<39:17, 69.35s/it]


Processing: TP_2014_5_scc_689_696_johri67_yahoocom_20240927_125415_1_8.pdf

Attempting to convert Dataset/TP_2014_5_scc_689_696_johri67_yahoocom_20240927_125415_1_8.pdf to text...
Successfully converted PDF to 8 images
Processing page 1/8
Processing page 2/8
Processing page 3/8
Processing page 4/8
Processing page 5/8
Processing page 6/8
Processing page 7/8
Processing page 8/8
Text extraction complete. Extracted 29400 characters


Processing PDFs:  70%|██████▉   | 76/109 [1:35:27<42:07, 76.59s/it]


Processing: J_2020_SCC_OnLine_Bom_3417_johri67_yahoocom_20240917_144035_1_1.pdf

Attempting to convert Dataset/J_2020_SCC_OnLine_Bom_3417_johri67_yahoocom_20240917_144035_1_1.pdf to text...
Successfully converted PDF to 1 images
Processing page 1/1


Processing PDFs:  71%|███████   | 77/109 [1:35:40<30:38, 57.45s/it]

Text extraction complete. Extracted 3221 characters

Processing: TP_2002_1_scc_652_655_johri67_yahoocom_20240927_122056_1_4.pdf

Attempting to convert Dataset/TP_2002_1_scc_652_655_johri67_yahoocom_20240927_122056_1_4.pdf to text...
Successfully converted PDF to 4 images
Processing page 1/4
Processing page 2/4
Processing page 3/4
Processing page 4/4
Text extraction complete. Extracted 12909 characters


Processing PDFs:  72%|███████▏  | 78/109 [1:36:19<26:55, 52.10s/it]


Processing: TP_2023_9_scc_130_132_johri67_yahoocom_20240917_143914_1_3.pdf

Attempting to convert Dataset/TP_2023_9_scc_130_132_johri67_yahoocom_20240917_143914_1_3.pdf to text...
Successfully converted PDF to 3 images
Processing page 1/3
Processing page 2/3
Processing page 3/3
Text extraction complete. Extracted 8015 characters


Processing PDFs:  72%|███████▏  | 79/109 [1:36:45<22:05, 44.17s/it]


Processing: TP_2022_3_scc_633_646_johri67_yahoocom_20240927_122155_1_14.pdf

Attempting to convert Dataset/TP_2022_3_scc_633_646_johri67_yahoocom_20240927_122155_1_14.pdf to text...
Successfully converted PDF to 14 images
Processing page 1/14
Processing page 2/14
Processing page 3/14
Processing page 4/14
Processing page 5/14
Processing page 6/14
Processing page 7/14
Processing page 8/14
Processing page 9/14
Processing page 10/14
Processing page 11/14
Processing page 12/14
Processing page 13/14
Processing page 14/14
Text extraction complete. Extracted 50383 characters


Processing PDFs:  73%|███████▎  | 80/109 [1:39:25<38:06, 78.85s/it]


Processing: J_2021_SCC_OnLine_Del_3654_johri67_yahoocom_20240927_122348_1_2.pdf

Attempting to convert Dataset/J_2021_SCC_OnLine_Del_3654_johri67_yahoocom_20240927_122348_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 6759 characters


Processing PDFs:  74%|███████▍  | 81/109 [1:39:53<29:42, 63.66s/it]


Processing: TP_2005_11_scc_429_430_johri67_yahoocom_20240927_125259_1_2.pdf

Attempting to convert Dataset/TP_2005_11_scc_429_430_johri67_yahoocom_20240927_125259_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 5298 characters


Processing PDFs:  75%|███████▌  | 82/109 [1:40:09<22:15, 49.44s/it]


Processing: TP_2001_2_mah_lj_500_506_johri67_yahoocom_20240927_120525_1_7.pdf

Attempting to convert Dataset/TP_2001_2_mah_lj_500_506_johri67_yahoocom_20240927_120525_1_7.pdf to text...
Successfully converted PDF to 7 images
Processing page 1/7
Processing page 2/7
Processing page 3/7
Processing page 4/7
Processing page 5/7
Processing page 6/7
Processing page 7/7
Text extraction complete. Extracted 22754 characters


Processing PDFs:  76%|███████▌  | 83/109 [1:41:18<23:59, 55.38s/it]


Processing: TP_2018_12_scc_593_594_johri67_yahoocom_20240917_150021_1_2.pdf

Attempting to convert Dataset/TP_2018_12_scc_593_594_johri67_yahoocom_20240917_150021_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 4805 characters


Processing PDFs:  77%|███████▋  | 84/109 [1:41:35<18:14, 43.79s/it]


Processing: J_2022_SCC_OnLine_SC_942_johri67_yahoocom_20240927_122213_1_5.pdf

Attempting to convert Dataset/J_2022_SCC_OnLine_SC_942_johri67_yahoocom_20240927_122213_1_5.pdf to text...
Successfully converted PDF to 5 images
Processing page 1/5
Processing page 2/5
Processing page 3/5
Processing page 4/5
Processing page 5/5
Text extraction complete. Extracted 15378 characters


Processing PDFs:  78%|███████▊  | 85/109 [1:42:43<20:21, 50.90s/it]


Processing: J_2009_SCC_OnLine_Bom_1944_johri67_yahoocom_20240927_120636_1_4.pdf

Attempting to convert Dataset/J_2009_SCC_OnLine_Bom_1944_johri67_yahoocom_20240927_120636_1_4.pdf to text...
Successfully converted PDF to 4 images
Processing page 1/4
Processing page 2/4
Processing page 3/4
Processing page 4/4
Text extraction complete. Extracted 13992 characters


Processing PDFs:  79%|███████▉  | 86/109 [1:43:44<20:42, 54.01s/it]


Processing: N_2010_9_SCC_701_2010_SCC_OnLine_SC_941_johri67_yahoocom_20240927_121309_1_1.pdf

Attempting to convert Dataset/N_2010_9_SCC_701_2010_SCC_OnLine_SC_941_johri67_yahoocom_20240927_121309_1_1.pdf to text...
Successfully converted PDF to 1 images
Processing page 1/1
Text extraction complete. Extracted 4936 characters


Processing PDFs:  80%|███████▉  | 87/109 [1:44:01<15:42, 42.86s/it]


Processing: J_2022_SCC_OnLine_Bom_6591_johri67_yahoocom_20240917_144330_1_2.pdf

Attempting to convert Dataset/J_2022_SCC_OnLine_Bom_6591_johri67_yahoocom_20240917_144330_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 5307 characters


Processing PDFs:  81%|████████  | 88/109 [1:44:24<12:55, 36.91s/it]


Processing: J_2010_SCC_OnLine_Del_1648_johri67_yahoocom_20240927_120933_1_8.pdf

Attempting to convert Dataset/J_2010_SCC_OnLine_Del_1648_johri67_yahoocom_20240927_120933_1_8.pdf to text...
Successfully converted PDF to 8 images
Processing page 1/8
Processing page 2/8
Processing page 3/8
Processing page 4/8
Processing page 5/8
Processing page 6/8
Processing page 7/8
Processing page 8/8
Text extraction complete. Extracted 20995 characters


Processing PDFs:  82%|████████▏ | 89/109 [1:45:57<17:54, 53.73s/it]


Processing: TP_2013_10_scc_292_298_johri67_yahoocom_20240927_121645_1_7.pdf

Attempting to convert Dataset/TP_2013_10_scc_292_298_johri67_yahoocom_20240927_121645_1_7.pdf to text...
Successfully converted PDF to 7 images
Processing page 1/7
Processing page 2/7
Processing page 3/7
Processing page 4/7
Processing page 5/7
Processing page 6/7
Processing page 7/7
Text extraction complete. Extracted 24314 characters


Processing PDFs:  83%|████████▎ | 90/109 [1:47:13<19:12, 60.64s/it]


Processing: J_2023_SCC_OnLine_SC_2108_johri67_yahoocom_20240917_150747_1_4.pdf

Attempting to convert Dataset/J_2023_SCC_OnLine_SC_2108_johri67_yahoocom_20240917_150747_1_4.pdf to text...
Successfully converted PDF to 4 images
Processing page 1/4
Processing page 2/4
Processing page 3/4
Processing page 4/4
Text extraction complete. Extracted 5887 characters


Processing PDFs:  83%|████████▎ | 91/109 [1:47:34<14:36, 48.67s/it]


Processing: TP_2007_7_scc_394_413_johri67_yahoocom_20240927_122824_1_20.pdf

Attempting to convert Dataset/TP_2007_7_scc_394_413_johri67_yahoocom_20240927_122824_1_20.pdf to text...
Successfully converted PDF to 20 images
Processing page 1/20
Processing page 2/20
Processing page 3/20
Processing page 4/20
Processing page 5/20
Processing page 6/20
Processing page 7/20
Processing page 8/20
Processing page 9/20
Processing page 10/20
Processing page 11/20
Processing page 12/20
Processing page 13/20
Processing page 14/20
Processing page 15/20
Processing page 16/20
Processing page 17/20
Processing page 18/20
Processing page 19/20
Processing page 20/20
Text extraction complete. Extracted 71708 characters


Processing PDFs:  84%|████████▍ | 92/109 [1:51:22<29:02, 102.48s/it]


Processing: J_2007_SCC_OnLine_Bom_1596_2008_1_AIR_Bom_R_284_2008_C_johri67_yahoocom_20240927_120429_1_3.pdf

Attempting to convert Dataset/J_2007_SCC_OnLine_Bom_1596_2008_1_AIR_Bom_R_284_2008_C_johri67_yahoocom_20240927_120429_1_3.pdf to text...
Successfully converted PDF to 3 images
Processing page 1/3
Processing page 2/3
Processing page 3/3
Text extraction complete. Extracted 8646 characters


Processing PDFs:  85%|████████▌ | 93/109 [1:51:58<22:00, 82.51s/it] 


Processing: TP_2002_4_scc_721_726_johri67_yahoocom_20240927_125718_1_6.pdf

Attempting to convert Dataset/TP_2002_4_scc_721_726_johri67_yahoocom_20240927_125718_1_6.pdf to text...
Successfully converted PDF to 6 images
Processing page 1/6
Processing page 2/6
Processing page 3/6
Processing page 4/6
Processing page 5/6
Processing page 6/6
Text extraction complete. Extracted 20662 characters


Processing PDFs:  86%|████████▌ | 94/109 [1:53:05<19:24, 77.67s/it]


Processing: TP_2023_2_hcc_bom_769_792_johri67_yahoocom_20240917_151425_1_24.pdf

Attempting to convert Dataset/TP_2023_2_hcc_bom_769_792_johri67_yahoocom_20240917_151425_1_24.pdf to text...
Successfully converted PDF to 24 images
Processing page 1/24
Processing page 2/24
Processing page 3/24
Processing page 4/24
Processing page 5/24
Processing page 6/24
Processing page 7/24
Processing page 8/24
Processing page 9/24
Processing page 10/24
Processing page 11/24
Processing page 12/24
Processing page 13/24
Processing page 14/24
Processing page 15/24
Processing page 16/24
Processing page 17/24
Processing page 18/24
Processing page 19/24
Processing page 20/24
Processing page 21/24
Processing page 22/24
Processing page 23/24
Processing page 24/24
Text extraction complete. Extracted 78039 characters


Processing PDFs:  87%|████████▋ | 95/109 [1:57:05<29:30, 126.44s/it]


Processing: J_2014_SCC_OnLine_Chh_20_2014_Cri_LJ_2684_2015_145_AIC_johri67_yahoocom_20240927_125458_1_5.pdf

Attempting to convert Dataset/J_2014_SCC_OnLine_Chh_20_2014_Cri_LJ_2684_2015_145_AIC_johri67_yahoocom_20240927_125458_1_5.pdf to text...
Successfully converted PDF to 5 images
Processing page 1/5
Processing page 2/5
Processing page 3/5
Processing page 4/5
Processing page 5/5
Text extraction complete. Extracted 15463 characters


Processing PDFs:  88%|████████▊ | 96/109 [1:58:14<23:38, 109.15s/it]


Processing: TP_1994_4_scc_78_85_johri67_yahoocom_20240927_125757_1_8.pdf

Attempting to convert Dataset/TP_1994_4_scc_78_85_johri67_yahoocom_20240927_125757_1_8.pdf to text...
Successfully converted PDF to 8 images
Processing page 1/8
Processing page 2/8
Processing page 3/8
Processing page 4/8
Processing page 5/8
Processing page 6/8
Processing page 7/8
Processing page 8/8
Text extraction complete. Extracted 28293 characters


Processing PDFs:  89%|████████▉ | 97/109 [1:59:37<20:18, 101.51s/it]


Processing: J_2009_SCC_OnLine_Bom_1190_johri67_yahoocom_20240927_120554_1_5.pdf

Attempting to convert Dataset/J_2009_SCC_OnLine_Bom_1190_johri67_yahoocom_20240927_120554_1_5.pdf to text...
Successfully converted PDF to 5 images
Processing page 1/5
Processing page 2/5
Processing page 3/5
Processing page 4/5
Processing page 5/5
Text extraction complete. Extracted 21245 characters


Processing PDFs:  90%|████████▉ | 98/109 [2:01:13<18:18, 99.82s/it] 


Processing: J_2022_SCC_OnLine_Del_997_johri67_yahoocom_20240917_142700_1_2.pdf

Attempting to convert Dataset/J_2022_SCC_OnLine_Del_997_johri67_yahoocom_20240917_142700_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 5911 characters


Processing PDFs:  91%|█████████ | 99/109 [2:01:38<12:53, 77.34s/it]


Processing: J_2013_SCC_OnLine_Chh_178_2013_Cri_LJ_1732_2013_2_CGLJ_johri67_yahoocom_20240927_125543_1_8.pdf

Attempting to convert Dataset/J_2013_SCC_OnLine_Chh_178_2013_Cri_LJ_1732_2013_2_CGLJ_johri67_yahoocom_20240927_125543_1_8.pdf to text...
Successfully converted PDF to 8 images
Processing page 1/8
Processing page 2/8
Processing page 3/8
Processing page 4/8
Processing page 5/8
Processing page 6/8
Processing page 7/8
Processing page 8/8
Text extraction complete. Extracted 26617 characters


Processing PDFs:  92%|█████████▏| 100/109 [2:03:35<13:24, 89.34s/it]


Processing: TP_2011_10_scc_215_223_johri67_yahoocom_20240927_121111_1_9.pdf

Attempting to convert Dataset/TP_2011_10_scc_215_223_johri67_yahoocom_20240927_121111_1_9.pdf to text...
Successfully converted PDF to 9 images
Processing page 1/9
Processing page 2/9
Processing page 3/9
Processing page 4/9
Processing page 5/9
Processing page 6/9
Processing page 7/9
Processing page 8/9
Processing page 9/9
Text extraction complete. Extracted 30715 characters


Processing PDFs:  93%|█████████▎| 101/109 [2:05:15<12:18, 92.29s/it]


Processing: TP_2014_15_scc_357_359_johri67_yahoocom_20240927_121051_1_3.pdf

Attempting to convert Dataset/TP_2014_15_scc_357_359_johri67_yahoocom_20240927_121051_1_3.pdf to text...
Successfully converted PDF to 3 images
Processing page 1/3
Processing page 2/3
Processing page 3/3
Text extraction complete. Extracted 8529 characters


Processing PDFs:  94%|█████████▎| 102/109 [2:05:43<08:31, 73.04s/it]


Processing: J_2023_SCC_OnLine_SC_66_johri67_yahoocom_20240917_151840_1_5.pdf

Attempting to convert Dataset/J_2023_SCC_OnLine_SC_66_johri67_yahoocom_20240917_151840_1_5.pdf to text...
Successfully converted PDF to 5 images
Processing page 1/5
Processing page 2/5
Processing page 3/5
Processing page 4/5
Processing page 5/5
Text extraction complete. Extracted 13709 characters


Processing PDFs:  94%|█████████▍| 103/109 [2:06:43<06:55, 69.25s/it]


Processing: J_2022_SCC_OnLine_SC_1032_johri67_yahoocom_20240917_142536_1_5.pdf

Attempting to convert Dataset/J_2022_SCC_OnLine_SC_1032_johri67_yahoocom_20240917_142536_1_5.pdf to text...
Successfully converted PDF to 5 images
Processing page 1/5
Processing page 2/5
Processing page 3/5
Processing page 4/5
Processing page 5/5
Text extraction complete. Extracted 15177 characters


Processing PDFs:  95%|█████████▌| 104/109 [2:07:50<05:42, 68.52s/it]


Processing: J_2023_SCC_OnLine_Cal_370_johri67_yahoocom_20240917_150152_1_14.pdf

Attempting to convert Dataset/J_2023_SCC_OnLine_Cal_370_johri67_yahoocom_20240917_150152_1_14.pdf to text...
Successfully converted PDF to 14 images
Processing page 1/14
Processing page 2/14
Processing page 3/14
Processing page 4/14
Processing page 5/14
Processing page 6/14
Processing page 7/14
Processing page 8/14
Processing page 9/14
Processing page 10/14
Processing page 11/14
Processing page 12/14
Processing page 13/14
Processing page 14/14
Text extraction complete. Extracted 37189 characters


Processing PDFs:  96%|█████████▋| 105/109 [2:10:42<06:37, 99.48s/it]


Processing: TP_2010_12_scc_190_198_johri67_yahoocom_20240927_123953_1_9.pdf

Attempting to convert Dataset/TP_2010_12_scc_190_198_johri67_yahoocom_20240927_123953_1_9.pdf to text...
Successfully converted PDF to 9 images
Processing page 1/9
Processing page 2/9
Processing page 3/9
Processing page 4/9
Processing page 5/9
Processing page 6/9
Processing page 7/9
Processing page 8/9
Processing page 9/9
Text extraction complete. Extracted 29705 characters


Processing PDFs:  97%|█████████▋| 106/109 [2:12:17<04:54, 98.30s/it]


Processing: TP_2009_4_scc_439_446_johri67_yahoocom_20240927_122023_1_8.pdf

Attempting to convert Dataset/TP_2009_4_scc_439_446_johri67_yahoocom_20240927_122023_1_8.pdf to text...
Successfully converted PDF to 8 images
Processing page 1/8
Processing page 2/8
Processing page 3/8
Processing page 4/8
Processing page 5/8
Processing page 6/8
Processing page 7/8
Processing page 8/8
Text extraction complete. Extracted 26906 characters


Processing PDFs:  98%|█████████▊| 107/109 [2:13:43<03:09, 94.67s/it]


Processing: J_2023_SCC_OnLine_SC_1310_johri67_yahoocom_20240917_142228_1_2.pdf

Attempting to convert Dataset/J_2023_SCC_OnLine_SC_1310_johri67_yahoocom_20240917_142228_1_2.pdf to text...
Successfully converted PDF to 2 images
Processing page 1/2
Processing page 2/2
Text extraction complete. Extracted 5382 characters


Processing PDFs:  99%|█████████▉| 108/109 [2:14:07<01:13, 73.33s/it]


Processing: J_2001_SCC_OnLine_Bom_1125_2002_Cri_LJ_1280_johri67_yahoocom_20240927_120727_1_4.pdf

Attempting to convert Dataset/J_2001_SCC_OnLine_Bom_1125_2002_Cri_LJ_1280_johri67_yahoocom_20240927_120727_1_4.pdf to text...
Successfully converted PDF to 4 images
Processing page 1/4
Processing page 2/4
Processing page 3/4
Processing page 4/4
Text extraction complete. Extracted 12988 characters


Processing PDFs: 100%|██████████| 109/109 [2:15:03<00:00, 74.34s/it]

Results saved to output_csv



