# NEET-UG 2024 Data Extraction - Tamil Nadu

### The code below can be used to extract data for any state. Just set the `start_num` and `end_num` variables to the minimum and maximum exam codes, respectively, for that state.

In [None]:
import tqdm
import requests
from urllib.parse import urlparse
from requests.exceptions import RequestException
from concurrent.futures import ThreadPoolExecutor, as_completed

def download_pdf_from_url(url, save_path, timeout=30):
    """Download the PDF at ``url`` and store it under ``save_path``.

    The output file name is the last path segment of ``url``; it is appended
    directly to ``save_path``, so ``save_path`` should end with a path
    separator when it names a directory.

    Responses whose status is not 200, or whose media type is not
    ``application/pdf``, are skipped silently. Request failures (including
    timeouts) are reported on stdout and skipped.

    Args:
        url: Address of the PDF to fetch.
        save_path: Prefix prepended to the file name when writing to disk.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument. Defaults to 30.

    Returns:
        None.
    """
    try:
        # Without a timeout a stalled connection would block this worker
        # thread indefinitely; a timeout surfaces as a RequestException.
        response = requests.get(url, timeout=timeout)
    except RequestException as e:
        print(f"Skipping URL {url}: Error occurred during request: {str(e)}")
        return

    if response.status_code != 200:
        return

    # Compare only the media type: the header may carry parameters
    # (e.g. "application/pdf; charset=binary") or differ in letter case.
    content_type = response.headers.get('content-type') or ''
    media_type = content_type.split(';', 1)[0].strip().lower()
    if media_type != 'application/pdf':
        return

    filename = urlparse(url).path.split('/')[-1]
    with open(save_path + filename, 'wb') as f:
        f.write(response.content)

def process_batch(start, end, url_base, save_path):
    """Attempt to download every numbered PDF from ``start`` to ``end`` inclusive.

    Each URL is built as ``url_base`` followed by the number and ``.pdf``,
    and is handed to ``download_pdf_from_url`` together with ``save_path``.
    Nothing is downloaded when ``end`` is less than ``start``.
    """
    pdf_urls = (f"{url_base}{number}.pdf" for number in range(start, end + 1))
    for pdf_url in pdf_urls:
        download_pdf_from_url(pdf_url, save_path)

# Base address; each candidate file is requested as <url_base><number>.pdf.
url_base = 'https://neetfs.ntaonline.in/NEET_2024_Result/'

# Inclusive bounds of the numeric codes to try.
start_num = 410000
end_num = 414000

# Prefix for saved PDFs; the file name is appended directly to this string,
# so the trailing separator is required.
save_path = 'D:\\NEET UG 2024 Analysis\\Center Data PDFs\\'

num_threads = 10
# Codes handled per worker; the final worker also takes any remainder.
batch_size = (end_num - start_num + 1) // num_threads

with ThreadPoolExecutor(max_workers=num_threads) as executor:
    futures = []
    final_worker = num_threads - 1
    for worker in range(num_threads):
        first_code = start_num + worker * batch_size
        # Every worker gets a full batch except the last, which runs to end_num.
        last_code = end_num if worker == final_worker else first_code + batch_size - 1
        futures.append(
            executor.submit(process_batch, first_code, last_code, url_base, save_path)
        )

    # One progress tick per finished batch; result() re-raises any worker error.
    for finished in tqdm.tqdm(as_completed(futures), total=num_threads):
        finished.result()
