# In [None]:
import requests
from PyPDF2 import PdfMerger
import os
from datetime import datetime
import concurrent.futures

def download_pdf(url, filename, timeout=30):
    """Download ``url`` and save the response body to ``filename``.

    Args:
        url: Address fetched with an HTTP GET.
        filename: Path the response body is written to, in binary mode.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout a stalled connection would block the caller
            indefinitely.

    Returns:
        ``filename`` if the server answered with status 200 and a non-empty
        body; ``None`` otherwise, including when any exception was raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200 and response.content:
            with open(filename, 'wb') as f:
                f.write(response.content)
            return filename
    except Exception as e:
        # Deliberately best-effort: a failed page is reported and signalled
        # to the caller through the None return value instead of raising.
        print(f"Error downloading {url}: {e}")
    return None

def merge_pdfs(pdf_files, output_filename):
    """Merge PDF files, ordered by page number, into ``output_filename``.

    Args:
        pdf_files: Iterable of paths whose base name looks like
            ``<prefix>_<number>.pdf`` (e.g. ``page_12.pdf``). Falsy entries
            such as ``None`` are skipped.
        output_filename: Path of the merged PDF to write.

    Returns:
        None. Success or failure is reported on stdout; exceptions raised
        while sorting, appending, or writing are caught and printed.
    """
    def page_number(path):
        # "page_12.pdf" -> 12. Only the base name is parsed so that an
        # underscore in a directory component cannot confuse the split.
        return int(os.path.basename(path).split('_')[1].split('.')[0])

    try:
        # Filter before sorting: the sort key cannot handle None, so the
        # falsy entries have to be dropped first rather than inside the loop.
        ordered = sorted((pdf for pdf in pdf_files if pdf), key=page_number)
        merger = PdfMerger()
        try:
            for pdf in ordered:
                merger.append(pdf)
            merger.write(output_filename)
        finally:
            # Release the merger's file handles even when append/write fails.
            merger.close()
        print(f"Successfully merged {len(ordered)} PDFs into {output_filename}")
    except Exception as e:
        print(f"Error merging PDFs: {e}")
def download_and_merge_newspaper(date_str, max_pages=40, batch_size=10):
    """Download every page of one edition and merge them into a single PDF.

    Pages are fetched concurrently in batches. Each page is saved as
    ``page_<n>.pdf`` in the current directory, the pages are merged into
    ``PUDHARI_KOLHAPUR_<date_str>.pdf``, and the per-page files are removed
    once the merged file exists.

    Args:
        date_str: Edition date as ``YYYYMMDD``; it is sliced into year,
            month and day to build the download URL.
        max_pages: Highest page number that will be requested.
        batch_size: Number of pages requested concurrently per batch.

    Raises:
        ValueError: If ``max_pages`` or ``batch_size`` is smaller than 1.
    """
    if max_pages < 1 or batch_size < 1:
        raise ValueError("max_pages and batch_size must be at least 1")

    base_url = "https://epaper.pudhari.news/download.php?file=https://enewspapr.com/News/PUDHARI/KOL/{year}/{month}/{day}/{date_str}_{page}.PDF&pageno={page}"

    year, month, day = date_str[:4], date_str[4:6], date_str[6:]
    # Fill in the date parts now and leave a "{page}" placeholder behind so
    # the page number can be substituted per request.
    formatted_url = base_url.format(year=year, month=month, day=day, date_str=date_str, page="{page}")

    pdf_files = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as executor:
        for first_page in range(1, max_pages + 1, batch_size):
            pages = range(first_page, min(first_page + batch_size, max_pages + 1))
            futures = [
                executor.submit(download_pdf, formatted_url.format(page=page), f"page_{page}.pdf")
                for page in pages
            ]
            # Collect results in page order (not completion order) so the
            # stop decision below is deterministic.
            results = [future.result() for future in futures]
            pdf_files.extend(name for name in results if name)
            # A missing highest-numbered page means the edition ended inside
            # this batch, so requesting further pages is pointless.
            if not results[-1]:
                break

    if not pdf_files:
        print("No pages were downloaded. Please check the date and try again.")
        return

    output_filename = f"PUDHARI_KOLHAPUR_{date_str}.pdf"
    merge_pdfs(pdf_files, output_filename)

    # merge_pdfs only reports failures by printing, so the presence of the
    # output file is the signal available here. Keep the page files when it
    # is missing instead of deleting the only copy of the download.
    if not os.path.exists(output_filename):
        print(f"Merged file {output_filename} was not created; keeping {len(pdf_files)} downloaded page files.")
        return

    for pdf in pdf_files:
        os.remove(pdf)

    file_size = os.path.getsize(output_filename) / (1024 * 1024)
    print(f"Downloaded {len(pdf_files)} pages into {output_filename} with size {file_size:.2f} MB.")

if __name__ == "__main__":
    # Script entry point: fetch the edition for the current local date,
    # rendered as YYYYMMDD.
    today_date = f"{datetime.now():%Y%m%d}"
    download_and_merge_newspaper(today_date)