# Converting .tif to .pdf

In [7]:
from PIL import Image, ImageSequence
import os


def tiff_to_pdf(tiff_path: str) -> str:

    pdf_path = tiff_path.replace(".tif", ".pdf")
    if not os.path.exists(tiff_path):
        raise Exception(f"{tiff_path} not found")
    image = Image.open(tiff_path)

    images = []
    for i, page in enumerate(ImageSequence.Iterator(image)):
        page = page.convert("RGB")
        images.append(page)
    if len(images) == 1:
        images[0].save(pdf_path)
    else:
        images[0].save(pdf_path, save_all=True, append_images=images[1:])
    return pdf_path

In [9]:
# Test the above function
tiff_to_pdf(r"C:\Users\dane.parks\Downloads\83142-001.tif")

'C:\\Users\\dane.parks\\Downloads\\83142-001.pdf'

# Creating/Extracting Zip files

In [153]:
import shutil

output_filename = r"C:\Users\dane.parks\Downloads\Nikhil Files"
folder_to_zip = r"C:\Users\dane.parks\Downloads\Nikhil Files"
shutil.make_archive(output_filename, "zip", folder_to_zip)

'C:\\Users\\dane.parks\\Downloads\\Nikhil Files.zip'

In [154]:
import zipfile

file_to_extract = r"C:\Users\dane.parks\Downloads\Nikhil Files.zip"
directory_to_extract_to = r"C:\Users\dane.parks\Downloads\Nikhil Files"

In [155]:
with zipfile.ZipFile(file_to_extract, "r") as zip_ref:
    zip_ref.extractall(directory_to_extract_to)

# Downloading from a State FTP page

https://ftp.dot.state.oh.us/pub/construction/JEF7MineGroutPDB/RID/

In [144]:
import requests
from bs4 import BeautifulSoup
from pathlib import Path

year = "2023"
url = f"https://ftp.dot.state.oh.us/pub/construction/JEF7MineGroutPDB/RID/{year}/"

In [145]:
def get_list_of_files(url):
    reqs = requests.get(url)
    soup = BeautifulSoup(reqs.text, "html.parser")

    short_urls = []
    for link in soup.find_all("a"):
        short_urls.append(link.get("href"))

    root = "https://ftp.dot.state.oh.us/"
    full_urls = []
    for url in short_urls:
        full_urls.append(root + url)
        print(f"{root+url}")

    return full_urls

In [146]:
def download_file(url):
    response = requests.get(url)
    if "content-disposition" in response.headers:
        content_disposition = response.headers["content-disposition"]
        filename = content_disposition.split("filename=")[1]
    else:
        filename = url.split("/")[-1]
    with open(
        Path(
            "C:\\Users\\dane.parks\\Downloads\\Nikhil Files\\"
            + Path(url).parent.name
            + f"\\{filename}"
        ),
        mode="wb",
    ) as file:
        file.write(response.content)
    print(f"Downloaded file {filename}")

In [147]:
full_urls = get_list_of_files(url)

https://ftp.dot.state.oh.us//pub/construction/JEF7MineGroutPDB/RID/
https://ftp.dot.state.oh.us//pub/construction/JEF7MineGroutPDB/RID/2023/20220627_102458_AsFiled.pdf


In [149]:
for url in full_urls:
    if Path(url).suffix != "":
        download_file(url)
    else:
        pass

Downloaded file 20220627_102458_AsFiled.pdf
