In [19]:
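# Data provenance (U.S. Census Bureau LEHD):
#   LODES8 California origin-destination (OD) directory listing:
#     https://lehd.ces.census.gov/data/lodes/LODES8/ca/od/
#   LEHD data landing page:
#     https://lehd.ces.census.gov/data/
#   Example file (main OD, all jobs JT00, 2021):
#     https://lehd.ces.census.gov/data/lodes/LODES8/ca/od/ca_od_main_JT00_2021.csv.gz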

In [1]:
import gzip
import os
import shutil
import time

import requests

In [8]:
def download_and_unzip(year, lodes_url_base):
    """Download one year of the LODES OD file and decompress it to CSV.

    The remote URL is ``lodes_url_base + str(year) + ".csv.gz"``. Its
    existence is probed with a HEAD request first; if it exists the user is
    asked to press enter before the download starts.

    Parameters
    ----------
    year : int or str
        Year to fetch; converted with ``str()`` when building URL and paths.
    lodes_url_base : str
        URL prefix up to (but not including) the year.

    Returns
    -------
    bool
        ``True`` once the CSV has been written, ``False`` if the HEAD probe
        did not return HTTP 200.

    Raises
    ------
    requests.RequestException
        On connection problems, timeouts, or a non-success status on the
        actual download.
    OSError
        If the archive or CSV cannot be written.
    """
    year_str = str(year)
    lodes_url = lodes_url_base + year_str + ".csv.gz"
    gz_path = 'ca_od_main_JT00_' + year_str + '.csv.gz'
    csv_dir = '../lodes_od_data'
    csv_path = csv_dir + '/ca_od_main_JT00_' + year_str + '.csv'

    # requests.head() does not follow redirects by default, while the GET
    # below does; follow them here too so both requests agree on existence.
    # The timeout keeps a stalled connection from hanging the kernel forever.
    r = requests.head(lodes_url, allow_redirects=True, timeout=30)
    if r.status_code != 200:
        return False

    print("LEHD Origin-Destination Employment Statistics (LODES) for " + year_str + " exists. To download, press enter.")
    input()

    # Create the output directory up front so a missing folder is not
    # discovered only after a multi-minute download.
    os.makedirs(csv_dir, exist_ok=True)

    # download file
    print("Downloading...")
    download_start = time.time()
    # Stream to disk in chunks instead of holding the whole archive in
    # memory, and close both the response and the file deterministically.
    with requests.get(lodes_url, allow_redirects=True, stream=True, timeout=30) as response:
        # Fail loudly rather than saving an HTML error page as a .gz file.
        response.raise_for_status()
        with open(gz_path, 'wb') as f_gz:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f_gz.write(chunk)
    download_end = time.time()
    print("Downloaded in: " + str((download_end - download_start)/60) + " minutes")

    # unzip file
    print("Unzipping...")
    unzip_start = time.time()
    with gzip.open(gz_path, 'rb') as f_in:
        with open(csv_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    unzip_end = time.time()
    print("Unzipped in: " + str((unzip_end - unzip_start)/60) + " minutes")
    print("CSV file saved to: " + csv_path)
    return True

In [9]:
# URL prefix shared by every California main-OD (all jobs, JT00) LODES8 file;
# the year and the ".csv.gz" suffix are appended by the download code.
lodes_url_base = (
    "https://lehd.ces.census.gov/data/lodes/LODES8/ca/od/"
    "ca_od_main_JT00_"
)

In [11]:
# Walk backwards from the current calendar year until a published LODES
# file is found and downloaded.
#
# The search is bounded: without a floor, a persistent non-200 response
# (site down, blocked network, changed URL scheme) would make the loop
# decrement the year forever while hammering the server.
# NOTE(review): 2002 is the first LODES data year per the LEHD technical
# documentation for most states -- confirm for this state/version.
EARLIEST_LODES_YEAR = 2002

for year in range(time.localtime().tm_year, EARLIEST_LODES_YEAR - 1, -1):
    if download_and_unzip(year, lodes_url_base):
        break
    if year > EARLIEST_LODES_YEAR:
        print("LEHD Origin-Destination Employment Statistics (LODES) for " + str(year) + " does not exist. Checking for " + str(year - 1) + "...")
    else:
        print("LEHD Origin-Destination Employment Statistics (LODES) for " + str(year) + " does not exist.")
else:
    # Reached only when no year in the range could be downloaded.
    raise RuntimeError(
        "No LODES file found for any year back to " + str(EARLIEST_LODES_YEAR)
        + "; check network access and lodes_url_base."
    )

LEHD Origin-Destination Employment Statistics (LODES) for 2024 does not exist. Checking for 2023...
LEHD Origin-Destination Employment Statistics (LODES) for 2023 does not exist. Checking for 2022...
LEHD Origin-Destination Employment Statistics (LODES) for 2022 does not exist. Checking for 2021...
LEHD Origin-Destination Employment Statistics (LODES) for 2021 exists. To download, press enter.
Downloading...
Downloaded in: 3.4092824816703797 minutes
Unzipping...
Unzipped in: 0.05682069857915242 minutes
CSV file saved to: ../lodes_od_data/ca_od_main_JT00_2021.csv


In [22]:
# METHOD 1: User Input Based
#
# Keep asking for a year until a file for that year has been downloaded and
# unzipped. The download itself is delegated to download_and_unzip(), which
# probes the URL, asks for confirmation, downloads, and decompresses.

while True:
    # Prompt exactly once per iteration. (Prompting again in the failure
    # branch would silently discard that answer when the loop restarts.)
    year = input("Enter year: ").strip()

    # The value is spliced into a URL and a file path, so only accept a
    # plain four-digit year.
    if not (len(year) == 4 and year.isascii() and year.isdigit()):
        print("'" + year + "' is not a four-digit year. Please enter a valid year.")
        continue

    # download_and_unzip() returns False when the file for that year does
    # not exist on the server.
    if download_and_unzip(year, lodes_url_base):
        break  # Exit the loop when the file is successfully downloaded and unzipped

    print("LEHD Origin-Destination Employment Statistics (LODES) for " + year + " does not exist. Please enter a valid year.")

LEHD Origin-Destination Employment Statistics (LODES) for 2030 does not exist. Please enter a valid year.
LEHD Origin-Destination Employment Statistics (LODES) for 2019 exists. To download, press enter.
Downloading...
Downloaded in: 3.733450130621592 minutes
Unzipping...
Unzipped in: 0.07927983204523723 minutes
CSV file saved to: ../lodes_od_data/ca_od_main_JT00_2019.csv
