In [9]:
import os
import requests
import zipfile

# All downloaded archives and their extracted contents are placed under this
# directory; create it up front (a pre-existing directory is not an error).
data_dir = './data'
try:
    os.makedirs(data_dir, exist_ok=True)
except PermissionError as err:
    # Report the failure for the notebook reader, then let the original
    # exception propagate so the cell stops here.
    print(f"Error creating data directory: {err}")
    raise

def download_file(url, filename, timeout=30):
    """
    Downloads a file from a given URL to the specified filename.

    The response body is streamed in 8 KiB chunks into a temporary
    ``<filename>.part`` file, which is moved to ``filename`` only after the
    whole transfer has succeeded. An interrupted or failed download therefore
    never leaves a truncated file at ``filename``.

    Args:
        url: URL to fetch.
        filename: Destination path for the downloaded file.
        timeout: Value passed to ``requests.get(timeout=...)``; without it a
            stalled server would block the call indefinitely.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an
            HTTP error status (via ``raise_for_status``).
        OSError: If the destination cannot be written (not caught here).
    """
    partial_path = f"{filename}.part"
    try:
        # The context manager releases the connection even if streaming fails.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Only a completely written file is moved to its final name.
        os.replace(partial_path, filename)
        print(f"Downloaded: {filename}")
    except requests.RequestException as e:
        print(f"Error downloading {url}: {e}")
        raise
    finally:
        # Best-effort cleanup of a leftover partial file; after a successful
        # os.replace the path no longer exists and nothing happens.
        if os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError:
                pass

def download_and_extract_zip(url, dest_dir):
    """
    Downloads a ZIP file from a URL and extracts its contents to a destination directory.

    The archive is stored in ``dest_dir`` under the last component of the
    URL's path. If a valid ZIP with that name is already present it is reused
    instead of being downloaded again; a file that is present but is not a
    valid ZIP (for example a truncated earlier download) is fetched again.

    Args:
        url: URL of the ZIP archive.
        dest_dir: Directory that receives both the archive and its contents.

    Raises:
        ValueError: If no file name can be derived from the URL's path.
        zipfile.BadZipFile: If the archive cannot be read as a ZIP file.
        PermissionError: If the archive or its contents cannot be written.
    """
    # Function-scope import keeps this definition self-contained.
    from urllib.parse import urlparse

    # Use only the URL's path so a query string or fragment
    # (e.g. "file.zip?signature=...") does not end up in the local file name.
    archive_name = os.path.basename(urlparse(url).path)
    if not archive_name:
        raise ValueError(f"Cannot determine a file name from URL: {url}")
    filename = os.path.join(dest_dir, archive_name)

    # Reuse a previously downloaded archive only if it is a readable ZIP.
    have_archive = os.path.exists(filename)
    if have_archive and not zipfile.is_zipfile(filename):
        print(f"Existing file is not a valid ZIP, downloading again: {filename}")
        have_archive = False

    if not have_archive:
        try:
            download_file(url, filename)
        except PermissionError as e:
            print(f"Permission denied when downloading to {filename}: {e}")
            raise
    else:
        print(f"File already exists: {filename}")

    # Extract ZIP contents
    try:
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall(dest_dir)
        print(f"Extracted: {filename} to {dest_dir}")
    except zipfile.BadZipFile as e:
        print(f"Error extracting {filename}: {e}")
        raise
    except PermissionError as e:
        print(f"Permission denied when extracting {filename}: {e}")
        raise

In [11]:
# Fetch the SafeGraph archive into data_dir and unpack it there
base_url = 'https://public-read-access.s3.amazonaws.com/SenzingComPosts/EvalData/'
filename = 'SafeGraph_LasVegas_Partial.json.zip'

download_and_extract_zip(f"{base_url}{filename}", data_dir)

File already exists: ./data/SafeGraph_LasVegas_Partial.json.zip
Extracted: ./data/SafeGraph_LasVegas_Partial.json.zip to ./data


In [14]:
# Fetch the Dept Labor Whisard archive into data_dir and unpack it there
base_url = 'https://public-read-access.s3.amazonaws.com/SenzingComPosts/EvalData/'
filename = 'Dept_Labor_Whisard_LasVegas.json.zip'

download_and_extract_zip(f"{base_url}{filename}", data_dir)

Downloaded: ./data/Dept_Labor_Whisard_LasVegas.json.zip
Extracted: ./data/Dept_Labor_Whisard_LasVegas.json.zip to ./data


In [15]:
# Fetch the PPP loans archive into data_dir and unpack it there
base_url = 'https://public-read-access.s3.amazonaws.com/SenzingComPosts/EvalData/'
filename = 'PPP_Loans_Over_150k_LasVegas.json.zip'

download_and_extract_zip(f"{base_url}{filename}", data_dir)

Downloaded: ./data/PPP_Loans_Over_150k_LasVegas.json.zip
Extracted: ./data/PPP_Loans_Over_150k_LasVegas.json.zip to ./data
