In [5]:
import sys
from pathlib import Path

# The project root is taken to be the parent of the current working directory
PROJECT_ROOT = Path().resolve().parent
print("Project root:", PROJECT_ROOT)

# Register the project root on the import path (only once, so re-running
# this cell does not add duplicate entries)
project_root_entry = str(PROJECT_ROOT)
if project_root_entry not in sys.path:
    sys.path.append(project_root_entry)

# Kept below the path setup so the import can use the entry added above
from config import PLACES_DATASETS

# Walk the configured datasets and show the name and URL recorded per year
print("Available CDC PLACES Datasets:")
for year in PLACES_DATASETS:
    details = PLACES_DATASETS[year]
    print(f"\nYear: {year}")
    print(f"Dataset: {details['name']}")
    print(f"URL: {details['url']}")

Project root: /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project
Available CDC PLACES Datasets:

Year: 2024
Dataset: PLACES-Census-Tract-Data-GIS-Friendly-Format-2024
URL: https://data.cdc.gov/api/views/yjkw-uj5s/rows.csv

Year: 2023
Dataset: PLACES-Census-Tract-Data-GIS-Friendly-Format-2023
URL: https://data.cdc.gov/api/views/hky2-3tpn/rows.csv

Year: 2022
Dataset: PLACES-Census-Tract-Data-GIS-Friendly-Format-2022
URL: https://data.cdc.gov/api/views/shc3-fzig/rows.csv

Year: 2021
Dataset: PLACES-Census-Tract-Data-GIS-Friendly-Format-2021
URL: https://data.cdc.gov/api/views/mb5y-ytti/rows.csv

Year: 2020
Dataset: PLACES-Census-Tract-Data-GIS-Friendly-Format-2020
URL: https://data.cdc.gov/api/views/ib3w-k9rq/rows.csv


In [7]:
# Sanity check: print the directories this notebook resolves, next to the
# path reported by the download helper, so they can be compared by eye
from pathlib import Path

current_dir = Path().resolve()
print("Current working directory:", current_dir)
print("Project root:", current_dir.parents[0])

from src.data.download import download_places_data

# Request the 2024 dataset and show the path the helper returns
file_path = download_places_data(2024)
print("Download path:", file_path)

2024-11-10 18:25:15,103 - src.data.download - INFO - File for year 2024 already exists at /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2024.csv


Current working directory: /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/notebooks
Project root: /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project
Download path: /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2024.csv


In [8]:
# Fetch several years in one call -- here the three most recent ones
from src.data.download import download_multiple_years

recent_years = [2024, 2023, 2022]
downloaded_files = download_multiple_years(recent_years)

print("\nDownloaded files:")
for file in downloaded_files:
    print(f"- {file}")

# Report the on-disk size of each file in MB (1 MB = 1024 * 1024 bytes)
for file in downloaded_files:
    size_bytes = file.stat().st_size
    size_mb = size_bytes / 1024 ** 2
    print(f"\n{file.name}: {size_mb:.1f} MB")

2024-11-10 18:25:30,559 - src.data.download - INFO - File for year 2024 already exists at /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2024.csv
2024-11-10 18:25:30,571 - src.data.download - INFO - File for year 2023 already exists at /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2023.csv
2024-11-10 18:25:30,574 - src.data.download - INFO - File for year 2022 already exists at /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2022.csv



Downloaded files:
- /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2024.csv
- /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2023.csv
- /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2022.csv

places_2024.csv: 66.3 MB

places_2023.csv: 53.4 MB

places_2022.csv: 46.0 MB


In [10]:
# Request every year that has an entry in PLACES_DATASETS
all_years = PLACES_DATASETS.keys()
downloaded_files = download_multiple_years(all_years)

# List the paths that came back
print("\nDownloaded files:")
for file in downloaded_files:
    print(f"- {file}")

2024-11-10 18:26:04,499 - src.data.download - INFO - File for year 2024 already exists at /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2024.csv
2024-11-10 18:26:04,505 - src.data.download - INFO - File for year 2023 already exists at /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2023.csv
2024-11-10 18:26:04,510 - src.data.download - INFO - File for year 2022 already exists at /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2022.csv
2024-11-10 18:26:04,513 - src.data.download - INFO - File for year 2021 already exists at /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2021.csv
2024-11-10 18:26:04,515 - src.data.download - INFO - File for year 2020 already exists at /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2020.csv



Downloaded files:
- /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2024.csv
- /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2023.csv
- /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2022.csv
- /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2021.csv
- /Users/elenki/Documents/UofU/fall-2024/comp5960-dv/COMP5960_Project/data/raw/places_2020.csv
