In [1]:
# Cell 1: clone your GitHub repo and cd into it
!rm -rf ca-landuse-dashboard-main
!git clone https://github.com/jacka-m/ca-landuse-dashboard.git ca-landuse-dashboard-main
%cd ca-landuse-dashboard-main


Cloning into 'ca-landuse-dashboard-main'...
remote: Enumerating objects: 84, done.[K
remote: Counting objects: 100% (84/84), done.[K
remote: Compressing objects: 100% (66/66), done.[K
remote: Total 84 (delta 30), reused 24 (delta 4), pack-reused 0 (from 0)[K
Receiving objects: 100% (84/84), 539.44 KiB | 5.04 MiB/s, done.
Resolving deltas: 100% (30/30), done.
/content/ca-landuse-dashboard-main


In [2]:
# Cell 2: install all dependencies
!pip install geopandas rasterio numpy pandas requests tqdm

Collecting rasterio
  Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m73.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl (11 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1.2 cligj-0.7.2 rasterio-1.4.3


In [None]:
# Colab cell — download & summarize with tqdm bars

import io
import shutil
import tempfile
import zipfile
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
import requests
from rasterio.features import geometry_mask
from tqdm import tqdm

# 1. Years to fetch, and for each dataset a {year: archive URL} mapping
years = [*range(2008, 2025)]
download_links = {'nlcd': {}, 'cropscape': {}}
for yr in years:
    # One archive per dataset per year; only the year changes in each URL.
    download_links['nlcd'][yr] = f"https://www.mrlc.gov/downloads/sciweb1/shared/mrlc/data-bundles/Annual_NLCD_LndCov_{yr}_CU_C1V1.zip"
    download_links['cropscape'][yr] = f"https://www.nass.usda.gov/Research_and_Science/Cropland/Release/datasets/{yr}_30m_cdls.zip"

# 2. Raw-data folders, one per dataset; created (with parents) if missing
raw_nlcd, raw_cropscape = Path("data/raw/nlcd"), Path("data/raw/cropscape")
for _raw_dir in (raw_nlcd, raw_cropscape):
    _raw_dir.mkdir(parents=True, exist_ok=True)

# 3. Helper to download a ZIP and extract only its .tif members
def download_and_extract_zip(url, dest_folder, chunk_size=1 << 20, timeout=60):
    """Download the ZIP archive at ``url`` and extract its ``.tif`` members.

    The archive is streamed to an anonymous temporary file rather than being
    held in memory, and each matching member is copied to disk in chunks, so
    memory use stays bounded regardless of archive size.

    Parameters
    ----------
    url : str
        Address of the ZIP archive.
    dest_folder : str or pathlib.Path
        Existing directory that receives the extracted files. Members are
        written flat (any directory prefix inside the archive is dropped) and
        overwrite files of the same name.
    chunk_size : int, optional
        Number of bytes moved per read/write while downloading and extracting.
    timeout : float or tuple, optional
        Passed to ``requests.get`` so a stalled connection raises instead of
        hanging the cell forever.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    zipfile.BadZipFile
        If the downloaded payload is not a valid ZIP archive.
    """
    dest_folder = Path(dest_folder)
    with tempfile.TemporaryFile() as archive:
        # Stream the body to disk; touching resp.content would buffer the
        # entire archive in RAM and defeat stream=True.
        with requests.get(url, stream=True, timeout=timeout) as resp:
            resp.raise_for_status()
            for chunk in resp.iter_content(chunk_size=chunk_size):
                archive.write(chunk)
        archive.seek(0)

        with zipfile.ZipFile(archive) as z:
            for member in z.namelist():
                if not member.lower().endswith('.tif'):
                    continue
                target_path = dest_folder / Path(member).name
                # Extract to a sibling ".part" file and rename once complete so
                # an interrupted run never leaves a truncated .tif behind.
                part_path = target_path.with_name(target_path.name + ".part")
                try:
                    with z.open(member) as member_stream, open(part_path, 'wb') as f:
                        shutil.copyfileobj(member_stream, f, chunk_size)
                    part_path.replace(target_path)
                finally:
                    # No-op after a successful rename; removes debris on failure.
                    part_path.unlink(missing_ok=True)

# 4. Download & extract with progress bars (resumable)
# Each archive is large and the whole loop takes a long time, so a hidden
# marker file is written after an archive has been fully extracted. Re-running
# the cell skips years that already finished instead of downloading them again;
# delete the ".<source>_<year>.done" file to force a re-download.
for source, links in download_links.items():
    dest = raw_nlcd if source == 'nlcd' else raw_cropscape
    for yr, url in tqdm(links.items(), desc=f"Downloading {source}", unit="year"):
        done_marker = dest / f".{source}_{yr}.done"
        if done_marker.exists():
            continue
        download_and_extract_zip(url, dest)
        # Only reached when the download and extraction raised no exception.
        done_marker.touch()

# 5. Load CA boundary
# The path is relative, so it resolves against the current working directory
# (Cell 1 ends with a %cd into the cloned repo). `boundary` is read as a
# module-level global by summarize_raster below.
boundary = gpd.read_file("data/ca_boundary.geojson")

# 6. Summarize rasters with progress bars
# Collects one summary dict per raster (as returned by summarize_raster); the
# list is turned into a DataFrame and written to CSV at the end of the cell.
records = []
def summarize_raster(tif_path, label):
    """Summarise band 1 of a raster over the area covered by ``boundary``.

    Reads the module-level ``boundary`` GeoDataFrame, reprojects it to the
    raster's CRS, and reads only the pixel window spanning the boundary, so
    the full raster is never loaded.

    Parameters
    ----------
    tif_path : str or pathlib.Path
        Raster file to summarise. The year is taken from the first
        four-digit, underscore-separated token of the file stem; if there is
        none, the last token is used.
    label : str
        Dataset name copied into the ``"dataset"`` field of the result.

    Returns
    -------
    dict
        ``dataset`` (``label``), ``year`` (str), ``mean_value`` (float, NaN
        when no valid pixel lies inside the boundary) and ``valid_pixels``
        (int, number of non-nodata pixels inside the boundary).

    Notes
    -----
    ``rasterio.features.geometry_window`` is expected to raise when the
    boundary does not overlap the raster at all.
    NOTE(review): band values look like categorical class codes, in which
    case their mean has no physical meaning; it is kept so the output schema
    stays unchanged.
    """
    tif_path = Path(tif_path)
    with rasterio.open(tif_path) as src:
        shapes = boundary.geometry
        # The mask is rasterised with the raster's affine transform, so the
        # geometries must be expressed in the raster's CRS.
        if boundary.crs is not None and src.crs is not None:
            shapes = boundary.to_crs(src.crs).geometry

        # Restrict the read to the pixel window that spans the boundary.
        window = rasterio.features.geometry_window(src, shapes)
        arr = src.read(1, window=window)  # keep the native dtype to save memory
        mask = geometry_mask(
            shapes,
            transform=src.window_transform(window),
            invert=True,  # True = pixel lies inside the boundary
            out_shape=arr.shape,
        )
        data = arr[mask]
        # Casting to float never creates NaNs, so nodata has to be dropped
        # explicitly for "valid_pixels" and the mean to ignore it.
        if src.nodata is not None:
            data = data[data != src.nodata]

    stem_tokens = tif_path.stem.split("_")
    year = next(
        (tok for tok in stem_tokens if len(tok) == 4 and tok.isdigit()),
        stem_tokens[-1],
    )
    # Accumulate in float64 so integer rasters cannot overflow.
    mean_value = float(data.mean(dtype=np.float64)) if data.size else float("nan")
    return {
        "dataset":      label,
        "year":         year,
        "mean_value":   mean_value,
        "valid_pixels": int(data.size),
    }

for label, folder in [('nlcd', raw_nlcd), ('cropscape', raw_cropscape)]:
    # Match the extension case-insensitively (the download step keeps any
    # casing of ".tif") and sort so the CSV rows come out in a stable order.
    tif_list = sorted(
        p for p in folder.iterdir()
        if p.is_file() and p.suffix.lower() == ".tif"
    )
    for tif in tqdm(tif_list, desc=f"Summarizing {label}", unit="file"):
        records.append(summarize_raster(tif, label))

# 7. Save combined summary
# One row per raster, columns taken from the dicts in `records`.
df = pd.DataFrame.from_records(records)
df.to_csv("data/landcover_summary.csv", index=False)
print(f"→ Saved data/landcover_summary.csv with {len(df)} rows")


Downloading nlcd: 100%|██████████| 17/17 [38:15<00:00, 135.01s/year]
Downloading cropscape:  65%|██████▍   | 11/17 [40:14<22:39, 226.59s/year]