In [14]:
import os
from pathlib import Path
import shutil
import geopandas as gpd
import pandas as pd

# --- Configuration ------------------------------------------------------------
UNIVERSES_DIR = Path('../src/streetTransformer/data/universes/')
INPUT_UNIVERSE = UNIVERSES_DIR / 'caprecon_plus_control'
OUTPUT_UNIVERSE = UNIVERSES_DIR / 'caprecon_plus_control_downsampled'
N_SAMPLES = 200  # target number of locations in the downsampled universe
YEARS = list(range(2006, 2025, 2))  # 2006, 2008, ..., 2024
RANDOM_SEED = 42  # fixed seed so Restart & Run All reproduces the same sample

# Locations that must always be kept in the downsampled universe.
REQUIRED_IDS = [7571, 8887, 11738, 11800, 12116, 14271, 15283, 15375, 15709, 15852]

# --- Locations ----------------------------------------------------------------
input_locations_gdf = gpd.read_feather(INPUT_UNIVERSE / 'locations.feather')

# Split the input into the rows we must keep and the pool we may sample from.
# Sampling only from the non-required pool guarantees that a required location
# is never drawn a second time (which would produce duplicate rows and fewer
# than N_SAMPLES distinct locations).
is_required = input_locations_gdf['location_id'].isin(REQUIRED_IDS)
required_locations_gdf = input_locations_gdf[is_required]
candidate_locations_gdf = input_locations_gdf[~is_required]

# Surface required IDs that are absent from the input instead of silently
# ending up with a smaller universe.
missing_required_ids = sorted(set(REQUIRED_IDS) - set(required_locations_gdf['location_id']))
if missing_required_ids:
    print(f'Warning: required IDs not found in input universe: {missing_required_ids}')

# Fill the remainder up to N_SAMPLES, clamped to what is actually available so
# that .sample() cannot be asked for a negative or too-large count.
n_random = min(max(N_SAMPLES - len(required_locations_gdf), 0), len(candidate_locations_gdf))
output_locations_gdf = pd.concat([
    candidate_locations_gdf.sample(n_random, random_state=RANDOM_SEED),
    required_locations_gdf,
])

# The output universe folder may not exist yet on a fresh run; create it before
# the first write.
OUTPUT_UNIVERSE.mkdir(parents=True, exist_ok=True)
output_locations_gdf.to_feather(OUTPUT_UNIVERSE / 'locations.feather')

# IDs of the retained locations (array kept for any later cells); the set gives
# constant-time membership tests in the imagery loop below.
output_ids = output_locations_gdf['location_id'].values
output_id_set = {int(location_id) for location_id in output_ids}

# --- Documents ----------------------------------------------------------------
# Documents are copied unfiltered.
shutil.copy2(INPUT_UNIVERSE / 'documents.feather', OUTPUT_UNIVERSE / 'documents.feather')

# --- Features -----------------------------------------------------------------
# Each year's feature folder is copied wholesale (not filtered by location).
# copytree creates the destination (including parents) itself, and
# dirs_exist_ok=True makes re-running the cell safe.
for year in YEARS:
    input_dir = INPUT_UNIVERSE / 'features' / str(year)
    output_dir = OUTPUT_UNIVERSE / 'features' / str(year)
    shutil.copytree(input_dir, output_dir, dirs_exist_ok=True)

# --- Imagery ------------------------------------------------------------------
# Only imagery whose file stem is the integer ID of a retained location is copied.
for year in YEARS:
    input_dir = INPUT_UNIVERSE / 'imagery' / str(year)
    output_dir = OUTPUT_UNIVERSE / 'imagery' / str(year)
    output_dir.mkdir(parents=True, exist_ok=True)
    for f in input_dir.iterdir():
        # Skip sub-directories and files not named by an integer ID
        # (e.g. '.DS_Store'); int() would otherwise raise ValueError on them.
        if not (f.is_file() and f.stem.isdecimal()):
            continue
        if int(f.stem) in output_id_set:
            shutil.copy2(f, output_dir / f.name)

    print(f'Done with year: {year}!')



Done with year: 2006!
Done with year: 2008!
Done with year: 2010!
Done with year: 2012!
Done with year: 2014!
Done with year: 2016!
Done with year: 2018!
Done with year: 2020!
Done with year: 2022!
Done with year: 2024!


In [15]:
# Zip the contents of the downsampled universe folder. The archive is written
# to the current working directory, and its path is the cell's displayed value.
shutil.make_archive(
    base_name="caprecon_plus_control_downsampled",
    format="zip",
    root_dir=OUTPUT_UNIVERSE,
)

'/Users/jon/Documents/Employment/2025 VIDA Lab/streetTransformer/notebooks/caprecon_plus_control_downsampled.zip'