In [1]:
!uv pip install xarray==2024.11.0 zarr==2.18.4 multiformats git+https://github.com/dClimate/py-hamt.git geopandas fiona geopy simplekml

[2mUsing Python 3.12.7 environment at: /opt/venv[0m
[2mResolved [1m45 packages[0m [2min 3.95s[0m[0m                                            [0m
[2K[2mPrepared [1m8 packages[0m [2min 24.14s[0m[0m                                            
         If the cache and target directories are on different filesystems, hardlinking may not be supported.
[2K[2mInstalled [1m8 packages[0m [2min 84ms[0m[0m                                [0m
 [32m+[39m [1mclick-plugins[0m[2m==1.1.1[0m
 [32m+[39m [1mcligj[0m[2m==0.7.2[0m
 [32m+[39m [1mfiona[0m[2m==1.10.1[0m
 [32m+[39m [1mgeographiclib[0m[2m==2.0[0m
 [32m+[39m [1mgeopandas[0m[2m==1.0.1[0m
 [32m+[39m [1mgeopy[0m[2m==2.4.1[0m
 [32m+[39m [1mpyogrio[0m[2m==0.10.0[0m
 [32m+[39m [1msimplekml[0m[2m==1.3.6[0m


In [5]:
# Add three remote nodes via `ipfs swarm peering add`, one shell command per node.
# In the recorded run each command printed "add <peer id> success".
# NOTE(review): all three multiaddrs use the same IP and TCP port
# (15.235.14.184:4001) and differ only in the peer ID — confirm these are the
# intended addresses for each node and not a copy-paste of the first one.

# Fluorine Node
!ipfs swarm peering add "/ip4/15.235.14.184/tcp/4001/p2p/12D3KooWHdZM98wcuyGorE184exFrPEJWv2btXWWSHLQaqwZXuPe"

# Cerium Node
!ipfs swarm peering add "/ip4/15.235.14.184/tcp/4001/p2p/12D3KooWGX5HDDjbdiJL2QYf2f7Kjp1Bj6QAXR5vFvLQniTKwoBR"

# Bismuth Node
!ipfs swarm peering add "/ip4/15.235.14.184/tcp/4001/p2p/12D3KooWEaVCpKd2MgZeLugvwCWRSQAMYWdu6wNG6SySQsgox8k5"

add 12D3KooWHdZM98wcuyGorE184exFrPEJWv2btXWWSHLQaqwZXuPe success
add 12D3KooWGX5HDDjbdiJL2QYf2f7Kjp1Bj6QAXR5vFvLQniTKwoBR success
add 12D3KooWEaVCpKd2MgZeLugvwCWRSQAMYWdu6wNG6SySQsgox8k5 success


In [None]:
# Run garbage collection on the local IPFS repository.
# NOTE(review): this cell has no execution count in the saved notebook, so it was
# not run as part of the recorded session. Running it presumably discards locally
# cached, unpinned blocks, which would force the data-loading cell below to
# re-fetch them from peers — confirm it is meant to be part of a normal run.
!ipfs repo gc

In [7]:
import time
import xarray as xr
from py_hamt import HAMT, IPFSStore
from multiformats import CID
import pandas as pd
import numpy as np
import geopandas as gpd
import fiona
from geopy import geocoders 
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import simplekml
from shapely.geometry import Point, Polygon, MultiPolygon, shape

print("Imported all packages")

# Start timing (covers everything from here to the end of the cell)
start_time = time.perf_counter()

# HTTP gateway of the local IPFS daemon used to fetch blocks.
# Loopback (127.0.0.1) is used instead of 0.0.0.0: 0.0.0.0 is a wildcard *bind*
# address, and connecting to it as a destination only works on some operating
# systems. Loopback reaches the same local gateway everywhere.
IPFS_GATEWAY_URI = "http://127.0.0.1:8080"

# Root CIDs of the HAMTs, one per wind component
era5_100m_uwind_cid = "bafyr4ic3bnbcemyxllvdwrz2jxeccxlyjqxllz3csqjwsf4ihapj4kundy"
era5_100m_vwind_cid = "bafyr4ianhuujwxgivs7m3xk7t4k7xi4bqkivb4asjid26fa3bv4ssy5jqm"

# Decode the CID strings into CID objects, which is what HAMT takes as root_node_id
era5_100m_uwind_root_cid = CID.decode(era5_100m_uwind_cid)
era5_100m_vwind_root_cid = CID.decode(era5_100m_vwind_cid)

# Create one HAMT instance per component, each backed by its own IPFSStore
era5_100m_uwind_hamt = HAMT(
    store=IPFSStore(gateway_uri_stem=IPFS_GATEWAY_URI),
    root_node_id=era5_100m_uwind_root_cid,
)
era5_100m_vwind_hamt = HAMT(
    store=IPFSStore(gateway_uri_stem=IPFS_GATEWAY_URI),
    root_node_id=era5_100m_vwind_root_cid,
)

# Open each HAMT as a Zarr store through xarray
era5_100m_uwind = xr.open_zarr(store=era5_100m_uwind_hamt)
print("Loaded era5_100m_uwind")
era5_100m_vwind = xr.open_zarr(store=era5_100m_vwind_hamt)
print("Loaded era5_100m_vwind")

# Define geographic bounding box (Part of Central Texas), in degrees with
# longitudes in the -180..180 convention; they are converted further down.
lat_min, lat_max = 30, 31  # Latitude range
lon_min, lon_max = -100, -99  # Longitude range
# Convert From -180,180 to 0,360 for ERA5
def convert_longitude(lon):
    """Map a longitude in degrees onto the 0..360 range.

    Non-negative inputs are reduced modulo 360; negative inputs are first
    shifted up by one full turn and then reduced modulo 360.
    """
    if lon >= 0:
        return lon % 360
    shifted = lon + 360
    return shifted % 360

# Re-express the bounding-box longitudes in the 0..360 convention.
lon_min = convert_longitude(lon_min)
lon_max = convert_longitude(lon_max)

# Time window to extract; both ends are handed to a label-based slice below.
start_date, end_date = "2020-12-30", "2020-12-31"

# ---- Subset both components ----
subset_start = time.perf_counter()

# A single set of indexers shared by the u and v selections. Latitude runs from
# lat_max to lat_min (presumably because the coordinate is stored in descending
# order — confirm against the dataset).
region_and_period = {
    "latitude": slice(lat_max, lat_min),
    "longitude": slice(lon_min, lon_max),
    "time": slice(start_date, end_date),
}

subset_era5_100m_uwind = era5_100m_uwind.sel(**region_and_period)
print("Sliced era5_100m_uwind")

subset_era5_100m_vwind = era5_100m_vwind.sel(**region_and_period)
print("Sliced era5_100m_vwind")

subset_end = time.perf_counter()
print(f"Subsetting data took {subset_end - subset_start:.2f} seconds")


def _to_wide_frame(dataset, variable):
    """Return `dataset[variable]` as a DataFrame indexed by time, one column per grid point.

    The variable is flattened to a long table, pivoted so every
    (latitude, longitude) pair becomes a column, and the column labels are
    rewritten as "(lat, lon)" strings.
    """
    long_frame = dataset[variable].to_dataframe().reset_index()
    wide_frame = long_frame.pivot(index='time', columns=['latitude', 'longitude'], values=variable)
    wide_frame.columns = [f"({lat}, {lon})" for lat, lon in wide_frame.columns]
    return wide_frame


# ---- Convert to DataFrames ----
# In the recorded run this step dominated the runtime (about 78 s versus 0.18 s
# for the selection above), presumably because this is where the data is
# actually fetched.
convert_start = time.perf_counter()

df_u100 = _to_wide_frame(subset_era5_100m_uwind, '100m_u_component_of_wind')
df_v100 = _to_wide_frame(subset_era5_100m_vwind, '100m_v_component_of_wind')

convert_end = time.perf_counter()
print(f"DataFrame conversion took {convert_end - convert_start:.2f} seconds")

# ---- Overall timing ----
end_time = time.perf_counter()
print(f"Total execution time: {end_time - start_time:.2f} seconds")

print("Done")

Imported all packages
Loaded era5_100m_uwind
Loaded era5_100m_vwind
Sliced era5_100m_uwind
Sliced era5_100m_vwind
Subsetting data took 0.18 seconds
DataFrame conversion took 77.69 seconds
Total execution time: 90.78 seconds
Done
