In [1]:
import pandas as pd
import math
import rasterio
from rasterio.enums import Resampling
from rasterio.warp import calculate_default_transform, reproject
from affine import Affine
import duckdb

### Raster resampling

In [19]:
def _utm_epsg_for_lonlat(lon, lat):
    zone = int(math.floor((lon + 180) / 6) + 1)
    north = lat >= 0
    return f"EPSG:{32600 + zone if north else 32700 + zone}"

def resampling(input_raster, output_raster, scale_factor):
    """
    Resample a raster to a target pixel size and write it as a tiled,
    LZW-compressed raster.

    If the source CRS is geographic (degrees), the raster is reprojected to
    the UTM zone of its centroid and resampled in the same step. Otherwise the
    source CRS is kept and only the pixel size changes.

    Parameters
    ----------
    input_raster : str or path-like
        Raster to read.
    output_raster : str or path-like
        Raster to create (overwritten if it exists).
    scale_factor : float
        Target pixel size in the destination CRS units, e.g. 10 -> 10 m.
        Despite the name this is a resolution, not a multiplier.

    Raises
    ------
    ValueError
        If ``scale_factor`` is not a finite positive number, or if the input
        raster has no CRS.

    Notes
    -----
    A projected source CRS is assumed to use metres; this is not verified.
    All bands are resampled with ``Resampling.average``.
    """
    # Validate before touching any file so a bad resolution fails fast.
    target_res_m = float(scale_factor)
    if not (math.isfinite(target_res_m) and target_res_m > 0):
        raise ValueError(
            f"scale_factor must be a finite positive resolution, got {scale_factor!r}"
        )

    with rasterio.open(input_raster) as src:
        # --- Decide destination CRS ---
        src_crs = src.crs
        if src_crs is None:
            raise ValueError("Input raster has no CRS; cannot infer meters vs degrees.")

        if src_crs.is_geographic:
            # In a geographic CRS the bounds are lon/lat, so the centroid can
            # be used directly to pick the UTM zone.
            cx = (src.bounds.left + src.bounds.right) / 2.0
            cy = (src.bounds.top + src.bounds.bottom) / 2.0
            dst_crs = _utm_epsg_for_lonlat(cx, cy)
        else:
            # Projected CRS; assume linear units are metres (not checked).
            dst_crs = src_crs

        # --- Compute target grid (transform, width, height) ---
        dst_transform, dst_width, dst_height = calculate_default_transform(
            src_crs, dst_crs, src.width, src.height, *src.bounds,
            resolution=(target_res_m, target_res_m)
        )

        # --- Build output profile ---
        profile = src.profile.copy()
        profile.update({
            "crs": dst_crs,
            "transform": dst_transform,
            "width": max(1, dst_width),
            "height": max(1, dst_height),
            "compress": "lzw",
            "tiled": True,
            "nodata": src.nodata,
        })

        # The profile inherits the source block layout. A striped source
        # (e.g. blockysize=1) combined with tiled=True is rejected because
        # TIFF tile dimensions must be multiples of 16, so replace unusable
        # sizes with a standard 256x256 tile and keep valid ones untouched.
        inherited_blocks = (profile.get("blockxsize"), profile.get("blockysize"))
        if any(not size or int(size) % 16 for size in inherited_blocks):
            profile.update({"blockxsize": 256, "blockysize": 256})

        # --- Reproject + resample into the destination grid ---
        with rasterio.open(output_raster, 'w', **profile) as dst:
            for b in range(1, src.count + 1):
                reproject(
                    source=rasterio.band(src, b),
                    destination=rasterio.band(dst, b),
                    src_transform=src.transform,
                    src_crs=src_crs,
                    dst_transform=dst_transform,
                    dst_crs=dst_crs,
                    # Averaging suits continuous data; use 'nearest' for classes.
                    resampling=Resampling.average,
                    src_nodata=src.nodata,
                    dst_nodata=src.nodata
                )

In [None]:
# Po river bathymetry: source raster and the resampled output location.
inpath = "/Volumes/Science_SSD/Dissertation/0_data/External/Bathymetries/Europa/Po/Po_bathymetry_wsg84.tif"
outpath = "/Volumes/Science_SSD/Dissertation/0_data/External/Bathymetries/Resampled_10/Po_resampled_10m.tif"

# The third argument is the target pixel size (10 -> 10 m).
resampling(
    input_raster=inpath,
    output_raster=outpath,
    scale_factor=10,
)

### Convert CSV to Parquet

In [3]:
# Source CSV and destination Parquet file for the conversion.
in_csv = "/Volumes/Science_SSD/Dissertation/3_output/Global_22_07_25/csv/Global_slopes_corrected.csv"
out_parquet = "/Volumes/Science_SSD/Dissertation/3_output/Global_22_07_25/parquet/Global_slopes_corrected.parquet"

# Both paths are interpolated straight into the SQL text, so they must not
# contain single quotes.
copy_statement = f"""
    COPY (
        SELECT * FROM read_csv_auto('{in_csv}')
    )
    TO '{out_parquet}' (FORMAT PARQUET);
"""
duckdb.sql(copy_statement)

In [5]:
# Load the converted Parquet file and preview its first rows.
parquet_file = "/Volumes/Science_SSD/Dissertation/3_output/Global_22_07_25/parquet/Global_slopes_corrected.parquet"
df = pd.read_parquet(path=parquet_file)
df.head(n=5)

Unnamed: 0,fid,node_id,time_str,lat,lon,wse,width,node_q_b,dark_frac,ice_clim_f,...,model_type,AIC,slope1,intercept1,slope2,intercept2,slope3,intercept3,bp,bps
0,1,11410000020041,2023-09-20 05:39:29+00:00,-0.164444,42.598592,1.89291,179.767297,2,0.044035,0,...,simple,-32.126012,0.008523,-0.099966,,,,,,
1,2,11410000020041,2023-10-31 23:09:36+00:00,-0.164383,42.598768,2.33904,190.234557,2,0.0,0,...,simple,-32.126012,0.008523,-0.099966,,,,,,
2,3,11410000020041,2024-01-02 13:24:52+00:00,-0.16627,42.597082,2.61014,294.911499,6,0.013475,0,...,simple,-32.126012,0.008523,-0.099966,,,,,,
3,4,11410000020041,2024-01-28 21:21:17+00:00,-0.164455,42.598644,0.56657,135.347474,3,0.080035,0,...,simple,-32.126012,0.008523,-0.099966,,,,,,
4,5,11410000020041,2024-02-13 06:55:02+00:00,-0.164231,42.598533,1.27356,228.668183,2,0.0,0,...,simple,-32.126012,0.008523,-0.099966,,,,,,
