In [49]:
from pathlib import Path
import os
from dotenv import load_dotenv
import rasterio
from pathlib import Path
import numpy as np

In [50]:
load_dotenv()

True

In [3]:
BASE_DIR = Path(os.getenv("BASE_DIR"))

In [4]:
MAG_STRINGS_PATH = os.getenv("MAG_STRINGS_PATH")

In [5]:
MAG_STRINGS_PATH

'3_Magnetic Anomaly Map of Australia (20192020 packages; GeoTIFFNetCDF)'

# Magnetic abnormally map

In [6]:


folder = BASE_DIR / MAG_STRINGS_PATH 
paths = {
    "tmi": folder / "Magmap2019-grid-tmi-AWAGS_MAG_2019.tif",
    "rtp": folder / "Magmap2019-grid-tmi_rtp-AWAGS_MAG_2019.tif",
    "1vd": folder / "Magmap2019-grid-tmi_1vd-AWAGS_MAG_2019.tif",
}

for k,p in paths.items():
    with rasterio.open(p) as src:
        print("\n", k, p.name)
        print("CRS:", src.crs)
        print("Res:", src.res)
        print("Bounds:", src.bounds)
        print("Shape:", (src.height, src.width))
        print("Dtype:", src.dtypes[0])



 tmi Magmap2019-grid-tmi-AWAGS_MAG_2019.tif
CRS: EPSG:4283
Res: (0.0008500000000000001, 0.0008500000000000001)
Bounds: BoundingBox(left=129.00025000000002, bottom=-25.99895, right=137.99920000000003, top=-9.3381)
Shape: (19601, 10587)
Dtype: float32

 rtp Magmap2019-grid-tmi_rtp-AWAGS_MAG_2019.tif
CRS: EPSG:4283
Res: (0.0008500000000000001, 0.0008500000000000001)
Bounds: BoundingBox(left=129.00025000000002, bottom=-25.99895, right=137.99920000000003, top=-9.3381)
Shape: (19601, 10587)
Dtype: float32

 1vd Magmap2019-grid-tmi_1vd-AWAGS_MAG_2019.tif
CRS: EPSG:4283
Res: (0.0008500000000000001, 0.0008500000000000001)
Bounds: BoundingBox(left=129.00025000000002, bottom=-25.99895, right=137.99920000000003, top=-9.3381)
Shape: (19601, 10587)
Dtype: float32


In [7]:
for name, path in paths.items():
    with rasterio.open(path) as src:
        print(name, src.crs)

tmi EPSG:4283
rtp EPSG:4283
1vd EPSG:4283


In [8]:
import geopandas as gpd

In [10]:
MIN_STRINGS_PATH = os.getenv("MIN_STRINGS_PATH")

In [11]:
gpd_grid = gpd.read_parquet(BASE_DIR / MIN_STRINGS_PATH / "grid_with_labels.parquet")

In [14]:
gpd_grid.head()

Unnamed: 0,geometry,label
0,"POLYGON ((501931 7249146, 501931 7249646, 5014...",0.0
1,"POLYGON ((501931 7249646, 501931 7250146, 5014...",0.0
2,"POLYGON ((501931 7250146, 501931 7250646, 5014...",0.0
3,"POLYGON ((501931 7250646, 501931 7251146, 5014...",0.0
4,"POLYGON ((501931 7251146, 501931 7251646, 5014...",0.0


In [12]:
import rasterio
from rasterio.features import rasterize

In [18]:
gpd_grid = gpd_grid.reset_index(drop=True)
gpd_grid["grid_id"] = gpd_grid.index.astype(np.int32)

In [19]:
with rasterio.open(paths["tmi"]) as src:
    raster_crs = src.crs
    raster_transform = src.transform
    raster_shape = (src.height, src.width)

grid_r = gpd_grid.to_crs(raster_crs)

In [20]:
grid_id_raster = rasterize(
    shapes=zip(grid_r.geometry, grid_r["grid_id"]),
    out_shape=raster_shape,
    transform=raster_transform,
    fill=-1,            # pixel ngoài grid
    dtype="int32"
)

In [21]:
def aggregate_raster_by_grid_id(
    raster_path,
    grid_id_raster,
    stats=("mean", "std", "p90"),
):
    with rasterio.open(raster_path) as src:
        arr = src.read(1)
        nodata = src.nodata if src.nodata is not None else -99999

    mask = (grid_id_raster >= 0) & (arr != nodata)
    gids = grid_id_raster[mask]
    vals = arr[mask]

    out = {}

    if "mean" in stats:
        out["mean"] = np.bincount(gids, vals) / np.bincount(gids)

    if "std" in stats:
        mean = out["mean"]
        var = np.bincount(gids, (vals - mean[gids])**2) / np.bincount(gids)
        out["std"] = np.sqrt(var)

    if "p90" in stats:
        df = pd.DataFrame({"gid": gids, "val": vals})
        out["p90"] = df.groupby("gid")["val"].quantile(0.9).values

    return out


In [23]:
import pandas as pd

In [24]:
mag_feats = {}

for key, path in paths.items():
    stats = aggregate_raster_by_grid_id(
        path,
        grid_id_raster,
        stats=("mean", "std", "p90")
    )

    for stat_name, values in stats.items():
        mag_feats[f"{key}_{stat_name}"] = values


  out["mean"] = np.bincount(gids, vals) / np.bincount(gids)
  var = np.bincount(gids, (vals - mean[gids])**2) / np.bincount(gids)


In [25]:
for col, values in mag_feats.items():
    gpd_grid[col] = np.nan
    gpd_grid.loc[:len(values)-1, col] = values


In [26]:
gpd_grid.head()

Unnamed: 0,geometry,label,grid_id,tmi_mean,tmi_std,tmi_p90,rtp_mean,rtp_std,rtp_p90,1vd_mean,1vd_std,1vd_p90
0,"POLYGON ((501931 7249146, 501931 7249646, 5014...",0.0,0,-341.819104,1.742753,-339.439493,-34.24634,1.099034,-32.876038,-0.011716,0.000399,-0.011257
1,"POLYGON ((501931 7249646, 501931 7250146, 5014...",0.0,1,-343.149,1.73318,-340.917496,-32.579243,1.059147,-31.281596,-0.013229,0.000884,-0.011989
2,"POLYGON ((501931 7250146, 501931 7250646, 5014...",0.0,2,-344.533743,1.655959,-342.339349,-29.768549,1.097842,-28.366481,-0.014983,0.000292,-0.014655
3,"POLYGON ((501931 7250646, 501931 7251146, 5014...",0.0,3,-345.051959,1.430893,-343.094437,-26.194259,1.119967,-24.623184,-0.014757,0.00029,-0.014378
4,"POLYGON ((501931 7251146, 501931 7251646, 5014...",0.0,4,-345.027289,1.206863,-343.312057,-21.781616,1.419676,-19.703737,-0.014142,0.000449,-0.01361


In [27]:
mag_cols = [c for c in gpd_grid.columns if c.startswith(("tmi_", "rtp_", "vd1_"))]


nan_rate = gpd_grid[mag_cols].isna().mean().sort_values(ascending=False)
print(nan_rate.head(15))


print(gpd_grid[[c for c in mag_cols if c.endswith("_mean")]].describe())


tmi_mean    0.052747
tmi_std     0.052747
tmi_p90     0.052747
rtp_mean    0.052635
rtp_std     0.052635
rtp_p90     0.052635
dtype: float64
           tmi_mean      rtp_mean
count  5.075891e+06  5.076491e+06
mean  -6.693826e+01  1.146577e+01
std    1.694646e+02  1.988214e+02
min   -1.980966e+03 -2.727466e+03
25%   -1.360887e+02 -8.845759e+01
50%   -7.738126e+01 -1.455147e+01
75%   -9.815431e+00  7.853616e+01
max    4.641514e+03  4.848545e+03


In [None]:

# gpd_grid.to_parquet(BASE_DIR / MAG_STRINGS_PATH  / "grid_with_mag_features.parquet")

# Magnetic line

In [28]:
path = BASE_DIR / MAG_STRINGS_PATH / "NT_Geophysics_stringsmag_shp" / "GEOPHYS_STRINGS_MAGNETICS.shp"

gdf_mag_line = gpd.read_file(path)

In [29]:
gdf_mag_line.head()

Unnamed: 0,AMPLITUDE,CONT_HT,geometry
0,0.360539,500.0,"LINESTRING (131.88564 -11.01418, 131.88588 -11..."
1,1.534917,500.0,"LINESTRING (133.42261 -11.02652, 133.42302 -11..."
2,1.699376,500.0,"LINESTRING (131.21487 -11.0085, 131.21524 -11...."
3,2.470992,500.0,"LINESTRING (132.34396 -11.0208, 132.34404 -11...."
4,1.533746,500.0,"LINESTRING (133.41688 -11.0278, 133.41727 -11...."


In [30]:
gdf_mag_line.crs

<Geographic 2D CRS: EPSG:4283>
Name: GDA94
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: Australia including Lord Howe Island, Macquarie Island, Ashmore and Cartier Islands, Christmas Island, Cocos (Keeling) Islands, Norfolk Island. All onshore and offshore.
- bounds: (93.41, -60.55, 173.34, -8.47)
Datum: Geocentric Datum of Australia 1994
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [31]:
gdf_mag_line.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 906343 entries, 0 to 906342
Data columns (total 3 columns):
 #   Column     Non-Null Count   Dtype   
---  ------     --------------   -----   
 0   AMPLITUDE  906343 non-null  float64 
 1   CONT_HT    906343 non-null  float64 
 2   geometry   906343 non-null  geometry
dtypes: float64(2), geometry(1)
memory usage: 20.7 MB


In [32]:

grid_cent = gpd_grid[["grid_id", "geometry"]].copy()
grid_cent["geometry"] = grid_cent.geometry.centroid

In [33]:
nearest = gpd.sjoin_nearest(
    grid_cent,
    gdf_mag_line[["AMPLITUDE", "geometry"]],
    how="left",
    distance_col="dist_mag_line_m"
)


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:28352
Right CRS: EPSG:4283

  nearest = gpd.sjoin_nearest(



In [34]:
gpd_grid["dist_mag_line_m"] = nearest["dist_mag_line_m"].values
gpd_grid["mag_nearest_amp"] = nearest["AMPLITUDE"].values


In [35]:
gpd_grid[["dist_mag_line_m", "mag_nearest_amp"]].describe()

Unnamed: 0,dist_mag_line_m,mag_nearest_amp
count,5358536.0,5358536.0
mean,8011475.0,4.092627
std,400610.3,7.105428e-15
min,7266736.0,4.092627
25%,7665936.0,4.092627
50%,8011458.0,4.092627
75%,8357230.0,4.092627
max,8765695.0,4.092627


In [36]:
_ = gdf_mag_line.sindex
_ = grid_cent.sindex

In [37]:
gdf_mag_line_m = gdf_mag_line.to_crs(gpd_grid.crs) 

In [38]:
gpd_grid.columns

Index(['geometry', 'label', 'grid_id', 'tmi_mean', 'tmi_std', 'tmi_p90',
       'rtp_mean', 'rtp_std', 'rtp_p90', '1vd_mean', '1vd_std', '1vd_p90',
       'dist_mag_line_m', 'mag_nearest_amp'],
      dtype='object')

In [39]:
gdf_mag_line_m["length_m"] = gdf_mag_line_m.length

In [40]:

grid_base = gpd_grid[["grid_id", "geometry"]].copy()

grid_ids = grid_base["grid_id"].to_numpy()
chunks = np.array_split(grid_ids, int(np.ceil(len(grid_ids) / 400_000)))

dens_parts = []
amp_parts  = []

for i, ids in enumerate(chunks, 1):
    sub = grid_base.loc[grid_base["grid_id"].isin(ids)].copy()
    sub["geometry"] = sub.geometry.centroid

    # buffer 5km
    sub_buf = sub.copy()
    sub_buf["geometry"] = sub_buf.geometry.buffer(5000)

    # join 
    j = gpd.sjoin(
        sub_buf,
        gdf_mag_line_m[["AMPLITUDE", "length_m", "geometry"]],
        how="inner",
        predicate="intersects",
    )

    # length within 5km
    dens = j.groupby("grid_id")["length_m"].sum()
    dens_parts.append(dens)

    # max amplitude within 5km
    amp = j.groupby("grid_id")["AMPLITUDE"].max()
    amp_parts.append(amp)


    del sub, sub_buf, j

density = pd.concat(dens_parts).groupby(level=0).sum().rename("mag_len_5km")
ampmax  = pd.concat(amp_parts).groupby(level=0).max().rename("mag_amp_max_5km")


gpd_grid = gpd_grid.join(density, on="grid_id")
gpd_grid = gpd_grid.join(ampmax,  on="grid_id")

gpd_grid["mag_len_5km"] = gpd_grid["mag_len_5km"].fillna(0)
gpd_grid["mag_amp_max_5km"] = gpd_grid["mag_amp_max_5km"].fillna(0)



In [41]:
gpd_grid.describe()

Unnamed: 0,label,grid_id,tmi_mean,tmi_std,tmi_p90,rtp_mean,rtp_std,rtp_p90,1vd_mean,1vd_std,1vd_p90,dist_mag_line_m,mag_nearest_amp,mag_len_5km,mag_amp_max_5km
count,5358536.0,5358536.0,5075891.0,5075891.0,5075891.0,5076491.0,5076491.0,5076491.0,5076491.0,5076491.0,5076491.0,5358536.0,5358536.0,5358536.0,5358536.0
mean,0.0142516,2679268.0,-66.93826,6.377348,-58.59444,11.46577,7.612324,21.45659,3.460255e-05,0.02581609,0.03353513,8011475.0,4.092627,246234.2,49.06916
std,0.1081983,1546876.0,169.4646,17.30856,175.234,198.8214,20.83102,208.0418,0.1128894,0.08891928,0.1793691,400610.3,7.105428e-15,136581.7,66.47994
min,0.0,0.0,-1980.966,0.0,-1617.727,-2727.466,0.0,-2167.288,-7.414126,0.0,-3.088984,7266736.0,4.092627,0.0,0.0
25%,0.0,1339634.0,-136.0887,0.7792242,-130.7799,-88.45759,0.9367311,-82.84424,-0.01153668,0.001319484,-0.003840903,7665936.0,4.092627,156558.6,11.70586
50%,0.0,2679268.0,-77.38126,1.898976,-73.41091,-14.55147,2.243029,-8.859299,-0.001372288,0.004060122,0.003317693,8011458.0,4.092627,238443.7,26.51469
75%,0.0,4018901.0,-9.815431,5.172586,-3.426247,78.53616,6.187425,86.31475,0.007214037,0.01620304,0.02160755,8357230.0,4.092627,324842.3,59.37708
max,1.0,5358535.0,4641.514,1192.614,5360.796,4848.545,1550.656,5794.384,10.63437,8.804346,17.82777,8765695.0,4.092627,1261344.0,1163.558


In [42]:
gpd_grid

Unnamed: 0,geometry,label,grid_id,tmi_mean,tmi_std,tmi_p90,rtp_mean,rtp_std,rtp_p90,1vd_mean,1vd_std,1vd_p90,dist_mag_line_m,mag_nearest_amp,mag_len_5km,mag_amp_max_5km
0,"POLYGON ((501931 7249146, 501931 7249646, 5014...",0.0,0,-341.819104,1.742753,-339.439493,-34.246340,1.099034,-32.876038,-0.011716,0.000399,-0.011257,7.266736e+06,4.092627,124753.390173,44.800618
1,"POLYGON ((501931 7249646, 501931 7250146, 5014...",0.0,1,-343.149000,1.733180,-340.917496,-32.579243,1.059147,-31.281596,-0.013229,0.000884,-0.011989,7.267235e+06,4.092627,119626.656142,44.800618
2,"POLYGON ((501931 7250146, 501931 7250646, 5014...",0.0,2,-344.533743,1.655959,-342.339349,-29.768549,1.097842,-28.366481,-0.014983,0.000292,-0.014655,7.267733e+06,4.092627,84948.625951,44.800618
3,"POLYGON ((501931 7250646, 501931 7251146, 5014...",0.0,3,-345.051959,1.430893,-343.094437,-26.194259,1.119967,-24.623184,-0.014757,0.000290,-0.014378,7.268232e+06,4.092627,85881.737359,44.800618
4,"POLYGON ((501931 7251146, 501931 7251646, 5014...",0.0,4,-345.027289,1.206863,-343.312057,-21.781616,1.419676,-19.703737,-0.014142,0.000449,-0.013610,7.268731e+06,4.092627,89694.261007,44.800618
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5358531,"POLYGON ((1462431 8640646, 1462431 8641146, 14...",0.0,5358531,-83.081830,0.137652,,-38.844597,0.407874,,-0.002623,0.000290,,8.763723e+06,4.092627,30247.352570,2.484874
5358532,"POLYGON ((1462431 8641146, 1462431 8641646, 14...",0.0,5358532,-83.039246,0.137805,,-37.774031,0.347206,,-0.001909,0.000117,,8.764216e+06,4.092627,31957.857559,2.484874
5358533,"POLYGON ((1462431 8641646, 1462431 8642146, 14...",0.0,5358533,-82.997389,0.162794,,-36.845743,0.332489,,-0.001805,0.000058,,8.764709e+06,4.092627,31957.857559,2.484874
5358534,"POLYGON ((1462431 8642146, 1462431 8642646, 14...",0.0,5358534,-83.021245,0.165225,,-36.004838,0.345801,,-0.002013,0.000089,,8.765202e+06,4.092627,32778.972386,2.484874


# Gravity

In [44]:
GRAVITY_STRINGS_PATH = os.getenv("GRAVITY_STRINGS_PATH")

In [None]:
path = BASE_DIR / GRAVITY_STRINGS_PATH / "NT_Geophysics_stringsgrav_shp" / "GEOPHYS_STRINGS_GRAVITY.shp"

gdf_grav = gpd.read_file(path)

print("Rows, Cols:", gdf_grav.shape)
print("CRS:", gdf_grav.crs)
print("Geometry types:", gdf_grav.geom_type.value_counts())
print("Columns:", list(gdf_grav.columns))
print("Total bounds:", gdf_grav.total_bounds)

gdf_grav.head()

Rows, Cols: (263222, 3)
CRS: EPSG:4283
Geometry types: LineString         263204
MultiLineString        18
Name: count, dtype: int64
Columns: ['AMPLITUDE', 'CONT_HT', 'geometry']
Total bounds: [128.504003 -26.497575 138.495835 -10.744592]


Unnamed: 0,AMPLITUDE,CONT_HT,geometry
0,0.151592,500.0,"LINESTRING (136.69125 -10.96658, 136.69278 -10..."
1,0.762267,500.0,"LINESTRING (135.88873 -10.9694, 135.88999 -10...."
2,0.190329,500.0,"LINESTRING (134.39525 -10.97142, 134.39502 -10..."
3,0.132056,500.0,"LINESTRING (136.66329 -10.97062, 136.66474 -10..."
4,5.665491,500.0,"LINESTRING (135.31458 -10.97325, 135.31537 -10..."


In [53]:
print("grav CRS:", gdf_grav.crs)

grav CRS: EPSG:4283


In [54]:
gpd_grid.crs

<Projected CRS: EPSG:28352>
Name: GDA94 / MGA zone 52
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Australia - onshore and offshore between 126°E and 132°E.
- bounds: (125.99, -37.38, 132.0, -9.1)
Coordinate Operation:
- name: Map Grid of Australia zone 52
- method: Transverse Mercator
Datum: Geocentric Datum of Australia 1994
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [55]:
gdf_grav = gdf_grav.to_crs(gpd_grid.crs)

In [56]:
gdf_grav.crs

<Projected CRS: EPSG:28352>
Name: GDA94 / MGA zone 52
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Australia - onshore and offshore between 126°E and 132°E.
- bounds: (125.99, -37.38, 132.0, -9.1)
Coordinate Operation:
- name: Map Grid of Australia zone 52
- method: Transverse Mercator
Datum: Geocentric Datum of Australia 1994
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [57]:
# centroid grid
grid_cent = gpd_grid[["grid_id", "geometry"]].copy()
grid_cent["geometry"] = grid_cent.geometry.centroid

# build spatial index
_ = gdf_grav.sindex
_ = grid_cent.sindex

nearest = gpd.sjoin_nearest(
    grid_cent,
    gdf_grav[["AMPLITUDE", "geometry"]],
    how="left",
    distance_col="dist_grav_line_m"
)


nearest_1 = (
    nearest
    .sort_values("dist_grav_line_m")
    .drop_duplicates(subset="grid_id", keep="first")
    .set_index("grid_id")
)


In [58]:
gpd_grid = gpd_grid.join(
    nearest_1[["dist_grav_line_m", "AMPLITUDE"]],
    on="grid_id"
)

gpd_grid = gpd_grid.rename(columns={"AMPLITUDE": "grav_nearest_amp"})

In [59]:
gpd_grid.head()

Unnamed: 0,geometry,label,grid_id,tmi_mean,tmi_std,tmi_p90,rtp_mean,rtp_std,rtp_p90,1vd_mean,1vd_std,1vd_p90,dist_mag_line_m,mag_nearest_amp,mag_len_5km,mag_amp_max_5km,dist_grav_line_m,grav_nearest_amp
0,"POLYGON ((501931 7249146, 501931 7249646, 5014...",0.0,0,-341.819104,1.742753,-339.439493,-34.24634,1.099034,-32.876038,-0.011716,0.000399,-0.011257,7266736.0,4.092627,124753.390173,44.800618,8.111075,19.039854
1,"POLYGON ((501931 7249646, 501931 7250146, 5014...",0.0,1,-343.149,1.73318,-340.917496,-32.579243,1.059147,-31.281596,-0.013229,0.000884,-0.011989,7267235.0,4.092627,119626.656142,44.800618,304.773741,9.145121
2,"POLYGON ((501931 7250146, 501931 7250646, 5014...",0.0,2,-344.533743,1.655959,-342.339349,-29.768549,1.097842,-28.366481,-0.014983,0.000292,-0.014655,7267733.0,4.092627,84948.625951,44.800618,671.61014,9.145121
3,"POLYGON ((501931 7250646, 501931 7251146, 5014...",0.0,3,-345.051959,1.430893,-343.094437,-26.194259,1.119967,-24.623184,-0.014757,0.00029,-0.014378,7268232.0,4.092627,85881.737359,44.800618,982.790103,9.100928
4,"POLYGON ((501931 7251146, 501931 7251646, 5014...",0.0,4,-345.027289,1.206863,-343.312057,-21.781616,1.419676,-19.703737,-0.014142,0.000449,-0.01361,7268731.0,4.092627,89694.261007,44.800618,1345.733665,9.100928


In [60]:

gdf_grav["length_m"] = gdf_grav.length


buf = grid_cent.copy()
buf["geometry"] = buf.geometry.buffer(5000)

join = gpd.sjoin(
    buf,
    gdf_grav[["AMPLITUDE", "length_m", "geometry"]],
    how="inner",                 
    predicate="intersects"
)

# sum length within 5km
grav_len_5km = (
    join.groupby("grid_id")["length_m"]
    .sum()
    .rename("grav_len_5km")
)

# max amplitude within 5km
grav_amp_max_5km = (
    join.groupby("grid_id")["AMPLITUDE"]
    .max()
    .rename("grav_amp_max_5km")
)


gpd_grid = gpd_grid.drop(columns=["grav_len_5km", "grav_amp_max_5km"], errors="ignore")

# attach
gpd_grid = gpd_grid.join(grav_len_5km, on="grid_id")
gpd_grid = gpd_grid.join(grav_amp_max_5km, on="grid_id")

gpd_grid["grav_len_5km"] = gpd_grid["grav_len_5km"].fillna(0)
gpd_grid["grav_amp_max_5km"] = gpd_grid["grav_amp_max_5km"].fillna(0)


In [61]:
print(gpd_grid[["dist_grav_line_m", "grav_nearest_amp", "grav_len_5km", "grav_amp_max_5km"]].describe())

       dist_grav_line_m  grav_nearest_amp  grav_len_5km  grav_amp_max_5km
count      5.358536e+06      5.358536e+06  5.358536e+06      5.358536e+06
mean       3.546099e+03      1.504219e+01  1.758300e+05      2.877395e+01
std        1.090371e+04      2.459738e+01  2.075617e+05      3.520846e+01
min        1.983749e-04      5.014100e-02  0.000000e+00      0.000000e+00
25%        4.079758e+02      2.699886e+00  5.193768e+04      8.150618e+00
50%        1.190336e+03      6.231133e+00  1.318107e+05      1.841699e+01
75%        2.462710e+03      1.688640e+01  2.323144e+05      3.682003e+01
max        1.113971e+05      3.343819e+02  2.830241e+06      3.343819e+02


In [62]:
gpd_grid.columns

Index(['geometry', 'label', 'grid_id', 'tmi_mean', 'tmi_std', 'tmi_p90',
       'rtp_mean', 'rtp_std', 'rtp_p90', '1vd_mean', '1vd_std', '1vd_p90',
       'dist_mag_line_m', 'mag_nearest_amp', 'mag_len_5km', 'mag_amp_max_5km',
       'dist_grav_line_m', 'grav_nearest_amp', 'grav_len_5km',
       'grav_amp_max_5km'],
      dtype='object')

In [65]:
out_gpkg  = BASE_DIR/MAG_STRINGS_PATH / "geo_analyse.gpkg"
gpd_grid.to_file(
    out_gpkg,
    layer="grid_500m",
    driver="GPKG"
)

In [None]:
geo_grid = gpd.read_file(out_gpkg, layer="grid_500m")