# gslib-zero Quickstart: From Data to Kriged Estimates

This notebook walks through a complete geostatistical workflow:
1. Create synthetic sample data and explore it
2. Normal score transform
3. Variogram analysis (experimental variogram and model fitting)
4. Kriging estimation on a regular grid
5. Back-transform and visualization

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# gslib-zero imports
from gslib_zero import (
    nscore, backtr,
    gamv, plot_variogram,
    kt3d,
    GridSpec, VariogramModel, SearchParameters
)

## 1. Create Sample Data

We'll create a synthetic dataset that loosely mimics a mineral deposit: grades follow a linear spatial trend in X and Y, with independent Gaussian noise added on top.

In [None]:
# Seed NumPy's legacy global RNG so the synthetic samples are reproducible
np.random.seed(42)

# Draw 100 sample locations: x and y uniform between 0 and 1000, z fixed at 0.
# The draw order (x, then y, then noise below) determines the generated values.
n_samples = 100
x_coords = np.random.uniform(0, 1000, n_samples)
y_coords = np.random.uniform(0, 1000, n_samples)
df = pd.DataFrame({'x': x_coords, 'y': y_coords, 'z': np.zeros(n_samples)})

# Grade = base level 5.0 + linear trend in x and y + Gaussian noise (std 1.5)
trend = 0.005 * df['x'] + 0.003 * df['y']
noise = np.random.normal(0, 1.5, n_samples)
df['grade'] = 5.0 + trend + noise

# Numeric summary, then preview the first rows as the cell's output
print(f"Samples: {len(df)}")
print(f"Grade range: {df.grade.min():.2f} - {df.grade.max():.2f}")
print(f"Mean: {df.grade.mean():.2f}, Std: {df.grade.std():.2f}")
df.head()

## 2. Exploratory Data Analysis

In [None]:
# Two panels side by side: grade histogram (left) and sample map (right)
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
hist_ax, map_ax = axes

# Left panel: distribution of grades with the sample mean marked in red
hist_ax.hist(df.grade, bins=20, edgecolor='black', alpha=0.7)
hist_ax.axvline(df.grade.mean(), color='red', linestyle='--', label=f'Mean: {df.grade.mean():.2f}')
hist_ax.set_xlabel('Grade')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Grade Distribution')
hist_ax.legend()

# Right panel: sample positions, coloured by grade
scatter = map_ax.scatter(df.x, df.y, c=df.grade, cmap='viridis', s=50)
map_ax.set_xlabel('X')
map_ax.set_ylabel('Y')
map_ax.set_title('Sample Locations')
plt.colorbar(scatter, ax=map_ax, label='Grade')

plt.tight_layout()
plt.show()

## 3. Normal Score Transform

Many geostatistical methods assume Gaussian-distributed data, so we transform the grades to normal scores (mean ≈ 0, standard deviation ≈ 1) before modelling.

In [None]:
# nscore takes the grade Series as-is and returns the transformed values
# together with the transform table that backtr consumes later on
nscore_values, transform_table = nscore(df['grade'], binary=True)
df['nscore'] = nscore_values

# Sanity check: normal scores should have mean ~0 and std ~1
print(f"Original mean: {df.grade.mean():.3f}")
print(f"Normal score mean: {df.nscore.mean():.6f}")
print(f"Normal score std: {df.nscore.std():.6f}")

In [None]:
# Side-by-side histograms: raw grades (left) vs. normal scores (right)
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# One entry per panel: axis, data, title, x-label, extra hist() keyword arguments
hist_panels = [
    (axes[0], df.grade, 'Original Distribution', 'Grade', {}),
    (axes[1], df.nscore, 'Normal Score Distribution', 'Normal Score', {'color': 'orange'}),
]
for panel_ax, panel_values, panel_title, panel_xlabel, hist_extra in hist_panels:
    panel_ax.hist(panel_values, bins=20, edgecolor='black', alpha=0.7, **hist_extra)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel(panel_xlabel)

plt.tight_layout()
plt.show()

## 4. Variogram Analysis

Compute the experimental variogram of the normal scores to understand their spatial correlation.

In [None]:
# Experimental variogram of the normal scores using 15 lags of 50 units each.
# No direction arguments are supplied (the omnidirectional case); the
# coordinate and value Series are passed straight through.
variogram_results = gamv(
    df['x'], df['y'], df['z'], df['nscore'],
    nlag=15, lag_distance=50.0, binary=True,
)

# gamv hands back a list with one entry per direction; use the first entry
variogram_result = variogram_results[0]
print(f"Lags computed: {len(variogram_result.gamma)}")
print(f"Max lag distance: {variogram_result.lag_distances.max():.0f}")

## 5. Fit Variogram Model

Manually fit a theoretical model to the experimental variogram.

In [None]:
# Spherical model with a nugget. Nugget (0.15) + sill (0.85) adds up to 1.0,
# in line with the ~unit variance expected of normal scores. The values were
# chosen by inspecting the experimental variogram.
variogram_model = VariogramModel.spherical(
    nugget=0.15,
    sill=0.85,
    ranges=(300, 300, 1),
)

print(f"Total sill: {variogram_model.total_sill}")
print(f"Nugget: {variogram_model.nugget}")

In [None]:
# Show the experimental variogram together with the chosen model
variogram_plot_title = "Variogram: Experimental vs Model"
ax = plot_variogram(
    title=variogram_plot_title,
    experimental=variogram_result,
    model=variogram_model,
)
plt.show()

## 6. Define Estimation Grid

In [None]:
# 50 x 50 x 1 grid of 20 x 20 x 1 cells with origin values (10, 10, 0),
# grouped below by axis.
# NOTE(review): presumably xmin/ymin/zmin are the centre of the first cell
# (GSLIB convention) — confirm against the GridSpec documentation.
grid = GridSpec(
    nx=50, xmin=10, xsiz=20,
    ny=50, ymin=10, ysiz=20,
    nz=1, zmin=0, zsiz=1,
)

print(f"Grid cells: {grid.ncells}")
print(f"Grid extent: X({grid.xmin}-{grid.xmax}), Y({grid.ymin}-{grid.ymax})")

## 7. Kriging

Run ordinary kriging on normal scores.

In [None]:
# Search neighbourhood: radii of 400 / 400 / 10 with min_samples=4 and
# max_samples=16
search = SearchParameters(
    radius1=400, radius2=400, radius3=10,
    min_samples=4, max_samples=16,
)

# Ordinary kriging of the normal scores onto the grid; the coordinate and
# value Series are passed positionally, followed by grid, model and search
result = kt3d(
    df['x'], df['y'], df['z'], df['nscore'],
    grid, variogram_model, search,
    kriging_type="ordinary",
    binary=True,
)

print(f"Estimate shape: {result.estimate.shape}")
print(f"Kriged mean (normal scores): {result.estimate.mean():.4f}")

### Alternative: DataFrame with column names

You can also pass the whole DataFrame via `data=` and name the value column with `value_col=`:

In [None]:
# Same kriging run, but handing over the whole DataFrame through data= and
# naming the value column.
# NOTE(review): coordinate columns are not named here; presumably kt3d
# defaults to 'x', 'y', 'z' — confirm against the kt3d documentation.
result_alt = kt3d(
    data=df,
    value_col='nscore',
    grid=grid,
    variogram=variogram_model,
    search=search,
    kriging_type="ordinary",
    binary=True,
)

# Confirm that both call styles produce numerically matching estimates
print(f"Results match: {np.allclose(result.estimate, result_alt.estimate)}")

## 8. Back-Transform

Convert kriged normal scores back to original units.

In [None]:
# Tail limits for the back-transform: widen the observed grade range by 10% of
# each extreme's magnitude. Using abs() keeps tail_min <= min and
# tail_max >= max even for zero or negative grades, where plain `min * 0.9` /
# `max * 1.1` would move the limits *inside* the data range. For positive
# grades this gives the same limits as the multiplicative form.
grade_min = df.grade.min()
grade_max = df.grade.max()
tail_min = grade_min - 0.1 * abs(grade_min)
tail_max = grade_max + 0.1 * abs(grade_max)

# backtr is fed a flattened array, so ravel the kriged normal scores first
# and restore the grid shape afterwards
estimates_original = backtr(
    result.estimate.ravel(),
    transform_table,
    zmin=tail_min,
    zmax=tail_max,
    binary=True
)

estimates_original = estimates_original.reshape(result.estimate.shape)

# Compare the back-transformed mean with the mean of the input data
print(f"Back-transformed mean: {estimates_original.mean():.2f}")
print(f"Original data mean: {df.grade.mean():.2f}")

## 9. Visualize Results

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Both maps are drawn over the same grid extent.
# NOTE(review): imshow's extent describes the outer edges of the image —
# confirm grid.xmin/xmax/ymin/ymax are cell edges rather than cell centres.
map_extent = [grid.xmin, grid.xmax, grid.ymin, grid.ymax]

# One entry per panel: axis, 2D layer, colormap, sample marker colour,
# title, colorbar label.
# NOTE(review): index [0] presumably selects the single z-slice (nz=1) —
# confirm the axis order of result.estimate / result.variance.
map_panels = [
    (axes[0], estimates_original[0], 'viridis', 'red', 'Kriged Estimates', 'Grade'),
    (axes[1], result.variance[0], 'Reds', 'blue', 'Kriging Variance (Uncertainty)', 'Variance'),
]

for panel_ax, panel_layer, panel_cmap, sample_color, panel_title, cbar_label in map_panels:
    # Gridded values as an image, with the sample locations overlaid
    panel_image = panel_ax.imshow(
        panel_layer,
        extent=map_extent,
        origin='lower',
        cmap=panel_cmap,
    )
    panel_ax.scatter(df.x, df.y, c=sample_color, s=20, alpha=0.5, label='Samples')
    panel_ax.set_xlabel('X')
    panel_ax.set_ylabel('Y')
    panel_ax.set_title(panel_title)
    panel_ax.legend()
    plt.colorbar(panel_image, ax=panel_ax, label=cbar_label)

plt.tight_layout()
plt.show()

## Summary

This notebook demonstrated:
- Creating synthetic sample data as a pandas DataFrame
- Passing Series directly to gslib-zero functions (no `.values` needed)
- Complete workflow: nscore → gamv → kt3d → backtr
- Both input patterns: `kt3d(df.x, df.y, ...)` and `kt3d(data=df, value_col=...)`