# 06_spatial_statistics_and_geostatistics.ipynb

# Title: Spatial Statistics and Geostatistics in Atmospheric Data
# Description: Hands-on Python notebook for learning spatial analysis, variograms, and kriging using synthetic atmospheric data.


In [None]:

# =======================
# 1. Import Required Libraries
# =======================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from skgstat import Variogram
from pykrige.ok import OrdinaryKriging

import libpysal
from esda.moran import Moran

In [None]:
# =======================
# 2. Generate Synthetic Spatial Data
# =======================
# Build a small, reproducible set of 20 measurement points carrying one
# atmospheric variable (e.g., O3 or VOC).
n_points = 20
np.random.seed(42)

# The draw order (latitudes, longitudes, noise) is part of what makes the
# dataset reproducible for a given seed, so keep it fixed.
latitudes = np.random.uniform(low=10, high=20, size=n_points)
longitudes = np.random.uniform(low=30, high=40, size=n_points)
noise = np.random.normal(loc=0, scale=3, size=n_points)

# Baseline of 50 plus a linear trend in both directions, plus Gaussian noise.
lat_effect = (latitudes - 10) * 2
lon_effect = (longitudes - 30) * 1.5
values = 50 + lat_effect + lon_effect + noise

# One row per measurement point.
df = pd.DataFrame({'lat': latitudes, 'lon': longitudes, 'value': values})

df.head()



In [None]:
# =======================
# 3. Descriptive Spatial Statistics
# =======================
# Summary statistics of the measured variable.
measurements = df['value']
mean_val = measurements.mean()
std_val = measurements.std()
print(f"Mean value: {mean_val:.2f}")
print(f"Standard deviation: {std_val:.2f}")

# Map view of the measurement points, coloured by measured value.
plt.figure(figsize=(7, 6))
sc = plt.scatter(
    df['lon'],
    df['lat'],
    c=measurements,
    cmap='viridis',
    s=100,
)
plt.title('Spatial Distribution of Measurements')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
# Pass the scatter handle explicitly so the colour scale is tied to it.
plt.colorbar(sc, label='Atmospheric Variable')
plt.show()


In [None]:

# =======================
# 4. Spatial Autocorrelation (Moran's I)
# =======================
# One (lat, lon) tuple per measurement point.
coords = [(lat, lon) for lat, lon in zip(df['lat'], df['lon'])]

# Points closer than 5 coordinate units are treated as neighbours.
neighbour_radius = 5.0
w = libpysal.weights.DistanceBand(coords, threshold=neighbour_radius)

observed = df['value'].values
moran = Moran(observed, w)

moran_report = f"Moran's I: {moran.I:.3f}, p-value: {moran.p_sim:.3f}"
print(moran_report)

#

In [None]:
# =======================
# 5. Variogram Analysis
# =======================
# Estimate the empirical variogram of the measurements and fit the default
# theoretical model to it. Plain NumPy arrays are passed so the call does not
# depend on how the library coerces pandas objects.
variogram = Variogram(
    coordinates=df[['lat', 'lon']].values,
    values=df['value'].values,
    normalize=False,
)
variogram.plot()
plt.title('Empirical Variogram')
plt.show()

# Display nugget, sill, and range
# The fitted parameters are read from describe(), which returns them in a
# dict under the keys 'nugget', 'sill' and 'effective_range'; the Variogram
# object does not offer `.nugget` / `.sill` / `.range` attributes.
fit_summary = variogram.describe()
print(f"Nugget: {fit_summary['nugget']:.2f}")
print(f"Sill: {fit_summary['sill']:.2f}")
print(f"Range: {fit_summary['effective_range']:.2f}")



In [None]:
# =======================
# 6. Ordinary Kriging Interpolation
# =======================
# Grid for prediction: 50 evenly spaced nodes per axis, spanning the bounding
# box of the observations.
grid_lat = np.linspace(df['lat'].min(), df['lat'].max(), 50)
grid_lon = np.linspace(df['lon'].min(), df['lon'].max(), 50)

# PyKrige takes (x, y, z). Longitude is the x coordinate and latitude the y
# coordinate, matching the map convention used by the plots in this notebook.
OK = OrdinaryKriging(
    df['lon'].values,
    df['lat'].values,
    df['value'].values,
    variogram_model='linear',
    verbose=False,
    enable_plotting=False
)

# For style 'grid', execute(style, xpoints, ypoints) returns arrays shaped
# (len(ypoints), len(xpoints)). With x = lon and y = lat, z_pred[i, j] is the
# prediction at (grid_lat[i], grid_lon[j]) -- the layout expected by
# contourf(grid_lon, grid_lat, z_pred). Passing the axes the other way round
# yields a transposed field that goes unnoticed because both grids have the
# same number of nodes.
# z_pred holds the kriged values, ss the kriging variance at each node.
z_pred, ss = OK.execute('grid', grid_lon, grid_lat)



In [None]:
# =======================
# 7. Visualization of Kriging Results
# =======================
plt.figure(figsize=(8, 6))

# Keep the handle of the filled-contour set: it is the mappable the colour
# bar has to describe.
filled = plt.contourf(grid_lon, grid_lat, z_pred, cmap='viridis', levels=20)

# Overlay the observation locations in a single flat colour.
plt.scatter(df['lon'], df['lat'], c='red', label='Observations')

# plt.scatter() becomes the "current" mappable, so a bare plt.colorbar() would
# attach to the red markers (which carry no data values) instead of the
# interpolated field. Passing the contour set explicitly avoids that.
plt.colorbar(filled, label='Predicted Atmospheric Variable')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('Ordinary Kriging Interpolation')
plt.legend()
plt.show()



In [None]:
# =======================
# 8. Summary
# =======================
# Recap of the notebook, one bullet per analysis step.
summary_lines = (
    "Summary:",
    "- We generated synthetic atmospheric spatial data.",
    "- Calculated descriptive statistics (mean, std) and visualized spatial distribution.",
    "- Computed Moran's I to assess spatial autocorrelation.",
    "- Created empirical variogram to quantify spatial dependence (nugget, sill, range).",
    "- Performed ordinary kriging to interpolate variable across unmeasured locations.",
)
# Leading and trailing newlines frame the recap with blank lines in the output.
print("\n" + "\n".join(summary_lines) + "\n")
