Test how the total run time scales with the number of data points used in the inversion.

In [1]:
import os
import numpy as np
import pygmt as pg
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import seaborn as sns
import pandas as pd
import pygeoinf as inf
from pygeoinf.symmetric_space.sphere import Sobolev
from time import time
from tqdm import tqdm

In [2]:
# PyGMT defaults for every figure: plain map frame, and the same 10 pt black
# Palatino font for primary/secondary annotations and axis labels.
pg.config(
    MAP_FRAME_TYPE="plain",
    FONT_ANNOT_PRIMARY="10p,Palatino-Roman,black",
    FONT_ANNOT_SECONDARY="10p,Palatino-Roman,black",
    FONT_LABEL="10p,Palatino-Roman,black",
)

# Restrict the numerical backend to a single thread for this run.
inf.configure_threading(n_threads=1)

# Colour palette files (.cpt); judging by the names, one for the field and one
# for its uncertainty.
DT_CMAP = "/space/ij264/earth-tunya/cpts/vik_DT.cpt"
STD_CMAP = "/space/ij264/earth-tunya/cpts/vik_DT_error.cpt"

Backend threading restricted to 1 thread(s).


In [3]:
# Start of the wall-clock timer; the last cell prints `end_time - start_time`,
# so everything executed between here and there is included in the reported time.
start_time = time()

In [4]:
# --- Run configuration ---
# Input table, and how many of its rows are randomly drawn for this run.
DATA_PATH = Path("/space/ij264/earth-tunya/geoinf_analysis/data") / "global.xyz"
N_DATA = 10_000

# First argument of the Sobolev spaces: LMAX_FULL for the model space of the
# actual inversion, LMAX_PRE for the smaller space used to build the preconditioner.
LMAX_FULL, LMAX_PRE = 64, 16

# Order and scale shared by both Sobolev spaces.
MODEL_SPACE_ORDER, MODEL_SPACE_SCALE = 2.0, 0.1

# Order and scale handed to the Sobolev-kernel prior measure.
PRIOR_ORDER, PRIOR_SCALE = 0.1, 0.01

In [5]:
# Output directory for this run, keyed by the sample size and the prior settings.
FNAME_ROOT = f'/space/ij264/earth-tunya/geoinf_analysis/figures/real_data/{N_DATA}_points/p_order_{PRIOR_ORDER}_s_{PRIOR_SCALE}'
os.makedirs(FNAME_ROOT, exist_ok=True)

# --- Data Loading ---
# Fixed seed so that the random subsample -- and therefore the measured run time
# and anything derived from `data` -- is the same on every Restart & Run All.
SAMPLE_SEED = 0

# Whitespace-separated table without a header row; N_DATA rows are drawn at
# random without replacement (raises ValueError if the file has fewer rows).
data = pd.read_csv(
    DATA_PATH,
    names=["lon", "lat", "z", "z_err", "symbol"],
    sep=r"\s+",
).sample(N_DATA, random_state=SAMPLE_SEED)

# (lat, lon) pairs: note the order is swapped relative to the file's lon, lat columns.
points_to_evaluate_at = list(zip(data["lat"], data["lon"]))

In [6]:
# --- Helper function for measures ---
def get_constrained_prior(space, order, scale):
    """Return a Sobolev-kernel Gaussian prior on ``space`` with a zero-mean (l=0) constraint.

    Args:
        space: Function space object providing
            ``point_value_scaled_sobolev_kernel_gaussian_measure`` and
            ``to_coefficient_operator``.
        order: Order passed to the Sobolev-kernel measure.
        scale: Scale passed to the Sobolev-kernel measure.

    Returns:
        The measure obtained by conditioning the unconstrained prior on the
        linear equation "constraint operator applied to the model equals 0".
    """
    base_measure = space.point_value_scaled_sobolev_kernel_gaussian_measure(
        order, scale
    )

    # Operator built with lmax=0, lmin=0; fixing its output to 0 is what
    # enforces the zero-mean (l=0) condition.
    degree_zero_op = space.to_coefficient_operator(0, lmin=0)
    zero_mean_subspace = inf.AffineSubspace.from_linear_equation(
        degree_zero_op,
        np.array([0]),
        solver=inf.CholeskySolver(),
    )

    return zero_mean_subspace.condition_gaussian_measure(base_measure)

In [7]:
# Model space for the full inversion: a Sobolev space on the sphere built from
# LMAX_FULL, MODEL_SPACE_ORDER and MODEL_SPACE_SCALE.
model_space = Sobolev(LMAX_FULL, MODEL_SPACE_ORDER, MODEL_SPACE_SCALE)

# Construct forward model.
# The forward operator evaluates a model at the sampled (lat, lon) points.
forward_op = model_space.point_evaluation_operator(points_to_evaluate_at)
# Data errors: a Gaussian measure on the operator's codomain, with the "z_err"
# column supplying the standard deviations.
data_error = inf.GaussianMeasure.from_standard_deviations(
    forward_op.codomain, data["z_err"].values
)
forward_prob = inf.LinearForwardProblem(forward_op, data_error_measure=data_error)

# Construct prior.
# Zero-mean-constrained Sobolev prior on the full model space.
prior_measure = get_constrained_prior(model_space, PRIOR_ORDER, PRIOR_SCALE)

In [8]:
# Build a diagonal preconditioner from a second copy of the problem posed on a
# Sobolev space constructed with LMAX_PRE instead of LMAX_FULL (same points,
# same data errors, same prior settings).
print("Forming the preconditioner...")
pre_space = Sobolev(LMAX_PRE, MODEL_SPACE_ORDER, MODEL_SPACE_SCALE)
pre_forward_op = pre_space.point_evaluation_operator(points_to_evaluate_at)
pre_forward_prob = inf.LinearForwardProblem(
    pre_forward_op, data_error_measure=data_error
)
pre_prior = get_constrained_prior(pre_space, PRIOR_ORDER, PRIOR_SCALE)

pre_inversion = inf.LinearBayesianInversion(pre_forward_prob, pre_prior)
# NOTE(review): `solver` is not referenced again in the visible cells (the final
# solve uses CGMatrixSolver) -- confirm whether it is still needed.
solver = inf.EigenSolver(parallel=False)
# Despite the name, this holds the *diagonal* of the normal operator, not the operator.
# NOTE(review): parallel=True / n_jobs=5 is used here although
# configure_threading(n_threads=1) was called earlier -- confirm this is intended
# for the timing comparison.
pre_inversion_normal_op = pre_inversion.normal_operator.extract_diagonal(parallel=True, n_jobs=5)
# Replace entries below 1e-12 with 1.0 (in place) so the reciprocal taken below
# cannot divide by ~0.
pre_inversion_normal_op[pre_inversion_normal_op < 1e-12] = 1.0
# Diagonal operator on the data space whose entries are the reciprocals of that diagonal.
preconditioner = inf.DiagonalSparseMatrixLinearOperator.from_diagonal_values(
    pre_inversion.data_space, pre_inversion.data_space, 1.0 / pre_inversion_normal_op
)

Forming the preconditioner...


In [9]:
# --- Final Inversion ---
# Posterior measure on the model given the observed "z" values, using the full
# forward problem and prior. The linear system is solved with CGMatrixSolver and
# the diagonal preconditioner built in the previous cell.
print("Solving the linear system via CG...")
bi = inf.LinearBayesianInversion(forward_prob, prior_measure)
posterior_measure = bi.model_posterior_measure(
    data["z"].values,
    inf.CGMatrixSolver(),
    preconditioner=preconditioner,
)

Solving the linear system via CG...


In [10]:
# Wall-clock time elapsed since `start_time` was recorded. That timestamp is taken
# before the data are loaded, so the figure covers data loading, preconditioner
# construction and the CG solve -- not the solve alone.
end_time = time()
print(f"For {N_DATA} points, the total time taken: {end_time - start_time:.2f} seconds.")

For 10000 points, the total time taken: 82.13 seconds.


For 10000 points, the total time taken: 82.13 seconds.

For 1000 points, the total time taken: 8.85 seconds.

For 100 points, the total time taken: 4.05 seconds.