In [1]:
import time
import numpy as np
from sklearn.linear_model import LinearRegression

# Size of the synthetic training set
N = 10000

# Three independent feature columns: uniform draws scaled per feature
densities = 10 * np.random.rand(N)          # uniform over [0, 10)
lids = 5 * np.random.rand(N)                # uniform over [0, 5)
lpips_variances = np.random.rand(N)         # uniform over [0, 1)

# Design matrix of shape (N, 3), one feature per column
X_train = np.column_stack((densities, lids, lpips_variances))

# Targets are random as well, uniform over [0, 2)
y_train = 2 * np.random.rand(N)

# Linear regression with an intercept term, fitted on the synthetic data
model = LinearRegression(fit_intercept=True).fit(X_train, y_train)

# --- benchmark predict ---
batch_sizes_to_test = [4, 8, 16, 32, 64, 256, 1024]
num_warmup = 200
num_iters = 2000  # raise this for steadier numbers when each call is very fast

# Element type of the benchmark inputs; keep it in line with what real code
# passes to the model (float64 here; switch to np.float32 if that is what
# you feed it)
dtype = np.float64

# Benchmark batches have the same number of columns as the training matrix
n_features = X_train.shape[1]

for bs in batch_sizes_to_test:
    # Random batch in the requested dtype, forced C-contiguous so that
    # unusual strides do not leak into the timing
    X = np.ascontiguousarray(
        np.random.randn(bs, n_features).astype(dtype, copy=False)
    )

    # warm-up: untimed calls before the measurement starts
    for _ in range(num_warmup):
        model.predict(X)

    # timing: one clock reading around the whole loop, averaged afterwards
    t0 = time.perf_counter()
    for _ in range(num_iters):
        model.predict(X)
    elapsed = time.perf_counter() - t0

    avg_time_per_batch = elapsed / num_iters
    avg_time_per_sample = avg_time_per_batch / bs

    print(
        f"bs={bs:>5} | "
        f"{avg_time_per_sample:.3e} s/sample | "
        f"{avg_time_per_batch:.3e} s/batch"
    )


bs=    4 | 1.435e-05 s/sample | 5.739e-05 s/batch
bs=    8 | 7.583e-06 s/sample | 6.066e-05 s/batch
bs=   16 | 3.651e-06 s/sample | 5.841e-05 s/batch
bs=   32 | 1.819e-06 s/sample | 5.820e-05 s/batch
bs=   64 | 1.056e-06 s/sample | 6.759e-05 s/batch
bs=  256 | 2.795e-07 s/sample | 7.156e-05 s/batch
bs= 1024 | 6.976e-08 s/sample | 7.143e-05 s/batch
