In [1]:
import pandas as pd
import numpy as np

# Synthetic capillary-pressure dataset generator.
#
# Builds n_samples rows of (porosity, permeability, Pc, Sw) that obey a known
# Leverett J-function power law, J = A * Sw^(-B), plus Gaussian noise on Sw,
# then writes them to a CSV file. The order of the np.random draws below is
# part of the reproducible output: reordering them changes the dataset.

# Configuration for Synthetic Data
n_samples = 20000
np.random.seed(42)  # fixed seed -> identical dataset on every run

# 1. Generate Rock Properties (Porosity & Permeability)
# Log-normal distribution for Permeability (md); mean/sigma here describe the
# underlying normal of ln(perm), not perm itself.
perm = np.random.lognormal(mean=2.5, sigma=1.2, size=n_samples)
# Porosity correlated to ln(Permeability) with Gaussian noise (std 0.02)
porosity = 0.05 + 0.04 * np.log(perm) + np.random.normal(0, 0.02, n_samples)
# Clip to a realistic range; the lower bound also keeps perm / porosity
# strictly positive and finite in the J-function below.
porosity = np.clip(porosity, 0.02, 0.35)

# 2. Generate Capillary Pressure (Pc) inputs
# Exponentially distributed Pc (scale = 15 psi), clipped to [0.5, 200] psi
pc_raw = np.random.exponential(scale=15, size=n_samples)
pc_raw = np.clip(pc_raw, 0.5, 200)

# 3. Define True Physical Parameters for Synthetic Calculation
sigma = 30  # Interfacial tension (dynes/cm)
theta = 0   # Contact angle (degrees; converted to radians where used)
factor = 0.2166  # constant from the J-function formula below
                 # NOTE(review): presumably the field-unit conversion factor - confirm

# Calculate the "True" J-Function value based on random Pc
# J = 0.2166 * (Pc / (sigma * cos(theta))) * sqrt(k/phi)
j_value = factor * (pc_raw / (sigma * np.cos(np.radians(theta)))) * np.sqrt(perm / porosity)

# 4. Generate Saturation (Sw) based on a known Power Law: J = A * Sw^(-B)
# Inverting: Sw = (A / J)^(1/B)
A_true = 0.5
B_true = 1.5

# Add noise to mimic real lab measurement errors
noise = np.random.normal(0, 0.05, n_samples)
sw_calc = (A_true / j_value)**(1/B_true) + noise

# Clip Sw to physical bounds (Irreducible Sw to 1.0). Noise is added before
# clipping, so out-of-range samples land exactly on 0.05 or 1.0.
sw_final = np.clip(sw_calc, 0.05, 1.0)

# Create DataFrame
# The IFT / contact-angle columns reuse sigma and theta so the recorded
# metadata can never disagree with the values used to compute j_value.
df = pd.DataFrame({
    'Sample_ID': range(1, n_samples + 1),
    'Porosity_frac': np.round(porosity, 4),
    'Permeability_md': np.round(perm, 2),
    'Pc_psi': np.round(pc_raw, 2),
    'Sw_frac': np.round(sw_final, 4),
    'IFT_dynes_cm': sigma,
    'Contact_Angle_deg': theta,
})

# Save (single source of truth for the file name used in the message below)
output_path = 'capillary_pressure_data.csv'
df.to_csv(output_path, index=False)
print(f"Generated '{output_path}' with {len(df)} rows.")

Generated 'capillary_pressure_data.csv' with 20000 rows.
