In [1]:
import pandas as pd
import numpy as np

# Fix the global NumPy seed so every run regenerates the same dataset.
np.random.seed(42)

# Number of synthetic "simulation runs" (rows in the output table).
n_samples = 2000

# 1. Simulator inputs. Each parameter is drawn uniformly between (low, high).
#    The dict order is significant: it sets the column order of the table and
#    the order in which random numbers are consumed from the seeded stream.
input_ranges = {
    'Porosity': (0.15, 0.35),               # Fraction
    'Permeability_mD': (50, 800),           # milliDarcy
    'Initial_Pressure_psi': (2500, 4500),   # psi
    'Water_Saturation': (0.1, 0.4),         # Fraction
    'Thickness_ft': (20, 100),              # Feet
}
data = {
    column: np.random.uniform(low, high, n_samples)
    for column, (low, high) in input_ranges.items()
}
df = pd.DataFrame(data)

# 2. Synthetic "simulator output" (cumulative oil production).
#    A non-linear power-law proxy stands in for a real simulator:
#        CumOil = 1.5 * Perm^0.6 * Thickness * Porosity * (1 - Sw) * Pressure^0.8
#    The factors are multiplied left to right in the order shown below.
perm_term = df['Permeability_mD'] ** 0.6
oil_pore_fraction = df['Porosity'] * (1 - df['Water_Saturation'])
pressure_term = df['Initial_Pressure_psi'] ** 0.8
calibration_factor = 1.5
clean_response = (
    perm_term
    * df['Thickness_ft']
    * oil_pore_fraction
    * pressure_term
    * calibration_factor
)

# Zero-mean Gaussian noise whose standard deviation is 2% of the mean clean
# response; it stands in for simulator convergence variance and heterogeneity
# the formula does not capture.
# NOTE: the noise is additive and unbounded, so nothing here prevents a very
# small response from being pushed below zero.
noise_sigma = clean_response.mean() * 0.02
noise = np.random.normal(0, noise_sigma, n_samples)
df['Cum_Oil_Prod_bbl'] = clean_response + noise

# 3. Persist the table (without the index column) and confirm on stdout.
df.to_csv('reservoir_simulation_data.csv', index=False)
print("reservoir_simulation_data.csv generated successfully.")

reservoir_simulation_data.csv generated successfully.
