In [1]:
import pandas as pd
import numpy as np

# Pin the legacy global NumPy RNG so repeated runs draw identical values.
np.random.seed(42)

# Number of synthetic wells; one DataFrame row is produced per well.
n_samples = 500

# (column, lower bound, upper bound) for every uniformly sampled quantity.
# Row order is significant: each draw consumes the seeded global RNG stream
# in sequence, so reordering these rows would change the generated numbers.
_uniform_columns = [
    ('Qo_bpd', 100, 1500),        # Oil Rate
    ('Qw_bpd', 0, 500),           # Water Rate
    ('Qg_mscfd', 50, 2000),       # Gas Rate
    ('P_wh_psi', 150, 500),       # Wellhead Pressure
    ('T_wh_f', 80, 120),          # Wellhead Temp
    ('API_gravity', 30, 45),      # Oil Gravity
    ('SG_gas', 0.65, 0.8),        # Gas Specific Gravity
    ('Depth_ft', 5000, 8000),     # Tubing Shoe Depth
]

# Identifier column first (WELL-001 ... WELL-500); it does not touch the RNG.
data = {'Well_ID': [f'WELL-{i:03d}' for i in range(1, n_samples + 1)]}

# Continuous columns, drawn in the order listed above.
data.update({
    column: np.random.uniform(lower, upper, n_samples)
    for column, lower, upper in _uniform_columns
})

# Tubing Inner Diameter: picked from three discrete sizes, drawn last.
data['Tubing_ID_in'] = np.random.choice([2.441, 2.992, 3.958], n_samples)

# Build the frame and append a depth-correlated column: T_res_f is the
# wellhead temperature plus 1.5 per 100 units of Depth_ft (a simple linear
# geothermal gradient). Only temperature is derived here, not pressure.
df = pd.DataFrame(data).assign(
    T_res_f=lambda frame: frame['T_wh_f'] + (frame['Depth_ft'] / 100 * 1.5)
)

# Persist without the index column and confirm on stdout.
df.to_csv('well_test_data.csv', index=False)
print("Data generated: well_test_data.csv")

Data generated: well_test_data.csv
