In [1]:
import pandas as pd
import numpy as np

# Pin NumPy's global RNG so every run regenerates the same dataset
np.random.seed(42)

# Number of samples, followed by one uniform draw per sample for each input
# condition (pressure is drawn first, then temperature)
n_rows = 500
pressures = np.random.uniform(low=1000, high=5000, size=n_rows)  # psia, in [1000, 5000)
temperatures = np.random.uniform(low=100, high=250, size=n_rows)  # deg F, in [100, 250)

# Synthetic Physics Functions (Hidden Ground Truth)
def get_density(p, t):
    """Return a synthetic, noisy density for pressure ``p`` and temperature ``t``.

    The deterministic part is ``(p * 0.05 + 800) / (t * 0.01 + 1)``. Gaussian
    noise with mean 0 and standard deviation 5 is added on top, drawn from
    NumPy's global random state (so the result depends on ``np.random.seed``).

    Args:
        p: Pressure; a scalar or an array-like supporting arithmetic
            (NumPy array, pandas Series).
        t: Temperature; a scalar or an array-like that broadcasts with ``p``.

    Returns:
        The noisy density, with the broadcast shape of ``p`` and ``t``.

    Raises:
        ValueError: If ``p`` and ``t`` cannot be broadcast together.
    """
    # Size the noise from the broadcast shape of the inputs instead of len(p),
    # so scalar and multi-dimensional inputs work; for equal-length 1-D inputs
    # this consumes exactly the same random draws as before.
    noise_shape = np.broadcast(p, t).shape
    trend = (p * 0.05 + 800) / (t * 0.01 + 1)
    return trend + np.random.normal(0, 5, noise_shape)

def get_viscosity(p, t, density):
    """Return a synthetic, noisy viscosity that rises with density and falls with temperature.

    The deterministic part is ``(density / 1000) * 2.5 - (t / 500)``. Gaussian
    noise with mean 0 and standard deviation 0.05 is added on top, drawn from
    NumPy's global random state. ``p`` does not enter the formula; it only
    takes part in determining the shape of the noise.

    Args:
        p: Pressure; a scalar or array-like (used only for the output shape).
        t: Temperature; a scalar or array-like supporting arithmetic.
        density: Density; a scalar or array-like that broadcasts with ``t``.

    Returns:
        The noisy viscosity, with the broadcast shape of the three inputs.

    Raises:
        ValueError: If the inputs cannot be broadcast together.
    """
    # Size the noise from the broadcast shape of the inputs instead of len(p),
    # so scalar and multi-dimensional inputs work; for equal-length 1-D inputs
    # this consumes exactly the same random draws as before.
    noise_shape = np.broadcast(p, t, density).shape
    trend = (density / 1000) * 2.5 - (t / 500)
    return trend + np.random.normal(0, 0.05, noise_shape)

# Assemble the sampled operating conditions into a table
df = pd.DataFrame({'Pressure_psia': pressures, 'Temperature_F': temperatures})

# Evaluate the synthetic correlations; density comes first because the
# viscosity correlation takes it as an input
df['Exp_Density_kgm3'] = get_density(
    df['Pressure_psia'], df['Temperature_F']
)
df['Exp_Viscosity_cp'] = get_viscosity(
    df['Pressure_psia'], df['Temperature_F'], df['Exp_Density_kgm3']
)

# Layer additional Gaussian noise on both columns to mimic lab measurement
# error (density noise is drawn before viscosity noise); the absolute value
# keeps the reported viscosity non-negative
df['Exp_Density_kgm3'] = df['Exp_Density_kgm3'].add(
    np.random.normal(loc=0, scale=2, size=n_rows)
)
df['Exp_Viscosity_cp'] = (
    df['Exp_Viscosity_cp'] + np.random.normal(loc=0, scale=0.02, size=n_rows)
).abs()

# Write the table to disk without the index column and report completion
df.to_csv('pvt_data_synthetic.csv', index=False)
print("csv file created successfully.")

csv file created successfully.
