In [1]:
import pandas as pd
import numpy as np

# Set random seed for reproducibility
np.random.seed(42)

def generate_composition(n_samples, base_comp, noise_level=0.02):
    """Generate synthetic molar-fraction samples scattered around a base composition.

    Each sample perturbs every component of ``base_comp`` with zero-mean
    Gaussian noise whose standard deviation is ``val * noise_level``, clips
    negative results to zero, and renormalizes so the fractions sum to 1.0.

    Args:
        n_samples: Number of samples (rows) to generate.
        base_comp: Mapping of component name to its base molar fraction.
            Its key order defines the column order of the result.
        noise_level: Relative standard deviation of the noise applied to
            each component.

    Returns:
        A ``pandas.DataFrame`` with one row per sample and one column per
        component. The component columns are present even when
        ``n_samples`` is 0.

    Raises:
        ZeroDivisionError: If every perturbed component of a sample is
            clipped to zero, so there is nothing to normalize.
        ValueError: Propagated from ``np.random.normal`` when
            ``val * noise_level`` is negative.
    """
    rows = []

    for _ in range(n_samples):
        # One scalar draw per component, in base_comp order: this consumes the
        # global NumPy random stream in a fixed sequence, so a seeded run is
        # reproducible.
        noisy = {
            comp: max(0, val + np.random.normal(0, val * noise_level))
            for comp, val in base_comp.items()
        }

        # Renormalize so the molar fractions of the sample sum to 1.0.
        total = sum(noisy.values())
        rows.append({comp: frac / total for comp, frac in noisy.items()})

    # Passing the columns explicitly keeps the component schema for an empty
    # result instead of returning a column-less DataFrame.
    return pd.DataFrame(rows, columns=list(base_comp))

# Define Base Compositions for 3 Distinct Compartments (High GOR, Medium GOR, Heavy Oil)
# Compartment A: Gas Cap / High GOR (High C1)
comp_A_base = {'N2': 0.02, 'CO2': 0.01, 'C1': 0.75, 'C2': 0.08, 'C3': 0.05,
               'iC4': 0.02, 'nC4': 0.02, 'iC5': 0.01, 'nC5': 0.01, 'C6': 0.01, 'C7+': 0.02}

# Compartment B: Volatile Oil (Balanced)
comp_B_base = {'N2': 0.01, 'CO2': 0.02, 'C1': 0.55, 'C2': 0.10, 'C3': 0.08,
               'iC4': 0.04, 'nC4': 0.04, 'iC5': 0.03, 'nC5': 0.03, 'C6': 0.04, 'C7+': 0.06}

# Compartment C: Black Oil (Higher Heavies)
comp_C_base = {'N2': 0.01, 'CO2': 0.01, 'C1': 0.35, 'C2': 0.05, 'C3': 0.05,
               'iC4': 0.03, 'nC4': 0.03, 'iC5': 0.04, 'nC5': 0.04, 'C6': 0.09, 'C7+': 0.30}


def _build_compartment(base_comp, n_wells, well_letter, depth_range, label):
    """Build the labelled sample table for one compartment.

    The sample count is given once and reused for the compositions, the
    well identifiers and the depths, so the three can never disagree.
    """
    # Compositions are drawn before depths; keeping that order keeps the
    # seeded random stream (and thus the generated file) unchanged.
    frame = generate_composition(n_wells, base_comp)
    frame['Well_ID'] = [f'Well-{well_letter}-{i:02d}' for i in range(1, n_wells + 1)]
    depth_min, depth_max = depth_range
    frame['Depth_m'] = np.random.uniform(depth_min, depth_max, n_wells)
    frame['True_Compartment'] = label
    return frame


# Generate Data (A, then B, then C — the order fixes the random draws)
df_A = _build_compartment(comp_A_base, 80, 'A', (2500, 2600), 'Block_North')
df_B = _build_compartment(comp_B_base, 90, 'B', (2800, 2950), 'Block_Central')
df_C = _build_compartment(comp_C_base, 80, 'C', (3100, 3200), 'Block_South')

# Combine and Shuffle
df_final = pd.concat([df_A, df_B, df_C]).sample(frac=1).reset_index(drop=True)

# Reorder columns: identifiers first, then the components in the order they
# are declared in the base compositions, then the ground-truth label.
cols = ['Well_ID', 'Depth_m'] + list(comp_A_base) + ['True_Compartment']
df_final = df_final[cols]

# Save to CSV
df_final.to_csv('fluid_samples.csv', index=False)
# Report the actual row count rather than a hard-coded number.
print(f"fluid_samples.csv generated with {len(df_final)} rows.")

fluid_samples.csv generated with 250 rows.
