In [None]:
import numpy as np
import pandas as pd

# Set random seed for reproducibility (seeds NumPy's global legacy RNG).
np.random.seed(42)

# Number of data points (one billion rows).
# NOTE: seven float64 columns x 1e9 rows is ~56 GB if held in memory at once,
# so the dataset is generated and written in bounded-size chunks instead.
n_samples = 1_000_000_000

# Rows generated and written per batch; bounds peak memory use.
CHUNK_SIZE = 1_000_000

# Reynolds number below which the laminar formula is applied.
LAMINAR_RE_LIMIT = 2000

# Destination CSV file.
output_file = "reynolds_vs_friction.csv"


def compute_friction_factor(reynolds_number):
    """Return the Darcy friction factor for the given Reynolds number(s).

    Laminar flow (Re < 2000) uses f = 64 / Re. Otherwise the Blasius
    smooth-pipe correlation is used in its Darcy form, f = 0.316 * Re**-0.25.
    The coefficient 0.079 belongs to the Fanning form of Blasius and must not
    be mixed with the Darcy laminar law 64 / Re (Fanning would be 16 / Re).

    Parameters
    ----------
    reynolds_number : array-like of float
        Reynolds numbers; expected to be strictly positive.

    Returns
    -------
    numpy.ndarray
        Friction factors with the same shape as the input.
    """
    reynolds = np.asarray(reynolds_number, dtype=float)
    return np.where(
        reynolds < LAMINAR_RE_LIMIT,
        64 / reynolds,  # Laminar flow (Darcy): f = 64/Re
        0.316 * reynolds**-0.25,  # Turbulent flow (Darcy form of Blasius)
    )


def generate_samples(size):
    """Draw `size` random pipe-flow cases and return them as a DataFrame.

    Values come from NumPy's global RNG, so results are reproducible only
    relative to the seed and to the sequence of earlier draws.

    Parameters
    ----------
    size : int
        Number of rows to generate (0 yields an empty frame with all columns).

    Returns
    -------
    pandas.DataFrame
        Five sampled inputs plus the derived Reynolds number and friction
        factor, using the same column names as the output CSV.
    """
    pipe_diameter = np.random.uniform(0.01, 1.0, size)  # meters
    fluid_velocity = np.random.uniform(0.1, 10.0, size)  # m/s
    fluid_density = np.random.uniform(800, 1000, size)  # kg/m^3
    dynamic_viscosity = np.random.uniform(0.0005, 0.01, size)  # Pa.s
    # Roughness is recorded as a column only; it does not enter the friction
    # factor below, which depends on the Reynolds number alone.
    surface_roughness = np.random.uniform(0.00001, 0.005, size)  # meters

    # Re = rho * v * D / mu
    reynolds_number = (
        fluid_density * fluid_velocity * pipe_diameter
    ) / dynamic_viscosity

    return pd.DataFrame({
        'Pipe Diameter (m)': pipe_diameter,
        'Fluid Velocity (m/s)': fluid_velocity,
        'Fluid Density (kg/m^3)': fluid_density,
        'Dynamic Viscosity (Pa.s)': dynamic_viscosity,
        'Surface Roughness (m)': surface_roughness,
        'Reynolds Number': reynolds_number,
        'Friction Factor': compute_friction_factor(reynolds_number),
    })


def write_dataset(path, total_rows, chunk_size=CHUNK_SIZE):
    """Generate `total_rows` samples and stream them to a CSV file at `path`.

    Rows are produced `chunk_size` at a time so that peak memory depends on
    the chunk size rather than on the total row count. The header is written
    exactly once, including when `total_rows` is 0.

    Parameters
    ----------
    path : str
        Output file path; an existing file is overwritten.
    total_rows : int
        Total number of data rows to write.
    chunk_size : int, optional
        Maximum number of rows generated per batch; must be positive.

    Raises
    ------
    ValueError
        If `total_rows` is negative or `chunk_size` is not positive.
    """
    if total_rows < 0:
        raise ValueError("total_rows must be non-negative")
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")

    remaining = total_rows
    need_header = True
    # newline="" lets pandas control line terminators when given a handle.
    with open(path, "w", newline="", encoding="utf-8") as handle:
        # `need_header` forces one pass even for zero rows so the file
        # always carries the column header.
        while remaining > 0 or need_header:
            batch = generate_samples(min(chunk_size, remaining))
            batch.to_csv(handle, index=False, header=need_header)
            need_header = False
            remaining -= len(batch)


# Run the generation only when executed as a script or notebook cell
# (where __name__ is "__main__"), not when this code is imported.
if __name__ == "__main__":
    # Save the dataset as a CSV file
    write_dataset(output_file, n_samples)

    print(f"Dataset with {n_samples} rows saved as {output_file}")
