In [7]:
import pandas as pd
import numpy as np

# Value ranges for every sensor feature, grouped by leak class.
# Each row of the table below lists the (low, high) pairs for one leak class,
# in the same order as the feature names in `_sensor_features`.
_sensor_features = (
    "Gas_Concentration(ppm)",
    "Temperature(\u00b0C)",
    "Pressure(psi)",
    "Thermal_Gradient(\u00b0C/m)",
    "Acoustic_Amplitude(dB)",
    "Acoustic_Frequency(Hz)",
    "Flow_Rate(L/min)",
)

_bounds_by_leak_type = (
    (
        "Non-Leak",
        ((0.3, 0.9), (24.7, 25.9), (100, 101), (0.03, 0.07), (9, 11), (50, 60), (60.7, 61.1)),
    ),
    (
        "Small",
        ((150, 200), (22.2, 23), (83, 85), (0.7, 0.9), (30, 35), (310, 360), (54, 56)),
    ),
    (
        "Medium",
        ((650, 720), (20.6, 21.6), (75, 78), (1.5, 1.8), (50, 52), (600, 650), (52, 53.5)),
    ),
    (
        "Large",
        ((1000, 1220), (18, 19.9), (59, 62), (2.3, 2.8), (72, 76), (770, 850), (43.5, 46)),
    ),
)

# Final lookup: leak class -> {feature name -> (low, high)}.
conditions = {
    leak_name: dict(zip(_sensor_features, bounds))
    for leak_name, bounds in _bounds_by_leak_type
}

# Leak class assigned to each segment, keyed by 1-based segment number.
# The tuple is listed in segment order, so position i (counting from 1)
# is the leak class of segment i.
segment_leak_mapping = dict(
    enumerate(
        (
            "Non-Leak",
            "Small",
            "Non-Leak",
            "Medium",
            "Non-Leak",
            "Large",
            "Non-Leak",
            "Small",
            "Non-Leak",
            "Medium",
            "Non-Leak",
            "Large",
        ),
        start=1,
    )
)

# Generate data
# NOTE: no random seed is set in this cell, so unless NumPy's global RNG was
# seeded beforehand, every run produces different sensor readings.
rows = 15000
data = []

# Segments are assumed to be numbered 1..N without gaps (as in
# segment_leak_mapping), so the mapping's size is the cycle length.
num_segments = len(segment_leak_mapping)

# Serial numbers are 1-based and double as the loop counter.
for serial_number in range(1, rows + 1):
    # Walk through the segments in order, wrapping around after the last one.
    segment = (serial_number - 1) % num_segments + 1
    leak_type = segment_leak_mapping[segment]

    # Identification and label columns for this row.
    row = {
        "Serial": serial_number,
        "Segment": segment,
        "Leak_Type": leak_type,
        # Binary label: 0 for Non-Leak, 1 for any leak type.
        "Leak_Status": 0 if leak_type == "Non-Leak" else 1,
    }

    # Draw every sensor reading uniformly from the range defined for this
    # leak type and keep two decimal places.
    for feature, (low, high) in conditions[leak_type].items():
        row[feature] = round(np.random.uniform(low, high), 2)

    data.append(row)

# Create DataFrame
# Output column order: the serial number, the seven sensor readings, then the
# segment and label columns. Passing `columns=` makes pandas lay the row
# dictionaries out in this order instead of their insertion order.
columns = [
    "Serial",
    "Gas_Concentration(ppm)",
    "Temperature(\u00b0C)",
    "Pressure(psi)",
    "Thermal_Gradient(\u00b0C/m)",
    "Acoustic_Amplitude(dB)",
    "Acoustic_Frequency(Hz)",
    "Flow_Rate(L/min)",
    "Segment",
    "Leak_Type",
    "Leak_Status",
]

df = pd.DataFrame(data, columns=columns)

# Save to CSV (index=False keeps the DataFrame's row index out of the file).
output_path = "gas_pipeline_data.csv"
df.to_csv(output_path, index=False)

# Report the actual row count and path rather than hard-coded values, so the
# message stays accurate if the number of generated rows or the file name
# changes.
print(f"Dataset with {len(df):,} rows saved as '{output_path}'")


Dataset with 15,000 rows saved as 'gas_pipeline_data.csv'
