# In [10]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Simulation layout: number of pipeline points, sensors installed at each
# point, readings recorded per sensor, and minutes between readings.
num_points, sensors_per_point = 50, 6
num_samples = 1000
interval_minutes = 10

# Alarm level for each channel. detect_leaks flags pressure, temperature and
# flow_rate readings that fall BELOW their level, and vibration, acoustic and
# hydrocarbon_gas readings that rise ABOVE theirs.
leak_thresholds = dict(
    pressure=420 * 0.98,          # psi (2% decrease from minimum)
    temperature=80 * 0.98,        # °F (2% decrease from minimum)
    flow_rate=2500 * 0.98,        # barrels per hour (2% decrease from minimum)
    vibration=0.1 * 1.02,         # inches per second (2% increase)
    acoustic=80 * 1.02,           # dB (2% increase)
    hydrocarbon_gas=1 * 1.02,     # ppm (2% increase)
)

# One timestamp per sample, evenly spaced from the moment the script runs
start_time = datetime.now()
sample_spacing = timedelta(minutes=interval_minutes)
timestamps = [start_time + step * sample_spacing for step in range(num_samples)]

# Fix the NumPy global seed so the generated sensor values are repeatable
np.random.seed(0)

def generate_sensor_data(mean, std, num_samples):
    """Draw one sensor's readings from a normal distribution.

    Args:
        mean: Centre of the distribution.
        std: Standard deviation of the distribution.
        num_samples: Number of readings to draw.

    Returns:
        A NumPy array of ``num_samples`` values drawn through the global
        ``np.random`` state, so a prior ``np.random.seed`` call makes the
        output repeatable.
    """
    readings = np.random.normal(loc=mean, scale=std, size=num_samples)
    return readings

# Baseline (leak-free) readings for every channel. Each channel ends up as an
# array with one row per sensor (num_points * sensors_per_point rows) and one
# column per sample. Channels are generated strictly in the order listed so
# the sequence of random draws is fixed.
sensor_count = num_points * sensors_per_point
channel_params = (
    (460, 20),       # pressure
    (100, 10),       # temperature
    (2750, 125),     # flow_rate
    (0.1, 0.005),    # vibration
    (80, 2),         # acoustic
    (0.5, 0.05),     # hydrocarbon_gas
)
pressure, temperature, flow_rate, vibration, acoustic, hydrocarbon_gas = (
    np.array([generate_sensor_data(center, spread, num_samples) for _ in range(sensor_count)])
    for center, spread in channel_params
)


# Function to introduce leaks of varying size, duration, and location
def introduce_leak(pressure, temperature, flow_rate, vibration, acoustic, hydrocarbon_gas, severity, start, duration, sensors):
    """Overlay a leak signature on a window of readings, in place.

    Within the affected window, pressure and flow rate are lowered while
    temperature, vibration, acoustic level and hydrocarbon gas are raised,
    each by a fixed per-channel step multiplied by ``severity``.

    Args:
        pressure, temperature, flow_rate, vibration, acoustic, hydrocarbon_gas:
            2-D NumPy arrays indexed as ``[sensor, sample]``. All six are
            expected to share the same shape; the window is clamped against
            the sample axis of ``pressure``. The arrays are modified in place.
        severity: Multiplier applied to every per-channel step.
        start: Sample index at which the leak begins.
        duration: Number of consecutive samples affected.
        sensors: Row selector (slice, int, or index list) for the sensors hit.

    Returns:
        None.
    """
    # Clamp against the arrays themselves rather than a module-level constant,
    # so the function works for any series length.
    sample_count = pressure.shape[1]
    first = max(start, 0)  # a negative start must not wrap around to the end
    last = min(start + duration, sample_count)
    if last <= first:
        return  # window lies entirely outside the recorded samples

    window = (sensors, slice(first, last))
    pressure[window] -= severity * 10
    temperature[window] += severity * 5
    flow_rate[window] -= severity * 50
    vibration[window] += severity * 0.02
    acoustic[window] += severity * 10
    hydrocarbon_gas[window] += severity * 0.1
        

# Inject three scripted leak events that differ in severity, start sample,
# duration, and the block of sensor rows they affect.
for leak_event in (
    dict(severity=1, start=300, duration=100, sensors=slice(0, 10)),
    dict(severity=2, start=600, duration=150, sensors=slice(20, 30)),
    dict(severity=0.5, start=800, duration=200, sensors=slice(40, 50)),
):
    introduce_leak(pressure, temperature, flow_rate, vibration, acoustic, hydrocarbon_gas,
                   **leak_event)

# Label each row with its pipeline point: every point name is repeated once
# per sample for each of the sensors installed there.
locations = [f"Point {i}" for i in range(1, num_points + 1)]
points = np.repeat(locations, sensors_per_point * num_samples)

# Assemble the long-format table: all samples of sensor 0 first, then all
# samples of sensor 1, and so on (the channel arrays are flattened row by row).
sensor_total = num_points * sensors_per_point
data = {
    "timestamp": np.tile(timestamps, sensor_total),
    "sensor_id": np.repeat(range(sensor_total), num_samples),
    "location": points,
}
for column, readings in (
    ("pressure", pressure),
    ("temperature", temperature),
    ("flow_rate", flow_rate),
    ("vibration", vibration),
    ("acoustic", acoustic),
    ("hydrocarbon_gas", hydrocarbon_gas),
):
    data[column] = readings.flatten()

df = pd.DataFrame(data)

# Function to detect leaks
def detect_leaks(df, thresholds):
    """Flag readings that cross any leak threshold.

    A row is flagged when pressure, temperature or flow_rate is strictly below
    its threshold, or when vibration, acoustic or hydrocarbon_gas is strictly
    above its threshold.

    NOTE(review): introduce_leak raises temperature during a leak, while this
    check flags low temperature — confirm the intended direction.

    Args:
        df: DataFrame containing the six sensor columns.
        thresholds: Mapping from each sensor column name to its alarm level.

    Returns:
        The same DataFrame object, with an integer ``anomalized`` column added
        in place (1 = flagged, 0 = normal).
    """
    drops_below = ("pressure", "temperature", "flow_rate")
    rises_above = ("vibration", "acoustic", "hydrocarbon_gas")

    flagged = np.zeros(len(df), dtype=bool)
    for column in drops_below:
        flagged |= (df[column] < thresholds[column]).to_numpy()
    for column in rises_above:
        flagged |= (df[column] > thresholds[column]).to_numpy()

    df['anomalized'] = flagged.astype(int)
    return df

# Flag every reading that crosses one of the leak thresholds
df = detect_leaks(df, leak_thresholds)


# Keep injecting random leaks until at least 35% of the readings are flagged,
# giving up after max_attempts rounds.
max_attempts = 10  # Prevent infinite loops
attempt = 0
total_sensors = num_points * sensors_per_point
while df['anomalized'].mean() < 0.35 and attempt < max_attempts:
    leak_severity = np.random.uniform(0.5, 2.0)
    leak_start = np.random.randint(0, 900)
    leak_duration = np.random.randint(50, 200)
    # Draw the sensor range so that first < last always holds. Two independent
    # draws can produce an empty slice, which turns the whole attempt into a
    # no-op. np.random.randint excludes its upper bound.
    first_sensor = np.random.randint(0, total_sensors - 1)
    last_sensor = np.random.randint(first_sensor + 1, total_sensors + 1)
    introduce_leak(pressure, temperature, flow_rate, vibration, acoustic, hydrocarbon_gas,
                   severity=leak_severity, start=leak_start, duration=leak_duration,
                   sensors=slice(first_sensor, last_sensor))

    # The channel arrays were modified in place; rebuild the table from them
    # and re-run detection so the loop condition sees the new leak.
    data = {
        "timestamp": np.tile(timestamps, total_sensors),
        "sensor_id": np.repeat(range(total_sensors), num_samples),
        "location": points,
        "pressure": pressure.flatten(),
        "temperature": temperature.flatten(),
        "flow_rate": flow_rate.flatten(),
        "vibration": vibration.flatten(),
        "acoustic": acoustic.flatten(),
        "hydrocarbon_gas": hydrocarbon_gas.flatten(),
    }
    df = pd.DataFrame(data)
    df = detect_leaks(df, leak_thresholds)
    attempt += 1

# Save the final DataFrame (including its index column) to CSV
df.to_csv('sensor_data_1000.csv')
