In [3]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Simulation configuration: how many sensors, how many readings per sensor,
# and the spacing between consecutive readings.
num_sensors = 10
num_samples = 1000
interval_minutes = 30

# Per-channel limits consulted by the leak detector.
# NOTE(review): the baseline flow rate generated later in this file is
# centred on 2000, so `flow_rate < 3000` also holds for normal readings --
# confirm the intended limit.
leak_thresholds = dict(
    pressure=500,          # psi
    temperature=120,       # °F
    flow_rate=3000,        # barrels per hour
    vibration=0.2,         # inches per second
    acoustic=80,           # dB
    hydrocarbon_gas=1,     # ppm
)

# One timestamp per sample, evenly spaced and starting at the moment this
# code runs (so the timestamps differ from run to run).
start_time = datetime.now()
timestamps = [
    start_time + k * timedelta(minutes=interval_minutes)
    for k in range(num_samples)
]

# Seed NumPy's global random stream so the synthetic readings generated
# below are reproducible.
np.random.seed(0)

def generate_sensor_data(mean, std, anomaly=False, n_samples=None):
    """Draw a synthetic reading series for a single sensor.

    Args:
        mean: Centre of the normal distribution the readings are drawn from.
        std: Standard deviation of the noise; also used as the amplitude of
            the optional sinusoidal component.
        anomaly: When true, add ``std * sin(t)`` to the noise, with ``t``
            evenly spaced over ``[0, 10]`` across the series.
        n_samples: Number of readings to generate. When omitted, the
            module-level ``num_samples`` is used, so existing calls behave
            exactly as before.

    Returns:
        A 1-D float array of length ``n_samples``.
    """
    if n_samples is None:
        n_samples = num_samples
    # Uses NumPy's global random stream, so results depend on the current
    # seed and on how many draws have already been made.
    data = np.random.normal(mean, std, n_samples)
    if anomaly:
        data += np.sin(np.linspace(0, 10, n_samples)) * std
    return data

# Baseline readings: each array has one row per sensor and one column per
# sample. Every call advances NumPy's shared random stream, so the channel
# order below determines the generated values -- keep it unchanged.
pressure = np.array([generate_sensor_data(1000, 50, True) for _ in range(num_sensors)])
temperature = np.array([generate_sensor_data(90, 5, True) for _ in range(num_sensors)])
flow_rate = np.array([generate_sensor_data(2000, 100, True) for _ in range(num_sensors)])
vibration = np.array([generate_sensor_data(0.05, 0.01, True) for _ in range(num_sensors)])
acoustic = np.array([generate_sensor_data(60, 5, True) for _ in range(num_sensors)])
hydrocarbon_gas = np.array([generate_sensor_data(0, 0.1, True) for _ in range(num_sensors)])

# Introduce a leak event: from sample index `leak_time` onward, shift every
# sensor's readings by a constant offset. One slice assignment per channel
# replaces a per-column Python loop and yields the same values.
leak_time = 500
pressure[:, leak_time:] -= 600
temperature[:, leak_time:] += 40
flow_rate[:, leak_time:] -= 800
vibration[:, leak_time:] += 0.3
acoustic[:, leak_time:] += 30
hydrocarbon_gas[:, leak_time:] += 2

# Generate sensor locations: one (latitude, longitude) pair per sensor, drawn
# uniformly from [4.0, 14.0) and [6.0, 13.0) respectively.
locations = [(np.random.uniform(4.0, 14.0), np.random.uniform(6.0, 13.0)) for _ in range(num_sensors)]
latitudes, longitudes = zip(*locations)

# Create a long-format DataFrame: one row per (sensor, sample) pair.
# `flatten()` is row-major, so all samples of sensor 0 come first, then
# sensor 1, and so on. The per-sensor columns are therefore repeated
# `num_samples` times each, while the timestamps are tiled once per sensor.
data = {
    "timestamp": np.tile(timestamps, num_sensors),
    "sensor_id": np.repeat(range(num_sensors), num_samples),
    "latitude": np.repeat(latitudes, num_samples),
    "longitude": np.repeat(longitudes, num_samples),
    "pressure": pressure.flatten(),
    "temperature": temperature.flatten(),
    "flow_rate": flow_rate.flatten(),
    "vibration": vibration.flatten(),
    "acoustic": acoustic.flatten(),
    "hydrocarbon_gas": hydrocarbon_gas.flatten(),
}

df = pd.DataFrame(data)

# Function to detect leaks
def detect_leaks(df: pd.DataFrame, thresholds: dict) -> pd.DataFrame:
    """Label each row of *df* as a leak reading or not.

    A row is labelled ``"anomalized"`` only when all six channels breach
    their limits at once: pressure and flow rate strictly below their
    thresholds, and temperature, vibration, acoustic level and hydrocarbon
    gas strictly above theirs. Every other row gets ``"not-anomalized"``.

    Args:
        df: Readings with one column per channel name used below.
        thresholds: Mapping from channel name to its limit.

    Returns:
        The same ``df`` object, with the ``anomalized`` column added or
        overwritten in place.
    """
    drops_on_leak = ("pressure", "flow_rate")
    rises_on_leak = ("temperature", "vibration", "acoustic", "hydrocarbon_gas")

    checks = [df[channel].lt(thresholds[channel]) for channel in drops_on_leak]
    checks.extend(df[channel].gt(thresholds[channel]) for channel in rises_on_leak)

    # AND the per-channel masks together; a row must fail every check to count.
    is_leak = checks[0]
    for check in checks[1:]:
        is_leak = is_leak & check

    df["anomalized"] = np.where(is_leak, "anomalized", "not-anomalized")
    return df

# Label every reading using the configured thresholds
df = detect_leaks(df, thresholds=leak_thresholds)

# Preview the first 20 rows. This is a bare expression, so it is only shown
# when run in an interactive session or notebook; nothing is printed otherwise.
df.head(n=20)


Unnamed: 0,timestamp,sensor_id,latitude,longitude,pressure,temperature,flow_rate,vibration,acoustic,hydrocarbon_gas,anomalized
0,2024-07-26 08:39:14.195800,0,7.071151,7.772511,1088.202617,88.989415,2033.004589,0.053712,50.302498,0.006604,not-anomalized
1,2024-07-26 09:09:14.195800,0,7.071151,7.772511,1020.508353,85.883894,2000.952986,0.053148,63.744301,0.170999,not-anomalized
2,2024-07-26 09:39:14.195800,0,7.071151,7.772511,1049.937833,98.768095,2083.813458,0.055241,67.073516,-0.15992,not-anomalized
3,2024-07-26 10:09:14.195800,0,7.071151,7.772511,1113.545936,91.103373,2045.823922,0.051653,56.082202,-0.056635,not-anomalized
4,2024-07-26 10:39:14.195800,0,7.071151,7.772511,1095.379367,89.311095,1753.608202,0.056938,56.106037,-0.207746,not-anomalized
5,2024-07-26 11:09:14.195800,0,7.071151,7.772511,953.637564,84.82502,2017.051029,0.049498,59.736157,-0.081305,not-anomalized
6,2024-07-26 11:39:14.195800,0,7.071151,7.772511,1050.505619,95.161431,2086.791682,0.041967,66.496846,0.04565,not-anomalized
7,2024-07-26 12:09:14.195800,0,7.071151,7.772511,995.932777,97.507422,2067.213372,0.042961,62.171451,-0.055676,not-anomalized
8,2024-07-26 12:39:14.195800,0,7.071151,7.772511,998.838783,89.174607,1921.480452,0.041897,54.566785,-0.192681,not-anomalized
9,2024-07-26 13:09:14.195800,0,7.071151,7.772511,1025.028339,87.333572,1993.664792,0.051204,57.930701,-0.164341,not-anomalized
