In [9]:
import pandas as pd
import numpy as np
from scipy.optimize import curve_fit

# Define the exponential decline function
def exp_decline(t, a, b, c):
    return a * np.exp(-b * t) + c

# Function to detect anomalies using exponential decline curve
def detect_anomalies(data, threshold=3):
    # Normalize the data
    data['Pressure_norm'] = (data['Pressure'] - data['Pressure'].min()) / (data['Pressure'].max() - data['Pressure'].min())
    
    # Improved initial guess for the parameters
    initial_guess = [data['Pressure_norm'].max(), 0.1, data['Pressure_norm'].min()]
    
    # Fit the exponential decline curve
    popt, _ = curve_fit(exp_decline, data['Time'], data['Pressure_norm'], p0=initial_guess, maxfev=5000)
    
    # Calculate the fitted values
    data['Fitted_norm'] = exp_decline(data['Time'], *popt)
    
    # Denormalize the fitted values
    data['Fitted'] = data['Fitted_norm'] * (data['Pressure'].max() - data['Pressure'].min()) + data['Pressure'].min()
    
    # Calculate residuals
    data['Residual'] = data['Pressure'] - data['Fitted']
    
    # Calculate the standard deviation of the residuals
    std_residual = np.std(data['Residual'])
    
    # Detect anomalies where residuals exceed the threshold
    data['Anomaly'] = np.abs(data['Residual']) > threshold * std_residual
    
    return data, popt

# Function to visualize the data, fitted curve, and anomalies
def visualize(data, popt):
    plt.figure(figsize=(14, 7))
    
    # Plot the original data
    plt.plot(data['Time'], data['Pressure'], label='Original Data', color='blue')
    
    # Plot the fitted curve
    plt.plot(data['Time'], data['Fitted'], label='Fitted Curve', color='green')
    
    # Highlight the anomalies
    anomalies = data[data['Anomaly']]
    plt.scatter(anomalies['Time'], anomalies['Pressure'], color='red', label='Anomalies')
    
    plt.xlabel('Time')
    plt.ylabel('Pressure')
    plt.title('Oil Well Bottom Hole Pressure Data with Anomalies')
    plt.legend()
    plt.show()

In [10]:
import yaml
import pandas as pd
# Load the config
with open('../config/config.yaml', 'r') as file:
    config = yaml.safe_load(file)

# Load the dataset
csv_path = config['paths']['WellCSVPath']

# Load the dataset
df = pd.read_csv(csv_path,low_memory=False)

# Convert the datetime column to datetime type if it's not already
df['DateTime'] = pd.to_datetime(df['Date'], format='mixed', dayfirst=True, errors='coerce')
df.rename(columns={'Z1 BHP*10': 'Pressure', 'DateTime': 'Time'}, inplace=True)


# Detect anomalies
result_df, parameters = detect_anomalies(df['Time','Pressure'])

# Visualize the results
visualize(result_df, parameters)

KeyError: ('Time', 'Pressure')