In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.compose import ColumnTransformer
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import joblib 

In [9]:
file_path =  "Volve production data.xlsx"
daily= pd.read_excel(file_path, sheet_name="Daily Production Data", parse_dates=["DATEPRD"])

print("Shapre of the data:", daily.shape)
print("Columne:s in the data:", daily.columns.tolist())
daily.head(2)

Shapre of the data: (15634, 24)
Columne:s in the data: ['DATEPRD', 'WELL_BORE_CODE', 'NPD_WELL_BORE_CODE', 'NPD_WELL_BORE_NAME', 'NPD_FIELD_CODE', 'NPD_FIELD_NAME', 'NPD_FACILITY_CODE', 'NPD_FACILITY_NAME', 'ON_STREAM_HRS', 'AVG_DOWNHOLE_PRESSURE', 'AVG_DOWNHOLE_TEMPERATURE', 'AVG_DP_TUBING', 'AVG_ANNULUS_PRESS', 'AVG_CHOKE_SIZE_P', 'AVG_CHOKE_UOM', 'AVG_WHP_P', 'AVG_WHT_P', 'DP_CHOKE_SIZE', 'BORE_OIL_VOL', 'BORE_GAS_VOL', 'BORE_WAT_VOL', 'BORE_WI_VOL', 'FLOW_KIND', 'WELL_TYPE']


Unnamed: 0,DATEPRD,WELL_BORE_CODE,NPD_WELL_BORE_CODE,NPD_WELL_BORE_NAME,NPD_FIELD_CODE,NPD_FIELD_NAME,NPD_FACILITY_CODE,NPD_FACILITY_NAME,ON_STREAM_HRS,AVG_DOWNHOLE_PRESSURE,...,AVG_CHOKE_UOM,AVG_WHP_P,AVG_WHT_P,DP_CHOKE_SIZE,BORE_OIL_VOL,BORE_GAS_VOL,BORE_WAT_VOL,BORE_WI_VOL,FLOW_KIND,WELL_TYPE
0,2014-04-07,NO 15/9-F-1 C,7405,15/9-F-1 C,3420717,VOLVE,369304,MÆRSK INSPIRER,0.0,0.0,...,%,0.0,0.0,0.0,0.0,0.0,0.0,,production,WI
1,2014-04-08,NO 15/9-F-1 C,7405,15/9-F-1 C,3420717,VOLVE,369304,MÆRSK INSPIRER,0.0,,...,%,0.0,0.0,0.0,0.0,0.0,0.0,,production,OP


In [10]:
target_col = "BORE_OIL_VOL"
num_features = ["ON_STREAM_HRS", "AVG_DOWNHOLE_PRESSURE", "AVG_WHP_P", "AVG_WHT_P",
    "DP_CHOKE_SIZE", "BORE_GAS_VOL", "BORE_WAT_VOL"]
num_features =[c for c in num_features if c in daily.columns]
cat_col = "WELL_BORE_CODE"

model_df = daily[["DATEPRD", cat_col] + num_features + [target_col]].copy()
model_df=model_df.rename(columns={target_col:"TARGET"})
model_df=model_df.dropna(subset=["TARGET"])
print("Shape of model_df:", model_df.shape)
print("Columns in model_df:", model_df.columns.tolist())
model_df.head(20)



Shape of model_df: (9161, 10)
Columns in model_df: ['DATEPRD', 'WELL_BORE_CODE', 'ON_STREAM_HRS', 'AVG_DOWNHOLE_PRESSURE', 'AVG_WHP_P', 'AVG_WHT_P', 'DP_CHOKE_SIZE', 'BORE_GAS_VOL', 'BORE_WAT_VOL', 'TARGET']


Unnamed: 0,DATEPRD,WELL_BORE_CODE,ON_STREAM_HRS,AVG_DOWNHOLE_PRESSURE,AVG_WHP_P,AVG_WHT_P,DP_CHOKE_SIZE,BORE_GAS_VOL,BORE_WAT_VOL,TARGET
0,2014-04-07,NO 15/9-F-1 C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2014-04-08,NO 15/9-F-1 C,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
2,2014-04-09,NO 15/9-F-1 C,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
3,2014-04-10,NO 15/9-F-1 C,0.0,,0.0,0.0,0.0,0.0,0.0,0.0
4,2014-04-11,NO 15/9-F-1 C,0.0,310.37614,33.09788,10.47992,33.07195,0.0,0.0,0.0
5,2014-04-12,NO 15/9-F-1 C,0.0,303.50078,22.05334,8.70429,22.05334,0.0,0.0,0.0
6,2014-04-13,NO 15/9-F-1 C,0.0,303.53481,27.50281,9.42315,16.16326,0.0,0.0,0.0
7,2014-04-14,NO 15/9-F-1 C,0.0,303.78228,20.99552,8.13137,20.73712,0.0,0.0,0.0
8,2014-04-15,NO 15/9-F-1 C,0.0,303.85821,13.91754,8.49833,12.18153,0.0,0.0,0.0
9,2014-04-16,NO 15/9-F-1 C,0.0,303.79187,4.11994,8.82124,1.4902,0.0,0.0,0.0


In [11]:
for col in num_features:
    model_df[col] = model_df[col].fillna(model_df[col].median())

top_wells = model_df[cat_col].value_counts().nlargest(10).index
model_df["WELL_CAT"] = model_df[cat_col].where(model_df[cat_col].isin(top_wells), "Others")
model_df.head(2)

Unnamed: 0,DATEPRD,WELL_BORE_CODE,ON_STREAM_HRS,AVG_DOWNHOLE_PRESSURE,AVG_WHP_P,AVG_WHT_P,DP_CHOKE_SIZE,BORE_GAS_VOL,BORE_WAT_VOL,TARGET,WELL_CAT
0,2014-04-07,NO 15/9-F-1 C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NO 15/9-F-1 C
1,2014-04-08,NO 15/9-F-1 C,0.0,232.896939,0.0,0.0,0.0,0.0,0.0,0.0,NO 15/9-F-1 C


In [12]:
ohe=OneHotEncoder(sparse_output=False, handle_unknown='ignore')
well_ohe = ohe.fit_transform(model_df[["WELL_CAT"]])
well_cols = [f"WELL_{val}" for val in ohe.categories_[0]]
well_df = pd.DataFrame(well_ohe, columns=well_cols, index=model_df.index)

x=pd.concat([model_df[num_features], well_df], axis=1)
y=model_df["TARGET"].astype(float)

In [13]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
scalar = StandardScaler()
x_train_num= scalar.fit_transform(x_train[num_features])
x_test_num = scalar.transform(x_test[num_features])

x_train.final = np.hstack([x_train_num, x_train[well_cols].values])
x_test.final = np.hstack([x_test_num, x_test[well_cols].values])
x_train.final.shape, x_test.final.shape


  x_train.final = np.hstack([x_train_num, x_train[well_cols].values])
  x_test.final = np.hstack([x_test_num, x_test[well_cols].values])


((7328, 13), (1833, 13))

In [14]:
model = keras.Sequential([
    layers.Input(shape=(x_train.final.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2)
    layers.Dense(1)
])

SyntaxError: invalid syntax. Perhaps you forgot a comma? (154592297.py, line 4)