In [4]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score


data_path = 'final_merged_base_alberta_wildfire_with_gas_and_pipes_summary.csv'
df = pd.read_csv(data_path)
df['Year'] = pd.to_datetime(df['Month']).dt.year
df['Month'] = pd.to_datetime(df['Month']).dt.month

features = [
    'A', 'B', 'C', 'D', 'E',
    'Year', 'Month',
    'Gas Plant Frac', 'Gas Plant Sweet', 'Gp Acid Gas Flaring',
    'Gp Acid Gas Flaring%', 'Gp Acid Gas Inj', 'Gp Mainline Strdle',
    'Gp Sulphur Rcvry', 'Avg Capacity (1000 m3/d)'
]


for feature in features:
    if df[feature].dtype == object:
        df[feature] = df[feature].str.replace(',', '').astype(float)


scaler = MinMaxScaler(feature_range=(0, 1))
df_scaled = scaler.fit_transform(df[features + ['prod_%_diff']])
df_scaled = pd.DataFrame(df_scaled, columns=features + ['prod_%_diff'], index=df.index)

train_indices = df[(df['Year'] < 2018) | ((df['Year'] == 2018) & (df['Month'] <= 10))].index
test_indices = df[((df['Year'] == 2018) & (df['Month'] >= 11)) | (df['Year'] > 2018)].index

X_train = df_scaled.loc[train_indices, features].values.reshape((-1, 1, len(features)))
y_train = df_scaled.loc[train_indices, 'prod_%_diff'].values
X_test = df_scaled.loc[test_indices, features].values.reshape((-1, 1, len(features)))
y_test = df_scaled.loc[test_indices, 'prod_%_diff'].values

model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(1, len(features))))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test), verbose=2)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Test MSE: {mse}")
print(f"Test R-squared: {r2}")


Epoch 1/100
5/5 - 1s - loss: 0.2127 - val_loss: 0.2213 - 1s/epoch - 224ms/step
Epoch 2/100
5/5 - 0s - loss: 0.1852 - val_loss: 0.1916 - 28ms/epoch - 6ms/step
Epoch 3/100
5/5 - 0s - loss: 0.1589 - val_loss: 0.1639 - 29ms/epoch - 6ms/step
Epoch 4/100
5/5 - 0s - loss: 0.1350 - val_loss: 0.1377 - 27ms/epoch - 5ms/step
Epoch 5/100
5/5 - 0s - loss: 0.1133 - val_loss: 0.1144 - 28ms/epoch - 6ms/step
Epoch 6/100
5/5 - 0s - loss: 0.0962 - val_loss: 0.0946 - 29ms/epoch - 6ms/step
Epoch 7/100
5/5 - 0s - loss: 0.0829 - val_loss: 0.0795 - 28ms/epoch - 6ms/step
Epoch 8/100
5/5 - 0s - loss: 0.0743 - val_loss: 0.0689 - 27ms/epoch - 5ms/step
Epoch 9/100
5/5 - 0s - loss: 0.0696 - val_loss: 0.0619 - 27ms/epoch - 5ms/step
Epoch 10/100
5/5 - 0s - loss: 0.0647 - val_loss: 0.0572 - 27ms/epoch - 5ms/step
Epoch 11/100
5/5 - 0s - loss: 0.0599 - val_loss: 0.0535 - 25ms/epoch - 5ms/step
Epoch 12/100
5/5 - 0s - loss: 0.0555 - val_loss: 0.0503 - 26ms/epoch - 5ms/step
Epoch 13/100
5/5 - 0s - loss: 0.0513 - val_loss: 

In [3]:
print("Train DataFrame shape:", train_df_scaled.shape)
print("Test DataFrame shape:", test_df_scaled.shape)


Train DataFrame shape: (0, 16)
Test DataFrame shape: (36, 16)
