In [1]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.models import Sequential
# Silence every library warning for the remainder of the session.
warnings.filterwarnings(action='ignore')

# Read the raw dataset; the path is relative to the current working directory.
data_path = "../sample/2023_smartFarm_AI_hackathon_dataset.csv"
data = pd.read_csv(filepath_or_buffer=data_path)

In [2]:
# Drop columns whose every entry is zero (missing entries count as zero, the
# same way a skip-NaN column sum would treat them).
# Each value is tested individually instead of comparing the column sum to 0:
# a sum of 0 also occurs when positive and negative values cancel out, and
# summing a text column concatenates its strings for no benefit.
is_all_zero = (data.eq(0) | data.isna()).all(axis=0)
cols_to_drop = data.columns[is_all_zero]
data = data.drop(columns=cols_to_drop)

In [3]:
# One-hot encode the 'frmDist' column. drop_first=True omits the indicator for
# the first category, so that category is represented by all indicators being off.
data = pd.get_dummies(
    data=data,
    columns=['frmDist'],
    drop_first=True,
)

In [4]:
# Interpolate zeros for each zone.
# A zero in a numeric column is treated as a missing reading and filled by
# linear interpolation over the rows that belong to the same zone.
# NOTE(review): interpolation follows row order, so this presumes rows are
# already sorted chronologically within each zone -- confirm against the CSV.
zones = [col for col in data.columns if 'frmDist_' in col]

# Restrict the fill to numeric, non-indicator columns. The one-hot zone
# indicators legitimately contain zeros, and non-numeric columns (e.g. the
# still-unparsed 'date') cannot be interpolated.
value_cols = data.select_dtypes(include='number').columns.difference(zones, sort=False)
# Interpolated values are fractional, so store these columns as float up front.
data[value_cols] = data[value_cols].astype(float)

# get_dummies(drop_first=True) leaves the first zone without an indicator
# column; its rows are the ones where every indicator is off. Include them,
# otherwise that zone is never interpolated.
zone_masks = [data[zone] == 1 for zone in zones]
zone_masks.append(~data[zones].astype(bool).any(axis=1))

for mask in zone_masks:
    if not mask.any():
        continue
    zone_values = data.loc[mask, value_cols]
    zone_values = zone_values.where(zone_values != 0, np.nan)
    # limit_direction='both' also fills leading/trailing gaps; a column that is
    # entirely zero within a zone stays NaN here.
    data.loc[mask, value_cols] = zone_values.interpolate(method='linear', limit_direction='both')

In [5]:
# Anything interpolation could not fill is still NaN at this point; replace it with 0.
data = data.fillna(value=0)

In [6]:
# Parse 'date' into datetimes, then derive calendar features from it.
# assign() evaluates its keyword arguments in order, so the later lambdas see
# the already-parsed 'date' column.
data = data.assign(
    date=lambda frame: pd.to_datetime(frame['date']),
    year=lambda frame: frame['date'].dt.year,
    month=lambda frame: frame['date'].dt.month,
    day=lambda frame: frame['date'].dt.day,
    weekday=lambda frame: frame['date'].dt.weekday,
)

In [7]:
# The raw timestamp is no longer needed once its calendar parts are extracted.
data = data.drop(columns=['date'])

In [8]:
# Separate the two target columns (y) from all remaining columns (X).
targets = ['outtrn_cumsum', 'HeatingEnergyUsage_cumsum']
y = data.loc[:, targets]
X = data.drop(targets, axis=1)

In [9]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Separate min-max scalers for the features and for the targets, so predictions
# can be mapped back to original units with the target scaler alone.
scaler_X, scaler_Y = MinMaxScaler(), MinMaxScaler()

In [11]:
# Fit each scaler on the training split only, then apply it to both splits.
scaler_X.fit(X_train)
scaler_Y.fit(y_train)

X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)
y_train_scaled = scaler_Y.transform(y_train)
y_test_scaled = scaler_Y.transform(y_test)

In [12]:
# Append a trailing axis of size 1 so the arrays are (samples, n_features, 1),
# the 3-D layout the LSTM layer below is configured for.
X_train_reshaped = np.expand_dims(X_train_scaled, axis=-1)
X_test_reshaped = np.expand_dims(X_test_scaled, axis=-1)

In [19]:
# Seed Python, NumPy and TensorFlow in one call so the random parts of model
# construction and training are repeatable across Restart & Run All.
# (Some GPU kernels may still introduce small run-to-run differences.)
tf.keras.utils.set_random_seed(42)

# Define the LSTM model.
# Input is (n_features, 1): each scaled feature is fed to the LSTM as one step
# of a length-n_features sequence with a single channel.
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train_reshaped.shape[1], 1)))
# Dropout between the recurrent layer and the output head for regularisation.
model.add(Dropout(0.2))
# Two linear outputs, one per target column.
model.add(Dense(2))
# MSE is optimised on the scaled targets; RMSE is tracked for readability.
model.compile(optimizer='adam', loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError()])

In [20]:
# Train the model.
# Stop once validation loss has not improved for 10 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the model keeps the weights from the final (worse) epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# validation_split=0.2 holds out the last 20% of the training arrays for validation.
history = model.fit(
    X_train_reshaped,
    y_train_scaled,
    epochs=200,
    validation_split=0.2,
    callbacks=[early_stop],
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [None]:
# Plot training vs. validation loss per epoch to inspect convergence and overfitting.
# Requires `import matplotlib.pyplot as plt` in the notebook's import cell.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='val')
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.set_title('Training and validation loss')
ax.legend()
plt.show()

In [None]:
# Predict on the held-out features, then undo the target scaling so the
# predictions are back in the original target units.
y_pred_scaled = model.predict(x=X_test_reshaped)
y_pred = scaler_Y.inverse_transform(y_pred_scaled)

In [None]:
# Score the predictions in original units. Both metrics are averaged over the
# two target columns (scikit-learn's default multi-output behaviour).
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print(f"RMSE: {rmse}")
print(f"R2 Score: {r2}")