In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import classification_report, accuracy_score

2024-11-30 18:11:28.167818: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-30 18:11:28.252609: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-30 18:11:28.252669: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-30 18:11:28.252737: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-30 18:11:28.271647: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-30 18:11:28.272470: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

In [2]:
data = pd.read_csv('arduino_data_2024-11-27 15:56:42.146692+00:00.csv')
data.head()

Unnamed: 0,Time,Soil_Moisture,Light_Intensity,Pump Signal
0,2024-11-27 15:57:12.247578+00:00,374,697,OFF
1,2024-11-27 15:57:42.248747+00:00,367,694,OFF
2,2024-11-27 15:58:12.249873+00:00,367,679,OFF
3,2024-11-27 15:58:42.251259+00:00,367,672,OFF
4,2024-11-27 15:59:12.252268+00:00,367,674,OFF


Preprocessing

In [3]:
data['Time'] = pd.to_datetime(data['Time'])
data['hour'] = data['Time'].dt.hour
data['day'] = data['Time'].dt.dayofweek
data['month'] = data['Time'].dt.month

# Encode pump signal
data['Pump Signal'] = data['Pump Signal'].map({'OFF': 0, 'ON': 1})

# Normalize values
scaler = MinMaxScaler()
data[['Soil_Moisture', 'Light_Intensity']] = scaler.fit_transform(data[['Soil_Moisture', 'Light_Intensity']])
data.head()

Unnamed: 0,Time,Soil_Moisture,Light_Intensity,Pump Signal,hour,day,month
0,2024-11-27 15:57:12.247578+00:00,0.076859,0.491577,0.0,15,2,11
1,2024-11-27 15:57:42.248747+00:00,0.074909,0.48928,0.0,15,2,11
2,2024-11-27 15:58:12.249873+00:00,0.074909,0.477795,0.0,15,2,11
3,2024-11-27 15:58:42.251259+00:00,0.074909,0.472435,0.0,15,2,11
4,2024-11-27 15:59:12.252268+00:00,0.074909,0.473966,0.0,15,2,11


In [4]:
# Keep previous time-step values (lagged features) to pass as input in time series prediction
n_lags = 3
for lag in range(1, n_lags + 1):
    data[f'Soil_Moisture_Lag{lag}'] = data['Soil_Moisture'].shift(lag)
    data[f'Light_Intensity_Lag{lag}'] = data['Light_Intensity'].shift(lag)
    data[f'Pump_Signal_Lag{lag}'] = data['Pump Signal'].shift(lag)
data.dropna(inplace=True)
data.head()

Unnamed: 0,Time,Soil_Moisture,Light_Intensity,Pump Signal,hour,day,month,Soil_Moisture_Lag1,Light_Intensity_Lag1,Pump_Signal_Lag1,Soil_Moisture_Lag2,Light_Intensity_Lag2,Pump_Signal_Lag2,Soil_Moisture_Lag3,Light_Intensity_Lag3,Pump_Signal_Lag3
3,2024-11-27 15:58:42.251259+00:00,0.074909,0.472435,0.0,15,2,11,0.074909,0.477795,0.0,0.074909,0.48928,0.0,0.076859,0.491577,0.0
4,2024-11-27 15:59:12.252268+00:00,0.074909,0.473966,0.0,15,2,11,0.074909,0.472435,0.0,0.074909,0.477795,0.0,0.074909,0.48928,0.0
5,2024-11-27 15:59:42.253556+00:00,0.074909,0.465544,0.0,15,2,11,0.074909,0.473966,0.0,0.074909,0.472435,0.0,0.074909,0.477795,0.0
6,2024-11-27 16:00:12.254705+00:00,0.074909,0.457121,0.0,16,2,11,0.074909,0.465544,0.0,0.074909,0.473966,0.0,0.074909,0.472435,0.0
7,2024-11-27 16:00:42.256228+00:00,0.074631,0.454824,0.0,16,2,11,0.074909,0.457121,0.0,0.074909,0.465544,0.0,0.074909,0.473966,0.0


In [14]:
# Prepare data for the model
features = ['Soil_Moisture', 'Light_Intensity', 'hour', 'day', 'month'] + \
           [f'Soil_Moisture_Lag{lag}' for lag in range(1, n_lags + 1)] + \
           [f'Light_Intensity_Lag{lag}' for lag in range(1, n_lags + 1)] + \
           [f'Pump_Signal_Lag{lag}' for lag in range(1, n_lags + 1)]

X = data[features].values
y = data['Pump Signal'].values
print(X.shape)

# Reshape the input for the LSTM (samples, timesteps, features)
n_features = len(features)  # Total number of features per sample
timesteps = n_lags      # Time steps we are using

# Reshape X based on total features per timestep
n_samples = (X.shape[0]) // timesteps # Get number of rows that can be reshaped
X = X[:n_samples * timesteps]
y = y[:n_samples * timesteps]

X = X.reshape(-1, timesteps, len(features))
y = y.reshape(n_samples, timesteps)

y = y[:, -1]
y = y.reshape(-1, 1)

print(X.shape)
print(y.shape)
# X = X.reshape(n_samples // timesteps, timesteps, n_features)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)
print(X_train.shape, y_train.shape)

(8552, 14)
(2850, 3, 14)
(2850, 1)
(2280, 3, 14) (2280, 1)


In [6]:
model = Sequential([
    LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [7]:
y_pred = model.predict(X_test)
y_pred_binary = (y_pred > 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred_binary))
print(classification_report(y_test, y_pred_binary, target_names=["Pump OFF", "Pump ON"]))

Accuracy: 0.9929824561403509
              precision    recall  f1-score   support

    Pump OFF       0.99      1.00      1.00       566
     Pump ON       0.00      0.00      0.00         4

    accuracy                           0.99       570
   macro avg       0.50      0.50      0.50       570
weighted avg       0.99      0.99      0.99       570



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# WORKING WITH SMOTE

In [8]:
from imblearn.over_sampling import SMOTE

In [None]:
X = data[features].values
y = data['Pump Signal'].values
print(X.shape)

(8552, 14)
(2850, 3, 14) (2850, 1)


In [None]:
X = data[features].values
y = data['Pump Signal'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=True)
print(X_train.shape, y_train.shape)

# SMOTE after splitting to preserve the data integrity of the test set
smote = SMOTE(sampling_strategy='minority', random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)
print(X_train.shape, y_train.shape)

(6841, 14) (6841,)
(13622, 14) (13622,)


In [None]:
# Reshape the input for the LSTM (samples, timesteps, features)
n_features = len(features)  # Total number of features per sample
timesteps = n_lags      # Time steps we are using

# Reshape X based on total features per timestep
n_samples = (X_train.shape[0]) // timesteps # Get number of rows that can be reshaped

X_train = X_train[:n_samples * timesteps]
y_train = y_train[:n_samples * timesteps]

X_train = X_train.reshape(-1, timesteps, len(features))
y_train = y_train.reshape(n_samples, timesteps)

y_train = y_train[:, -1]
y_train = y_train.reshape(-1, 1)
print(X_train.shape, y_train.shape)

(4539, 3, 14) (1513, 1)


In [25]:
model = Sequential([
    LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [34]:
n_samples_test = (X_test.shape[0]) // timesteps # Get number of rows that can be reshaped

X_test = X_test[:n_samples_test * timesteps]
y_test = y_test[:n_samples_test * timesteps]

X_test = X_test.reshape(-1, timesteps, len(features))
y_test = y_test.reshape(n_samples_test, timesteps)

y_test = y_test[:, -1]
y_test = y_test.reshape(-1, 1)

print(X_test.shape, y_test.shape)

y_pred_binary = (y_pred > 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred_binary))
print(classification_report(y_test, y_pred_binary, target_names=["Pump OFF", "Pump ON"]))

(570, 3, 14) (570, 1)
Accuracy: 0.9947368421052631
              precision    recall  f1-score   support

    Pump OFF       0.99      1.00      1.00       567
     Pump ON       0.00      0.00      0.00         3

    accuracy                           0.99       570
   macro avg       0.50      0.50      0.50       570
weighted avg       0.99      0.99      0.99       570



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
