In [4]:
!pip install prophet




[notice] A new release of pip available: 22.3 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from prophet import Prophet
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error

import warnings
warnings.filterwarnings('ignore')

from utils import Pipeline

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


In [7]:
train_df = pd.read_csv('../data/split/train_data.csv')
test_df = pd.read_csv('../data/split/test_data.csv')

In [8]:
train_df.head()

Unnamed: 0,timestamp,light_air,light_ground,moisture_ground,dht_air_humidity,dht_air_temperature,dht_ground_humidity,dht_ground_temperature
0,1697063380,2311,1999,944,40,24.4,40,24.3
1,1697063416,2325,2006,929,40,24.3,40,24.3
2,1697063450,2320,2003,952,40,24.3,40,24.3
3,1697063484,2329,1999,950,40,24.3,40,24.2
4,1697063518,2307,1983,961,40,24.3,40,24.2


In [9]:
pipe_train_df = Pipeline(train_df)
pipe_test_df = Pipeline(test_df)

In [10]:
train_df = pipe_train_df.process()
train_df.head()

[+] Total missing values: 483


Unnamed: 0,light_air,moisture_ground,dht_air_humidity,dht_air_temperature,dht_ground_humidity,dht_ground_temperature,timestamp
58,0.334191,0.430707,0.888325,0.647217,0.620567,0.712714,1697065281
59,0.36866,0.430777,0.86802,0.654399,0.606383,0.716381,1697065315
60,0.402906,0.430632,0.847716,0.66158,0.595745,0.720049,1697065350
61,0.436765,0.430221,0.827411,0.668761,0.58156,0.70599,1697065382
62,0.465946,0.429684,0.809137,0.649372,0.570213,0.710513,1697065417


In [11]:
test_df = pipe_test_df.process()
test_df.head()

[+] Total missing values: 169


Unnamed: 0,light_air,moisture_ground,dht_air_humidity,dht_air_temperature,dht_ground_humidity,dht_ground_temperature,timestamp
58,0.25979,0.359033,0.623529,0.165829,0.675325,0.021978,1697357869
59,0.23995,0.345544,0.611765,0.190955,0.649351,0.054945,1697357909
60,0.263029,0.322991,0.6,0.211055,1.0,0.082418,1697357943
61,0.188763,0.255226,0.588235,0.085427,0.961039,0.10989,1697357978
62,0.207879,0.241651,0.576471,0.105528,0.922078,0.137363,1697358011


In [14]:
y_train = train_df['moisture_ground']
X_train = train_df.drop('moisture_ground', axis=1)
y_test = test_df['moisture_ground']
X_test = test_df.drop('moisture_ground', axis=1)

## Ensemble

* LSTM
* RainForest
* Prophet

In [47]:
class Ensemble:
    def __init__(self, train_df: pd.DataFrame, test_df: pd.DataFrame):
        self.lstm_model = None
        self.prophet_model = None
        self.rf_model = None
        self.train_df = train_df
        self.test_df = test_df

    def train_lstm(self, X_train, y_train, num_timesteps=50, lstm_units=64, epochs=10, batch_size=32):
        self.lstm_model = Sequential()
        self.lstm_model.add(LSTM(units=lstm_units, input_shape=(num_timesteps, 1)))
        self.lstm_model.add(Dense(units=1))
        self.lstm_model.compile(optimizer='adam', loss='mse')
        self.lstm_model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)

    def train_prophet(self, df):
        df['lag_1'] = df['moisture_ground'].shift(1)
        df['rolling_mean'] = df['moisture_ground'].rolling(window=3).mean()
        df = df.dropna()

        new_df = df.reset_index().rename(columns={'index': 'ds', 'moisture_ground': 'y'})
        new_df['ds'] = pd.to_datetime(new_df['timestamp'], unit='s')

        self.prophet_model = Prophet()
        self.prophet_model.fit(new_df)

    def train_rf(self, X_train, y_train):
        sliding_window = 48
        # Prepare train data
        self.slide_train_df = pd.DataFrame()
        for i in range(sliding_window+1):
            for column in train_df.columns:
                self.slide_train_df[f'{column}-{i}'] = train_df[column].shift(i)

        self.slide_train_df.dropna(inplace=True)
        slide_train_df_shuffled = self.slide_train_df.sample(frac=1, random_state=0)
        slide_train_df_x = slide_train_df_shuffled.iloc[:,2:]
        slide_train_df_y = slide_train_df_shuffled.iloc[:,:2]

        self.rf_model = RandomForestRegressor()
        self.rf_model.fit(slide_train_df_x, slide_train_df_y)

    def train(self):
        y_train = self.train_df['moisture_ground']
        X_train = self.train_df.drop('moisture_ground', axis=1)

        self.train_lstm(X_train.values, y_train.values)
        self.train_prophet(self.train_df)
        self.train_rf(X_train.values, y_train.values)

    def predict(self, X_test: pd.Series, y_test: pd.Series):
        lstm_pred = self.lstm_model.predict(X_test.values)
        # Generate predictions
        test_inputs = self.slide_train_df.iloc[-1, 2:].values.reshape(1, -1)
        rf_pred = []
        for _ in range(len(test_df)):
            prediction = self.model_rf.predict(test_inputs)
            rf_pred.append(prediction)
            test_inputs = np.roll(test_inputs, 2)
            test_inputs[0, :2] = prediction

        future = self.prophet_model.make_future_dataframe(periods=len(y_test), freq='30s', include_history=False)
        forecast = self.prophet_model.predict(future)
        prophet_pred = forecast['yhat'].values[-len(y_test):]

        ensemble_pred = np.mean([lstm_pred, prophet_pred, rf_pred], axis=0)
        return ensemble_pred

    def evaluate(self):
        y_test = self.test_df['moisture_ground']
        X_test = self.test_df.drop('moisture_ground', axis=1)

        ensemble_pred = self.predict(X_test, y_test)
        ensemble_rmse = np.sqrt(mean_squared_error(y_test, ensemble_pred))
        return ensemble_rmse


In [48]:
ens1 = Ensemble(train_df, test_df)

In [49]:
ens1.train()

17:58:49 - cmdstanpy - INFO - Chain [1] start processing
17:58:50 - cmdstanpy - INFO - Chain [1] done processing


KeyboardInterrupt: 

In [None]:
ens1.evaluate()

[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step


ValueError: X has 6 features, but RandomForestRegressor is expecting 8 features as input.