In [17]:
from flask import jsonify
import pandas as pd
from pandas import concat
import numpy as np
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error
from pandas import DataFrame
from datetime import datetime, timedelta
from keras.models import load_model
from keras.layers import Bidirectional, LSTM

In [18]:
# Path to CSV file
csv_path = "cuacabmkg.csv"
model_path = "model/my_model.keras"

hist_window = 3
horizon = 8

features = ["RAINFALL", "TEMP", "WINDDIR", "WINDSPEED", "HUMIDITY", "PRESSURE"]
features_num = len(features)

def FilterRAINFALL(val):
    if val < 0 or val > 1000:
        val = np.NaN
    return val


def FilterTEMP(val):
    if val < 21 or val > 37:
        val = np.NaN
    return val


def FilterWINDDIR(val):
    if val < 0 or val > 360:
        val = np.NaN
    return val


def FilterWINDSPEED(val):
    if val > 24:
        val = np.NaN
    return val


def FilterHUMIDITY(val):
    if val < 0 or val > 100:
        val = np.NaN
    return val


def FilterPRESSURE(val):
    if val < 1000 or val > 1018:
        val = np.NaN
    return val

In [19]:
def deret_waktu_regresi(dataset, target, start, end, window, horizon):
    X = []
    y = []
    start = start + window
    if end is None:
        end = len(dataset) - horizon

    for i in range(start, end):
        indices = range(i - window, i)
        if i + horizon > len(target):
            continue
        X.append(dataset[indices])

        # indicey = range(i+1, i+1+horizon)
        indicey = range(i, i + horizon)
        y.append(target[indicey])

    return np.array(X), np.array(y)

In [20]:
def preprocess_data(csv=csv_path):
    df = pd.read_csv(csv, parse_dates=["TIMESTAMP"], index_col="TIMESTAMP")
    
    features = df
    col_names = list(features.columns)
    s_scaler = preprocessing.StandardScaler()
    features = s_scaler.fit_transform(features)
    features = pd.DataFrame(features, columns=col_names)

    df["RAINFALL"] = df.apply(lambda row: FilterRAINFALL(row["RAINFALL"]), axis=1)
    df["TEMP"] = df.apply(lambda row: FilterTEMP(row["TEMP"]), axis=1)
    df["WINDDIR"] = df.apply(lambda row: FilterWINDDIR(row["WINDDIR"]), axis=1)
    df["WINDSPEED"] = df.apply(lambda row: FilterWINDSPEED(row["WINDSPEED"]), axis=1)
    df["HUMIDITY"] = df.apply(lambda row: FilterHUMIDITY(row["HUMIDITY"]), axis=1)
    df["PRESSURE"] = df.apply(lambda row: FilterPRESSURE(row["PRESSURE"]), axis=1)

    # resample rata-rata di jam dan hari yg sama pada semua tahun
    dfl = df.groupby(
        [df.index.month, df.index.day, df.index.hour], as_index=True
    ).mean()

    # mengisikan rata2 pada jam dan tanggal yg sama pada nilai NaN
    for kolom in list(df):
        index = df.index[df[kolom].apply(np.isnan)]
        for num, val in enumerate(index):
            df.loc[val, kolom] = dfl.loc[
                index.month[num], index.day[num], index.hour[num]
            ][kolom]

    x_scaler = preprocessing.MinMaxScaler()
    y_scaler = preprocessing.MinMaxScaler()
    dataX = x_scaler.fit_transform(df)
    
    variabel = ['RAINFALL', 'TEMP', 'WINDDIR', 'WINDSPEED', 'HUMIDITY', 'PRESSURE']
    target = []
    for i in range(horizon):
        target += variabel
        
    dataY = y_scaler.fit_transform(df[target])

    n = int(dataY.shape[1] / horizon)
    TRAIN_SPLIT = int(len(df))

    x_multi, y_multi = deret_waktu_regresi(
        dataX, dataY[:, :n], 0, TRAIN_SPLIT, hist_window, horizon
    )

    output = y_multi.shape[2] * y_multi.shape[1]
    y_multi = y_multi.reshape(-1, output, 1)

    return {
        "x_multi": x_multi,
        "y_scaler": y_scaler,
        "df": df,
    }

In [21]:
def model_prediction(filepath = model_path):
    data = preprocess_data(csv_path)
    if not data:
        return {"error": "Data not available. Please try again later."}
    
    # Convert data to DataFrame
    model = load_model(
        filepath, custom_objects={"Bidirectional": Bidirectional, "LSTM": LSTM}
    )
    
    x_multi = data["x_multi"]
    y_scaler = data["y_scaler"]
    
    
    new_columns = []
    for i in range(1, horizon + 1):
        new_columns += [x + str(i) for x in ["RAINFALL", "TEMP", "WINDDIR", "WINDSPEED", "HUMIDITY", "PRESSURE"]]

    # Make prediction
    pred = model.predict(x_multi)
    preds = y_scaler.inverse_transform(pred)
    predictions_df = pd.DataFrame(preds, columns = new_columns)
    return predictions_df

In [22]:
# Pengkategorian kondisi hujan dan arah angin
def classify_rainfall(value):
    if value > 20:
        return "Hujan Sangat Lebat"
    elif value > 10:
        return "Hujan Lebat"
    elif value > 5:
        return "Hujan Sedang"
    elif value > 1:
        return "Hujan Ringan"
    elif value > 0:
        return "Tidak Hujan"
    else:
        return "Cerah"
    
def classify_winddir(value):
    if 1 <= value < 90:
        return "Timur Laut (TL)"
    elif value == 90:
        return "Timur (T)"
    elif 90 < value < 180:
        return "Tenggara (TG)"
    elif value == 180:
        return "Selatan (S)"
    elif 180 < value < 270:
        return "Barat Daya (BD)"
    elif value == 270:
        return "Barat (B)"
    elif 270 < value <= 360:
        return "Barat Laut (BL)"
    else:
        return "Utara (U)"




In [23]:
def processing_prediction():
    data = model_prediction()

    # Calculate the mean for each column
    mean_values = data.mean()
    mean_values_rounded = mean_values.round(4)

    # Convert to a 1 x 48 array
    mean_array = mean_values_rounded.to_numpy().reshape(1, -1)
    forecast = mean_array.flatten()
    
    # Reshape the forecast array into 2 dimension array
    mean_array_reshaped = forecast.reshape(horizon, features_num)
    current_time = datetime.now().replace(minute=0, second=0, microsecond=0)
    
    data_list = []
    for i in range(8):
        hour_data = {"timestamp": current_time + timedelta(hours=i+1)}
        for j, feature in enumerate(features):
            hour_data[feature] = mean_array_reshaped[i, j]
            if feature == "RAINFALL":
                hour_data["rainfall_text"] = classify_rainfall(mean_array_reshaped[i, j])
            elif feature == "WINDDIR":
                hour_data["winddir_text"] = classify_winddir(mean_array_reshaped[i, j])
        data_list.append(hour_data)
        
    forecast_df = pd.DataFrame(data_list)

    result = {
        "result": forecast_df,
    }
    return result

In [24]:
processing_prediction()

  df = pd.read_csv(csv, parse_dates=["TIMESTAMP"], index_col="TIMESTAMP")
  saveable.load_own_variables(weights_store.get(inner_path))


[1m822/822[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step


{'result':             timestamp  RAINFALL rainfall_text       TEMP    WINDDIR  \
 0 2024-06-14 14:00:00    0.2991   Tidak Hujan  28.820499  92.870499   
 1 2024-06-14 15:00:00    0.2973   Tidak Hujan  28.834400  92.564102   
 2 2024-06-14 16:00:00    0.2939   Tidak Hujan  28.847300  92.363098   
 3 2024-06-14 17:00:00    0.2921   Tidak Hujan  28.859200  92.763802   
 4 2024-06-14 18:00:00    0.2930   Tidak Hujan  28.873199  92.885597   
 5 2024-06-14 19:00:00    0.2940   Tidak Hujan  28.880400  93.191902   
 6 2024-06-14 20:00:00    0.2939   Tidak Hujan  28.885900  92.866096   
 7 2024-06-14 21:00:00    0.2960   Tidak Hujan  28.882700  92.988800   
 
     winddir_text  WINDSPEED   HUMIDITY     PRESSURE  
 0  Tenggara (TG)     2.6581  76.306099  1009.368591  
 1  Tenggara (TG)     2.6397  76.294601  1009.369873  
 2  Tenggara (TG)     2.6290  76.261703  1009.372070  
 3  Tenggara (TG)     2.6302  76.228401  1009.370728  
 4  Tenggara (TG)     2.6445  76.190498  1009.360413  
 5  Tengga