In [69]:
from datetime import datetime

import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential


In [70]:
# Read the weather observations CSV into a DataFrame.
# NOTE(review): absolute "/content/..." path — presumably a Colab upload
# location; confirm before running in another environment.
DATA_CSV_PATH = "/content/data_until_2024.csv"
df = pd.read_csv(DATA_CSV_PATH)

In [73]:
# Preview the first five rows to sanity-check the parsed columns.
df.head(n=5)

Unnamed: 0,Date,Min Temperature,Max Temperature,Average Temperature,Average Humidity,Rainfall,Sunshine Duration,Max Wind Speed,Wind Direction at Max Speed,Average Wind Speed,Most Wind Direction,station_id,station_name,region_name,latitude,longitude,region_id,province_id,province_name,Month-Year
0,2010-01-01,21.4,30.2,27.1,82.0,9.0,0.5,7.0,90,5.0,E,96001,Stasiun Meteorologi Maimun Saleh,Kota Sabang,5.87655,95.33785,20,1,Nanggroe Aceh Darussalam,01-2010
1,2010-01-02,21.0,29.6,25.7,95.0,24.0,0.2,6.0,90,4.0,E,96001,Stasiun Meteorologi Maimun Saleh,Kota Sabang,5.87655,95.33785,20,1,Nanggroe Aceh Darussalam,01-2010
2,2010-01-03,20.2,26.8,24.5,98.0,63.0,0.0,5.0,90,4.0,E,96001,Stasiun Meteorologi Maimun Saleh,Kota Sabang,5.87655,95.33785,20,1,Nanggroe Aceh Darussalam,01-2010
3,2010-01-04,21.0,29.2,25.8,90.0,0.0,0.1,4.0,225,3.0,SW,96001,Stasiun Meteorologi Maimun Saleh,Kota Sabang,5.87655,95.33785,20,1,Nanggroe Aceh Darussalam,01-2010
4,2010-01-08,21.8,29.8,26.8,91.0,3.0,0.6,5.0,90,4.0,E,96001,Stasiun Meteorologi Maimun Saleh,Kota Sabang,5.87655,95.33785,20,1,Nanggroe Aceh Darussalam,01-2010


In [75]:
# Feature / target column definitions

# Weather measurements that make up the prediction target.
target_columns = [
    "Min Temperature",
    "Max Temperature",
    "Average Temperature",
    "Average Humidity",
    "Rainfall",
    "Sunshine Duration",
    "Max Wind Speed",
    "Wind Direction at Max Speed",
    "Average Wind Speed",
]

# Columns fed to the model as numeric inputs: every target measurement,
# followed by the station coordinates and the ID columns.
numerical_features = [
    *target_columns,
    "latitude",
    "longitude",
    "region_id",
    "province_id",
    "station_id",
]

# String-valued column that gets one-hot encoded.
categorical_features = ["Most Wind Direction"]

# 🔹 Add Date Features (extract year, month, day, day of the week)
# Parse the column once. errors='coerce' turns unparseable values into NaT,
# which would otherwise pass silently and yield NaN in the derived columns.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Surface coerced / missing dates instead of letting them flow into the features.
unparsed_dates = int(df['Date'].isna().sum())
if unparsed_dates:
    print(f"Warning: {unparsed_dates} rows have a missing or unparseable Date; "
          "their Year/Month/Day/DayOfWeek values are NaN")

df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day
df['DayOfWeek'] = df['Date'].dt.dayofweek  # Monday=0, Sunday=6


# Trim surrounding whitespace so padded labels collapse into a single category.
df["Most Wind Direction"] = df["Most Wind Direction"].str.strip()
# 🔹 One-Hot Encode Categorical Features
# handle_unknown="ignore": a category not seen during fit is encoded as all
# zeros at transform time instead of raising.
encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
encoded_cats = encoder.fit_transform(df[categorical_features])

# Convert to DataFrame. Reuse df's index: the column-wise concat below aligns
# rows by index label, so a fresh RangeIndex would misalign (and introduce NaN
# rows) whenever df does not carry the default 0..n-1 index.
cat_df = pd.DataFrame(
    encoded_cats,
    columns=encoder.get_feature_names_out(categorical_features),
    index=df.index,
)

# 🔹 Merge Date Features with Numerical and Categorical Features
date_features = ['Year', 'Month', 'Day', 'DayOfWeek']
df_processed = pd.concat([df[numerical_features], cat_df, df[date_features]], axis=1)
# The target holds the weather measurements plus the one-hot wind-direction columns.
target_processed = pd.concat([df[target_columns], cat_df], axis=1)

# 🔹 Normalize Data
# Inputs and targets get their own min-max scaler so predictions can be
# inverse-transformed independently of the input scaling.
# NOTE(review): both scalers are fitted on the full matrices, before any
# train/validation split — confirm that is acceptable for evaluation.
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()

X_scaled = scaler_X.fit_transform(df_processed)
y_scaled = scaler_y.fit_transform(target_processed)


# 🔹 Create Time-Series Data (Sliding Window) with Province, Region, and Station IDs
def create_sequences_multi_province_region_station(X, y, province_ids, region_ids, station_ids, time_steps=7):
    """Build sliding-window samples separately for each (province, region, station) group.

    Rows are grouped by their ID triple so that no window mixes rows from two
    different groups. Within a group the rows keep their input order; each run
    of ``time_steps`` consecutive rows becomes one input window and the row
    immediately following it becomes the matching target.

    Args:
        X: Array-like with one row of features per observation.
        y: Array-like with one target row per observation (same row count as X).
        province_ids: 1-D array-like (list, ndarray or pandas Series) giving
            the province ID of every row.
        region_ids: 1-D array-like giving the region ID of every row.
        station_ids: 1-D array-like giving the station ID of every row.
        time_steps: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays. ``X_seq`` stacks the windows
        (``time_steps`` rows each) and ``y_seq`` stacks the corresponding
        target rows. Groups are emitted in the sorted order produced by
        ``np.unique``. A group with ``time_steps`` rows or fewer contributes
        nothing; if no group contributes, both arrays are empty.

    Note:
        Windows are formed purely by row position. Rows are assumed to be in
        chronological order within each group, and gaps between dates are not
        detected — TODO confirm against the caller's data.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    # Normalise the IDs into one (n_rows, 3) table. Comparing against this
    # table (rather than the raw arguments) keeps the mask element-wise even
    # when the IDs arrive as plain lists, and keeps the comparison dtype
    # consistent with the values returned by np.unique.
    id_table = np.column_stack([
        np.asarray(province_ids),
        np.asarray(region_ids),
        np.asarray(station_ids),
    ])

    X_seq, y_seq = [], []

    for combination in np.unique(id_table, axis=0):
        # Rows whose three IDs all match this combination.
        mask = np.all(id_table == combination, axis=1)

        X_station = X[mask]
        y_station = y[mask]

        # Window [i, i + time_steps) predicts the row at i + time_steps.
        for i in range(len(X_station) - time_steps):
            X_seq.append(X_station[i:i + time_steps])
            y_seq.append(y_station[i + time_steps])

    return np.array(X_seq), np.array(y_seq)



# Window the scaled data per (province, region, station) with a 7-row look-back.
X_seq, y_seq = create_sequences_multi_province_region_station(
    X_scaled, y_scaled,
    df['province_id'], df['region_id'], df['station_id'],
    time_steps=7,
)

# The rest of the notebook refers to the windowed arrays as X_reshaped /
# y_reshaped, but no visible cell assigns those names, so a fresh kernel would
# fail with NameError. Bind them here.
# NOTE(review): assumes the windowed arrays were used as-is — the recorded
# shapes match the windowing layout; confirm no extra reshape was intended.
X_reshaped, y_reshaped = X_seq, y_seq

# ✅ Check Shapes
print(f"✅ Final Input Shape for LSTM: {X_reshaped.shape}")  # (samples, 7, features)
print(f"✅ Final Target Shape: {y_reshaped.shape}")  # (samples, target_features)


✅ Final Input Shape for LSTM: (730295, 7, 27)
✅ Final Target Shape: (730295, 18)


In [76]:
# Sanity check on the model input: print the number of NaN entries, then the
# number of infinite entries.
for elementwise_check in (np.isnan, np.isinf):
    print(elementwise_check(X_reshaped).sum())


0
0


In [78]:
# Stacked-LSTM regressor: one window of time steps in, one scaled target row out.
# Input and output sizes are read from the training arrays instead of being
# hard-coded, so the model follows any change in window length, feature count
# or target width.
model = Sequential([
    # Explicit Input layer instead of `input_shape=` on the first LSTM; Keras
    # warns about that argument in Sequential models (presumably the source of
    # the warning fragment in this cell's recorded output).
    tf.keras.Input(shape=X_reshaped.shape[1:]),  # (time_steps, n_features)
    LSTM(64, return_sequences=True),    # pass the full sequence to the next LSTM
    LSTM(32, return_sequences=False),   # keep only the final step's output
    Dense(16, activation="relu"),
    Dense(y_reshaped.shape[1])  # linear output, one unit per target column
])

model.compile(optimizer="adam", loss="mse")
model.summary()

  super().__init__(**kwargs)


In [79]:
# 🔹 Train Model
# Training hyperparameters gathered in one place for easy tuning.
# NOTE(review): per the Keras docs, validation_split holds out the trailing
# fraction of the samples before shuffling; the sequences are built group by
# group, so the held-out part would be the last groups — confirm this is the
# intended validation set.
fit_options = dict(epochs=10, batch_size=32, validation_split=0.2)
history = model.fit(X_reshaped, y_reshaped, **fit_options)

Epoch 1/10
[1m18258/18258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 10ms/step - loss: 0.0468 - val_loss: 0.0437
Epoch 2/10
[1m18258/18258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m209s[0m 11ms/step - loss: 0.0425 - val_loss: 0.0436
Epoch 3/10
[1m18258/18258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m218s[0m 12ms/step - loss: 0.0423 - val_loss: 0.0437
Epoch 4/10
[1m18258/18258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m207s[0m 11ms/step - loss: 0.0421 - val_loss: 0.0435
Epoch 5/10
[1m18258/18258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 11ms/step - loss: 0.0420 - val_loss: 0.0436
Epoch 6/10
[1m18258/18258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 11ms/step - loss: 0.0419 - val_loss: 0.0436
Epoch 7/10
[1m18258/18258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m265s[0m 11ms/step - loss: 0.0418 - val_loss: 0.0438
Epoch 8/10
[1m18258/18258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 11ms/step - loss: 0.0418 - v

In [80]:
# 🔹 Save Model and Preprocessing Objects
# (joblib is imported with the other dependencies in the notebook's first cell.)
# The fitted scalers and encoder are saved alongside the model so the same
# transforms used to build the training arrays can be re-applied later.
model.save("weather_forecasting_model2.h5")  # Save LSTM Model
joblib.dump(scaler_X, "scaler_X2.pkl")  # Save Scaler for Features
joblib.dump(scaler_y, "scaler_y2.pkl")  # Save Scaler for Target
joblib.dump(encoder, "encoder2.pkl")  # Save One-Hot Encoder

print("✅ Model and preprocessing objects saved successfully!")



✅ Model and preprocessing objects saved successfully!
