# Model 1 - Room Climate Prediction

## EDA dan Preprocessing

### 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Display options: no cap on the number of columns shown, and width=None so
# pandas determines the display width itself instead of using a fixed value.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

### 2. Load Dataset

In [None]:
# Column labels for the measurement CSV. They are passed through `names=`
# because the file is read with header=None (every row is treated as data).
column_names = [
    '<EID>', '<AbsT>', '<RelT>', '<NID>',
    '<Temp>', '<RelH>', '<L1>', '<L2>',
    '<Occ>', '<Act>', '<Door>', '<Win>',
]

# Read measurement 38 into a DataFrame.
df = pd.read_csv(
    'datasets-location_A/room_climate-location_A-measurement38.csv',
    header=None,
    names=column_names,
)

print("Dataset berhasil dimuat!")
print(f"Ukuran dataset: {df.shape}")

### 3. Inspeksi Data

In [None]:
# Banner, then the first five rows. The bare expression on the last line is
# what the notebook renders as the cell output.
print("=" * 80, "HEAD - 5 Baris Pertama:", "=" * 80, sep="\n")
df.head()

In [None]:
# Banner, then the column/dtype/non-null overview that DataFrame.info() prints.
print("=" * 80, "INFO - Informasi Dataset:", "=" * 80, sep="\n")
df.info()

### 4. Filter Sensor (Node) A1

Pilih hanya data dari sensor A1 untuk dijadikan fokus model.

In [None]:
# List the node ids present in the file and how many rows each one has.
print("Node yang tersedia dalam dataset:")
print(df['<NID>'].unique())
print("\nJumlah data per node:")
print(df['<NID>'].value_counts().sort_index())

# Keep only the rows whose node id is 1 (sensor A1). The .copy() detaches the
# subset from `df`, so later changes to it never touch the full table.
df_a1 = df.loc[df['<NID>'].eq(1)].copy()

print("\n" + "=" * 80)
print("Data setelah filter untuk Node A1 (Node = 1):")
print("=" * 80)
print(f"Ukuran dataset A1: {df_a1.shape}")
print("\nSample data A1:")
df_a1.head()

### 5. Pilih Fitur (X) - Input Features

Pilih kolom-kolom yang akan dijadikan fitur input: `<Temp>`, `<RelH>`, `<L1>`, dan `<L2>`.

In [None]:
# Input features: the four sensor columns used as model inputs.
feature_columns = ['<Temp>', '<RelH>', '<L1>', '<L2>']

# Independent copy of just those columns for the A1 rows.
X = df_a1.loc[:, feature_columns].copy()

print("=" * 80, "Fitur Input (X):", "=" * 80, sep="\n")
print(f"Shape: {X.shape}")
print(f"Kolom: {list(X.columns)}")
print("\nSample data X:")
X.head()

### 6. Buat Label (y) - Target Variable

Buat target variable dengan menggeser kolom `<Temp>` satu langkah ke belakang (shift(-1)). Ini akan mengambil suhu dari baris berikutnya sebagai target prediksi.

In [None]:
# Target: the temperature of the following row. shift(-1) moves every value up
# by one position, so y at row t holds <Temp> of row t+1 and the last row
# becomes NaN.
y = df_a1['<Temp>'].shift(-1)

print("=" * 80, "Target Variable (y):", "=" * 80, sep="\n")
print(f"Shape: {y.shape}")
print("\nSample data y (5 baris pertama):")
print(y.head())
print("\nCek nilai NaN di y:")
print(f"Jumlah NaN: {y.isna().sum()}")
print(f"Posisi NaN: {y.index[y.isna()].tolist()}")

### 7. Bersihkan Data

Karena `shift(-1)` menghasilkan satu nilai NaN di baris terakhir, kita perlu menghapus baris tersebut dari X dan y.

In [None]:
# Report the shapes before the trailing row is removed.
print("=" * 80, "Sebelum pembersihan:", "=" * 80, sep="\n")
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

# Drop the final row from both objects: it is the row whose shifted target is
# NaN, and X must stay row-aligned with y.
X, y = X.iloc[:-1], y.iloc[:-1]

print("\n" + "=" * 80)
print("Setelah pembersihan:")
print("=" * 80)
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")
print(f"\nCek NaN di y setelah pembersihan: {y.isna().sum()}")
print(f"Cek NaN di X setelah pembersihan: {X.isna().sum().sum()}")

### 8. Statistik Deskriptif dan Visualisasi

Mari kita lihat statistik deskriptif dari data yang telah dibersihkan.

In [None]:
# Summary statistics of the input features.
print("=" * 80, "Statistik Deskriptif Fitur (X):", "=" * 80, sep="\n")
print(X.describe())

# Summary statistics of the target.
print("\n" + "=" * 80)
print("Statistik Deskriptif Target (y):")
print("=" * 80)
print(y.describe())

In [None]:
# Pairwise correlation of the input features, drawn as an annotated heatmap.
correlation_matrix = X.corr()

plt.figure(figsize=(10, 8))
heatmap_options = dict(
    annot=True,
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
)
sns.heatmap(correlation_matrix, **heatmap_options)
plt.title('Korelasi Antar Fitur (Sensor A1)', fontsize=14, fontweight='bold', pad=20)
plt.tight_layout()
plt.show()

# Same numbers as plain text.
print("\nMatriks Korelasi:")
print(correlation_matrix)

### 9. Ringkasan Preprocessing

Data telah siap untuk digunakan dalam model machine learning!

In [None]:
# Recap of the preprocessing steps. Sizes are read from the live objects; the
# remaining lines are fixed text.
print("=" * 80, "RINGKASAN PREPROCESSING", "=" * 80, sep="\n")

print("\n1. Dataset Awal:")
print("   - File: room_climate-location_A-measurement38.csv")
print(f"   - Total baris: {df.shape[0]}")
print(f"   - Total kolom: {df.shape[1]}")

print("\n2. Filter Sensor:")
print("   - Sensor dipilih: A1 (Node = 1)")
print(f"   - Jumlah data sensor A1: {df_a1.shape[0]}")

print("\n3. Fitur (X):")
print(f"   - Kolom: {list(X.columns)}")
print(f"   - Shape: {X.shape}")

print("\n4. Target (y):")
print("   - Target: Suhu (Temp) pada timestep berikutnya")
print(f"   - Shape: {y.shape}")

print("\n5. Data Bersih:")
print(f"   - Total sampel: {X.shape[0]}")
# NOTE: the next two lines are static text, not the result of a check.
print("   - Tidak ada nilai NaN")
print("   - Data siap untuk training model!")

---

## Data Splitting dan Scaling

### 10. Split Dataset untuk Training dan Validation

**Strategi Splitting:**
- **Training Set (80%)** + **Validation Set (20%)**: Dari `measurement38.csv`
- **Test Set (100%)**: Dari `measurement39.csv` (file terpisah)

Keuntungan pendekatan ini:
1. ✅ Test set benar-benar independen dari training
2. ✅ Menghindari data leakage
3. ✅ Evaluasi model lebih objektif

In [None]:
from sklearn.model_selection import train_test_split

# Chronological 80/20 split of measurement 38. shuffle=False keeps the row
# order, so the validation part is the tail of the series.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=False
)

print("=" * 80, "SPLIT DATA DARI MEASUREMENT38.CSV:", "=" * 80, sep="\n")

print("\nTraining Set:")
print(f"  - X_train shape: {X_train.shape}")
print(f"  - y_train shape: {y_train.shape}")
print(f"  - Persentase: {len(X_train) / len(X) * 100:.1f}%")

print("\nValidation Set:")
print(f"  - X_val shape: {X_val.shape}")
print(f"  - y_val shape: {y_val.shape}")
print(f"  - Persentase: {len(X_val) / len(X) * 100:.1f}%")

print("\n" + "=" * 80)
print(f"Total data dari measurement38.csv: {len(X)} samples")
print("=" * 80)

### 11. Load dan Preprocess Test Set (measurement39.csv)

Load dataset testing dari file terpisah dan terapkan preprocessing yang sama seperti training set.

In [None]:
# Read measurement 39 with the same column labels and header handling as the
# training file.
df_test = pd.read_csv(
    'datasets-location_A/room_climate-location_A-measurement39.csv',
    header=None,
    names=column_names,
)

print("=" * 80, "LOAD TEST DATASET (MEASUREMENT39.CSV):", "=" * 80, sep="\n")
print(f"Shape: {df_test.shape}")
print("\nSample data:")
print(df_test.head())

# Keep only node 1 (sensor A1), as for the training data.
df_test_a1 = df_test.loc[df_test['<NID>'].eq(1)].copy()

print("\n" + "=" * 80)
print("Filter untuk Node A1:")
print("=" * 80)
print(f"Shape setelah filter: {df_test_a1.shape}")

# Same feature/target construction as for training: the feature columns as X,
# the next row's temperature as y, then drop the last row (NaN target).
X_test = df_test_a1.loc[:, feature_columns].copy().iloc[:-1]
y_test = df_test_a1['<Temp>'].shift(-1).iloc[:-1]

print("\n" + "=" * 80)
print("TEST SET (dari measurement39.csv):")
print("=" * 80)
print(f"  - X_test shape: {X_test.shape}")
print(f"  - y_test shape: {y_test.shape}")
print(f"  - Tidak ada NaN: {X_test.isna().sum().sum() == 0 and y_test.isna().sum() == 0}")

print("\n" + "=" * 80)

### 12. Feature Scaling dengan StandardScaler

**Penting untuk Deep Learning!** Scaling membantu model konvergen lebih cepat dan stabil.

**Catatan Penting:**
- Fit scaler **HANYA** pada training set
- Transform training, validation, dan test set menggunakan scaler yang sama
- Ini mencegah **data leakage** dari validation/test set

In [None]:
from sklearn.preprocessing import StandardScaler

# One scaler for the features and one for the target, both fitted on the
# training portion only. fit() returns the scaler itself, so creation and
# fitting are chained. The target is reshaped to a single column because the
# scaler expects 2-D input.
scaler_X = StandardScaler().fit(X_train)
scaler_y = StandardScaler().fit(y_train.to_numpy().reshape(-1, 1))

print("=" * 80, "FEATURE SCALING PARAMETERS (dari Training Set):", "=" * 80, sep="\n")

print("\nScaler X (Features):")
print(f"  Mean: {scaler_X.mean_}")
print(f"  Std:  {scaler_X.scale_}")

print("\nScaler y (Target):")
print(f"  Mean: {scaler_y.mean_[0]:.4f}")
print(f"  Std:  {scaler_y.scale_[0]:.4f}")

print("\n" + "=" * 80)

### 13. Transform Semua Dataset dengan Scaler

Terapkan scaling ke training, validation, dan test set menggunakan parameter dari training set.

In [None]:
# Scale the features of all three sets with the already-fitted feature scaler.
X_train_scaled = scaler_X.transform(X_train)
X_val_scaled = scaler_X.transform(X_val)
X_test_scaled = scaler_X.transform(X_test)

# Scale the targets the same way: reshape to one column for the scaler, then
# back to a 1-D vector.
y_train_scaled = scaler_y.transform(y_train.to_numpy().reshape(-1, 1)).reshape(-1)
y_val_scaled = scaler_y.transform(y_val.to_numpy().reshape(-1, 1)).reshape(-1)
y_test_scaled = scaler_y.transform(y_test.to_numpy().reshape(-1, 1)).reshape(-1)

print("=" * 80, "DATA SETELAH SCALING:", "=" * 80, sep="\n")

print("\n1Ô∏è‚É£  TRAINING SET (80% dari measurement38.csv)")
print(f"   ‚îú‚îÄ X_train_scaled shape: {X_train_scaled.shape}")
print(f"   ‚îú‚îÄ y_train_scaled shape: {y_train_scaled.shape}")
print(f"   ‚îú‚îÄ X_train_scaled mean: {X_train_scaled.mean(axis=0)}")
print(f"   ‚îî‚îÄ X_train_scaled std:  {X_train_scaled.std(axis=0)}")

print("\n2Ô∏è‚É£  VALIDATION SET (20% dari measurement38.csv)")
print(f"   ‚îú‚îÄ X_val_scaled shape: {X_val_scaled.shape}")
print(f"   ‚îî‚îÄ y_val_scaled shape: {y_val_scaled.shape}")

print("\n3Ô∏è‚É£  TEST SET (100% dari measurement39.csv)")
print(f"   ‚îú‚îÄ X_test_scaled shape: {X_test_scaled.shape}")
print(f"   ‚îî‚îÄ y_test_scaled shape: {y_test_scaled.shape}")

print("\n" + "=" * 80)
print("‚úÖ Semua data telah di-scale dan siap untuk model Deep Learning!")
print("=" * 80)

### 14. Visualisasi Perbandingan Data Sebelum dan Sesudah Scaling

Mari kita lihat efek scaling pada distribusi data.

In [None]:
# Histograms of every training feature before (top row) and after (bottom row)
# scaling. The panels are driven by `feature_columns`, the list X_train was
# built from, so column i of X_train_scaled lines up with feature_names[i]
# and the grid adapts if the feature list changes.
feature_names = list(feature_columns)

# squeeze=False keeps `axes` two-dimensional even for a single feature.
fig, axes = plt.subplots(2, len(feature_names), figsize=(20, 10), squeeze=False)
fig.suptitle('Perbandingan Distribusi Data: Sebelum vs Sesudah Scaling',
             fontsize=16, fontweight='bold')

for i, feature in enumerate(feature_names):
    # Top row: raw values.
    raw_ax = axes[0, i]
    raw_ax.hist(X_train[feature], bins=30, edgecolor='black',
                alpha=0.7, color='lightcoral')
    raw_ax.set_title(f'{feature} - Sebelum Scaling', fontweight='bold')
    raw_ax.set_xlabel('Nilai')
    raw_ax.set_ylabel('Frekuensi')
    raw_ax.grid(True, alpha=0.3)

    # Bottom row: the same feature after scaling.
    scaled_ax = axes[1, i]
    scaled_ax.hist(X_train_scaled[:, i], bins=30, edgecolor='black',
                   alpha=0.7, color='lightgreen')
    scaled_ax.set_title(f'{feature} - Sesudah Scaling', fontweight='bold')
    scaled_ax.set_xlabel('Nilai (Scaled)')
    scaled_ax.set_ylabel('Frekuensi')
    scaled_ax.grid(True, alpha=0.3)

    # Mark the mean of the scaled values with a dashed vertical line.
    mean_val = X_train_scaled[:, i].mean()
    scaled_ax.axvline(mean_val, color='red', linestyle='--', linewidth=2,
                      label=f'Œº={mean_val:.2f}')
    scaled_ax.legend()

plt.tight_layout()
plt.show()

print("Perhatikan bahwa data yang sudah di-scale memiliki mean ‚âà 0 dan std ‚âà 1")

### 15. Ringkasan Final: Data Siap untuk Deep Learning Model

Semua preprocessing, splitting, dan scaling telah selesai!

In [None]:
# Final recap of the data pipeline. Shapes, byte sizes and sample counts are
# read from the scaled arrays; everything else is fixed text.
print("="*80)
print("üéØ RINGKASAN FINAL - DATA PREPROCESSING & SPLITTING")
print("="*80)

print(f"\nüìÅ SUMBER DATA:")
print(f"   ‚îú‚îÄ Training + Validation: measurement38.csv (Node A1)")
print(f"   ‚îî‚îÄ Testing: measurement39.csv (Node A1)")

# Per-set shapes; nbytes / 1024 is the array's in-memory size in KB.
print(f"\nüìä PEMBAGIAN DATASET:")
print(f"\n   1Ô∏è‚É£  TRAINING SET (80% dari measurement38.csv)")
print(f"      ‚îú‚îÄ X_train_scaled: {X_train_scaled.shape} ‚Üí {X_train_scaled.nbytes / 1024:.2f} KB")
print(f"      ‚îú‚îÄ y_train_scaled: {y_train_scaled.shape} ‚Üí {y_train_scaled.nbytes / 1024:.2f} KB")
print(f"      ‚îî‚îÄ Total samples: {len(X_train_scaled):,}")

print(f"\n   2Ô∏è‚É£  VALIDATION SET (20% dari measurement38.csv)")
print(f"      ‚îú‚îÄ X_val_scaled: {X_val_scaled.shape} ‚Üí {X_val_scaled.nbytes / 1024:.2f} KB")
print(f"      ‚îú‚îÄ y_val_scaled: {y_val_scaled.shape} ‚Üí {y_val_scaled.nbytes / 1024:.2f} KB")
print(f"      ‚îî‚îÄ Total samples: {len(X_val_scaled):,}")

print(f"\n   3Ô∏è‚É£  TEST SET (100% dari measurement39.csv)")
print(f"      ‚îú‚îÄ X_test_scaled: {X_test_scaled.shape} ‚Üí {X_test_scaled.nbytes / 1024:.2f} KB")
print(f"      ‚îú‚îÄ y_test_scaled: {y_test_scaled.shape} ‚Üí {y_test_scaled.nbytes / 1024:.2f} KB")
print(f"      ‚îî‚îÄ Total samples: {len(X_test_scaled):,}")

print(f"\nüîß FITUR (INPUT):")
print(f"   {feature_columns}")

print(f"\nüéØ TARGET (OUTPUT):")
print(f"   Suhu (<Temp>) pada timestep berikutnya")

print(f"\nüìè SCALING:")
print(f"   ‚îú‚îÄ Method: StandardScaler (mean=0, std=1)")
print(f"   ‚îú‚îÄ Fitted on: Training set only")
print(f"   ‚îî‚îÄ Applied to: Train, Validation, Test")

# NOTE: the status lines below are static text; nothing is re-checked here.
print(f"\n‚úÖ STATUS:")
print(f"   ‚îú‚îÄ Preprocessing: ‚úì Selesai")
print(f"   ‚îú‚îÄ Data Splitting: ‚úì Selesai")
print(f"   ‚îú‚îÄ Feature Scaling: ‚úì Selesai")
print(f"   ‚îú‚îÄ No Missing Values: ‚úì Confirmed")
print(f"   ‚îî‚îÄ Ready for Deep Learning: ‚úì YES")

print("\n" + "="*80)
print("üöÄ Data siap untuk digunakan dalam model Deep Learning!")
print("="*80)

---

## Model Building - LSTM Architecture

### 16. Import TensorFlow/Keras Libraries

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.metrics import mean_absolute_error, mean_squared_error
import math

# Report library versions and whether TensorFlow can see a GPU. The device
# list is queried once and reused.
gpu_devices = tf.config.list_physical_devices('GPU')

print("=" * 80)
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")
print(f"GPU Available: {len(gpu_devices) > 0}")
if len(gpu_devices) > 0:
    print(f"GPU Devices: {gpu_devices}")
print("=" * 80)

### 17. Prepare Data for LSTM - Create Sequences dengan Sliding Window

LSTM membutuhkan input 3D: `(samples, timesteps, features)`

Kita akan menggunakan **sliding window approach** untuk membuat sequences dari data time series.

In [None]:
def create_sequences(X, y, window_size):
    """
    Build sliding-window samples for an LSTM.

    ``y`` must be row-aligned with ``X``: ``y[t]`` is the value to predict once
    row ``t`` has been observed. In this notebook the targets are built with
    ``shift(-1)``, so ``y[t]`` already holds the temperature of row ``t + 1``.
    Window ``k`` therefore covers rows ``k .. k + window_size - 1`` and is
    paired with ``y[k + window_size - 1]``, the target of its LAST row.
    Pairing it with ``y[k + window_size]`` would skip one step and train the
    model on a two-step-ahead target.

    Parameters:
    - X: Input features, array-like with one row per timestep
    - y: Target values, array-like with the same number of rows as X
    - window_size: Number of consecutive timesteps in every input window

    Returns:
    - X_seq: Array of shape (samples, window_size, features)
    - y_seq: Array of shape (samples,), one target per window

    ``samples`` equals ``len(X) - window_size + 1``. When the series is shorter
    than one window, both arrays are empty but keep their trailing dimensions.

    Raises:
    - ValueError: if window_size is smaller than 1, or X and y differ in length
    """
    # Positional numpy indexing, so a pandas object with a non-zero-based index
    # cannot silently select the wrong rows.
    X = np.asarray(X)
    y = np.asarray(y)

    if window_size < 1:
        raise ValueError("window_size must be a positive integer")
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same number of rows, got {len(X)} and {len(y)}"
        )

    n_windows = len(X) - window_size + 1
    if n_windows <= 0:
        # Not enough rows for a single window: return correctly shaped empties.
        return (
            np.empty((0, window_size) + X.shape[1:], dtype=X.dtype),
            np.empty((0,) + y.shape[1:], dtype=y.dtype),
        )

    X_seq = np.stack([X[start:start + window_size] for start in range(n_windows)])
    # Target of each window's last row; copy so the result does not alias `y`.
    y_seq = y[window_size - 1:].copy()

    return X_seq, y_seq

# Number of past timesteps fed to the model for each prediction.
WINDOW_SIZE = 20

print("=" * 80, "MEMBUAT SEQUENCES UNTUK LSTM", "=" * 80, sep="\n")
print(f"\nWindow Size: {WINDOW_SIZE} timesteps")
print(f"\nSetiap sample akan menggunakan {WINDOW_SIZE} timesteps sebelumnya")
print("untuk memprediksi suhu di timestep berikutnya.")
print("\n" + "=" * 80)

### 18. Apply Sliding Window ke Training, Validation, dan Test Set

In [None]:
# Turn each scaled set into (window, target) pairs. Each set is windowed on
# its own, so no window spans two sets.
X_train_seq, y_train_seq = create_sequences(X_train_scaled, y_train_scaled, WINDOW_SIZE)
X_val_seq, y_val_seq = create_sequences(X_val_scaled, y_val_scaled, WINDOW_SIZE)
X_test_seq, y_test_seq = create_sequences(X_test_scaled, y_test_scaled, WINDOW_SIZE)

print("=" * 80, "SHAPES SETELAH SEQUENCE CREATION:", "=" * 80, sep="\n")

print("\nüìä TRAINING SET:")
print(f"   X_train_seq shape: {X_train_seq.shape} ‚Üí (samples, timesteps, features)")
print(f"   y_train_seq shape: {y_train_seq.shape}")
print(f"   Samples: {len(X_train_seq):,}")

print("\nüìä VALIDATION SET:")
print(f"   X_val_seq shape: {X_val_seq.shape}")
print(f"   y_val_seq shape: {y_val_seq.shape}")
print(f"   Samples: {len(X_val_seq):,}")

print("\nüìä TEST SET:")
print(f"   X_test_seq shape: {X_test_seq.shape}")
print(f"   y_test_seq shape: {y_test_seq.shape}")
print(f"   Samples: {len(X_test_seq):,}")

print("\n" + "=" * 80)
print("‚úÖ Data siap untuk LSTM Model!")
print("=" * 80)

### 19. Build LSTM Model - Medium Complexity

**Arsitektur:**
- LSTM Layer 1: 64 units, return_sequences=True
- LSTM Layer 2: 32 units
- Dense Layer: 16 units (ReLU)
- Output Layer: 1 unit (Linear)

In [None]:
# Reset Keras' global state so re-running this cell starts from a clean slate.
keras.backend.clear_session()

# Stacked LSTM regressor: LSTM(64) -> Dropout -> LSTM(32) -> Dropout ->
# Dense(16, ReLU) -> Dense(1, linear).
model = models.Sequential([
    # Explicit input specification (timesteps, features). Passing `input_shape=`
    # to the first layer is deprecated for Sequential models in recent Keras.
    layers.Input(shape=(WINDOW_SIZE, X_train_seq.shape[2])),

    # LSTM layer 1: returns the full sequence so the next LSTM layer receives
    # one vector per timestep.
    layers.LSTM(
        units=64,
        return_sequences=True,
        name='lstm_layer_1'
    ),
    layers.Dropout(0.2, name='dropout_1'),

    # LSTM layer 2: last recurrent layer, so only the final state is returned.
    layers.LSTM(
        units=32,
        return_sequences=False,
        name='lstm_layer_2'
    ),
    layers.Dropout(0.2, name='dropout_2'),

    # Dense hidden layer.
    layers.Dense(
        units=16,
        activation='relu',
        name='dense_hidden'
    ),

    # Output layer: one linear unit for the (scaled) temperature.
    layers.Dense(
        units=1,
        activation='linear',
        name='output_layer'
    )
])

print(f"{'='*80}")
print(f"üß† LSTM MODEL ARCHITECTURE")
print(f"{'='*80}")
model.summary()
print(f"{'='*80}")

### 20. Compile Model dengan MAE dan RMSE Metrics

In [None]:
# Custom RMSE metric
def rmse(y_true, y_pred):
    """Return the root of the mean squared difference between y_pred and y_true.

    The mean is taken over every element of the tensors passed in, so the
    result is a single scalar for that call.
    NOTE(review): when used as a Keras function metric this is presumably
    evaluated per batch and then averaged, which is not the same as the RMSE
    over the whole dataset - confirm before quoting the number.
    """
    return keras.backend.sqrt(keras.backend.mean(keras.backend.square(y_pred - y_true)))

# Compile the model: Adam optimiser, MSE loss, MAE and RMSE as metrics.
#
# RMSE uses Keras' built-in stateful metric rather than a plain function.
# A function metric is evaluated per batch and then averaged, which gives the
# mean of batch RMSEs, not the RMSE of the whole epoch/dataset. The built-in
# metric accumulates squared errors across batches and takes the root at the
# end. It is registered under the name 'rmse', so the history keys stay
# 'rmse' / 'val_rmse', and the saved model needs no custom objects to reload.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='mse',  # Mean Squared Error as the training loss
    metrics=[
        'mae',                                            # Mean Absolute Error
        keras.metrics.RootMeanSquaredError(name='rmse'),  # Root Mean Squared Error
    ]
)

print(f"{'='*80}")
print(f"‚úÖ MODEL COMPILED")
print(f"{'='*80}")
print(f"\nüìã Configuration:")
print(f"   Optimizer: Adam (lr=0.001)")
print(f"   Loss Function: MSE (Mean Squared Error)")
print(f"   Metrics: MAE, RMSE")
print(f"\n{'='*80}")

### 21. Setup Callbacks untuk Training

In [None]:
# Training callbacks. All three watch the validation loss (lower is better).
callbacks = [
    # Early stopping: end training after 15 epochs without improvement and
    # put the best weights seen back into the model.
    EarlyStopping(
        monitor='val_loss',
        patience=15,
        restore_best_weights=True,
        verbose=1,
        mode='min'
    ),
    
    # Model checkpoint: write the model to disk whenever val_loss improves.
    ModelCheckpoint(
        filepath='best_lstm_model.keras',
        monitor='val_loss',
        save_best_only=True,
        verbose=1,
        mode='min'
    ),
    
    # Learning-rate schedule: halve the learning rate after 5 epochs without
    # improvement, never going below 1e-7.
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-7,
        verbose=1,
        mode='min'
    )
]

# Echo the configuration (fixed text mirroring the arguments above).
print(f"{'='*80}")
print(f"‚úÖ CALLBACKS CONFIGURED")
print(f"{'='*80}")
print(f"\n1Ô∏è‚É£  EarlyStopping:")
print(f"   - Monitor: val_loss")
print(f"   - Patience: 15 epochs")
print(f"   - Restore best weights: True")

print(f"\n2Ô∏è‚É£  ModelCheckpoint:")
print(f"   - Save to: best_lstm_model.keras")
print(f"   - Save best only: True")

print(f"\n3Ô∏è‚É£  ReduceLROnPlateau:")
print(f"   - Monitor: val_loss")
print(f"   - Factor: 0.5 (reduce by half)")
print(f"   - Patience: 5 epochs")

print(f"\n{'='*80}")

### 22. Train Model

Mulai training dengan data yang telah diproses!

In [None]:
# Upper bound on epochs (early stopping may end training sooner) and the
# mini-batch size.
EPOCHS = 100
BATCH_SIZE = 32

print("=" * 80, "üöÄ STARTING TRAINING", "=" * 80, sep="\n")
print("\nüìã Training Configuration:")
print(f"   Epochs: {EPOCHS}")
print(f"   Batch Size: {BATCH_SIZE}")
print(f"   Training Samples: {len(X_train_seq):,}")
print(f"   Validation Samples: {len(X_val_seq):,}")
print("\n" + "=" * 80 + "\n")

# Fit on the training windows, validating on the validation windows after
# every epoch. The returned History object holds the per-epoch metrics.
fit_options = dict(
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_val_seq, y_val_seq),
    callbacks=callbacks,
    verbose=1,
)
history = model.fit(X_train_seq, y_train_seq, **fit_options)

print("\n" + "=" * 80)
print("‚úÖ TRAINING COMPLETED!")
print("=" * 80)

### 23. Visualisasi Training History

In [None]:
# Training curves: one panel per tracked quantity, training vs validation.
fig, axes = plt.subplots(1, 3, figsize=(20, 5))
fig.suptitle('Training History - LSTM Model', fontsize=16, fontweight='bold')

# (history key, short label, panel title) for each panel, left to right.
panels = [
    ('loss', 'Loss', 'Model Loss (MSE)'),
    ('mae', 'MAE', 'Mean Absolute Error (MAE)'),
    ('rmse', 'RMSE', 'Root Mean Squared Error (RMSE)'),
]
for ax, (key, short, title) in zip(axes, panels):
    ax.plot(history.history[key], label=f'Training {short}', linewidth=2)
    ax.plot(history.history[f'val_{key}'], label=f'Validation {short}', linewidth=2)
    ax.set_title(title, fontweight='bold')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(short)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Metrics of the LAST recorded epoch ([-1]) for both splits. If early stopping
# restored weights from an earlier epoch, these are not the restored model's
# numbers.
print("\n" + "=" * 80)
print("üìä FINAL TRAINING METRICS:")
print("=" * 80)
for heading, prefix in (("Training", ""), ("Validation", "val_")):
    print(f"\n{heading}:")
    print(f"   Loss (MSE): {history.history[prefix + 'loss'][-1]:.6f}")
    print(f"   MAE: {history.history[prefix + 'mae'][-1]:.6f}")
    print(f"   RMSE: {history.history[prefix + 'rmse'][-1]:.6f}")
print("\n" + "=" * 80)

### 24. Evaluasi Model pada Test Set

Evaluasi performa model pada test set yang independen (measurement39.csv).

In [None]:
# Evaluate on the test windows. model.evaluate returns the loss followed by
# the compiled metrics, all computed on the SCALED target.
test_results = model.evaluate(X_test_seq, y_test_seq, verbose=0)

print(f"{'='*80}")
print(f"üéØ TEST SET EVALUATION (measurement39.csv)")
print(f"{'='*80}")
print(f"\nTest Samples: {len(X_test_seq):,}")
print(f"\nüìä Test Metrics:")
print(f"   Loss (MSE): {test_results[0]:.6f}")
print(f"   MAE: {test_results[1]:.6f}")
print(f"   RMSE: {test_results[2]:.6f}")

# Predict on the test windows (output is still in scaled units).
y_pred_scaled = model.predict(X_test_seq, verbose=0)

# Undo the target scaling so errors are expressed in the original unit.
# The scaler expects 2-D input, hence the reshape; flatten() returns 1-D.
y_test_original = scaler_y.inverse_transform(y_test_seq.reshape(-1, 1)).flatten()
y_pred_original = scaler_y.inverse_transform(y_pred_scaled).flatten()

# Error metrics on the original scale.
mae_original = mean_absolute_error(y_test_original, y_pred_original)
rmse_original = math.sqrt(mean_squared_error(y_test_original, y_pred_original))

print(f"\nüìä Test Metrics (Skala Asli - ¬∞C):")
print(f"   MAE: {mae_original:.4f} ¬∞C")
print(f"   RMSE: {rmse_original:.4f} ¬∞C")

print(f"\nüí° Interpretasi:")
print(f"   Model memiliki rata-rata error {mae_original:.4f}¬∞C dalam memprediksi suhu.")
print(f"   RMSE menunjukkan error {rmse_original:.4f}¬∞C dengan sensitifitas terhadap outlier.")

print(f"\n{'='*80}")

### 25. Visualisasi Prediksi vs Aktual

In [None]:
from sklearn.metrics import r2_score

# Predicted vs actual temperature on the test set: a time-series view on top
# and a scatter plot below.
fig, axes = plt.subplots(2, 1, figsize=(18, 10))
fig.suptitle('LSTM Model - Prediksi vs Aktual (Test Set)', fontsize=16, fontweight='bold')

# Plot 1: both series over the sample index.
axes[0].plot(y_test_original, label='Actual Temperature', linewidth=2, alpha=0.7)
axes[0].plot(y_pred_original, label='Predicted Temperature', linewidth=2, alpha=0.7)
axes[0].set_title('Temperature Prediction Over Time', fontweight='bold', fontsize=14)
axes[0].set_xlabel('Sample Index')
axes[0].set_ylabel('Temperature (¬∞C)')
axes[0].grid(True, alpha=0.3)

# Shaded band of +/- the absolute error around the prediction.
error = np.abs(y_test_original - y_pred_original)
axes[0].fill_between(range(len(y_test_original)),
                      y_pred_original - error,
                      y_pred_original + error,
                      alpha=0.2, color='red', label='Error Band')

# Build the legend only after every labelled artist exists; calling it before
# fill_between would leave 'Error Band' out of the legend.
axes[0].legend(fontsize=12)

# Plot 2: scatter of predicted against actual, with the y = x reference line.
axes[1].scatter(y_test_original, y_pred_original, alpha=0.5, s=20)
axes[1].plot([y_test_original.min(), y_test_original.max()],
             [y_test_original.min(), y_test_original.max()],
             'r--', linewidth=2, label='Perfect Prediction')
axes[1].set_title('Predicted vs Actual Temperature (Scatter)', fontweight='bold', fontsize=14)
axes[1].set_xlabel('Actual Temperature (¬∞C)')
axes[1].set_ylabel('Predicted Temperature (¬∞C)')
axes[1].legend(fontsize=12)
axes[1].grid(True, alpha=0.3)

# Coefficient of determination, shown in a text box in the top-left corner.
r2 = r2_score(y_test_original, y_pred_original)
axes[1].text(0.05, 0.95, f'R¬≤ = {r2:.4f}',
             transform=axes[1].transAxes,
             fontsize=14, verticalalignment='top',
             bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

plt.tight_layout()
plt.show()

# `error` holds absolute errors, so the figures below are min / max / mean
# absolute error.
print(f"\n{'='*80}")
print(f"üìä ADDITIONAL METRICS:")
print(f"{'='*80}")
print(f"   R¬≤ Score: {r2:.4f}")
print(f"   Min Error: {error.min():.4f} ¬∞C")
print(f"   Max Error: {error.max():.4f} ¬∞C")
print(f"   Mean Error: {error.mean():.4f} ¬∞C")
print(f"{'='*80}")

### 26. Ringkasan Final Model

In [None]:
# Final recap of the model run. Window size, parameter count, batch size,
# number of epochs actually run and the test metrics are read from live
# objects; the remaining lines are fixed text.
print("="*80)
print("üéØ LSTM MODEL - FINAL SUMMARY")
print("="*80)

print("\nüìÅ DATA:")
print(f"   Training Data: measurement38.csv (Node A1)")
print(f"   Test Data: measurement39.csv (Node A1)")
print(f"   Window Size: {WINDOW_SIZE} timesteps")

print("\nüß† ARSITEKTUR MODEL:")
print(f"   Type: LSTM (Long Short-Term Memory)")
print(f"   Complexity: Medium")
print(f"   Layers:")
print(f"      ‚îî‚îÄ LSTM Layer 1: 64 units")
print(f"      ‚îî‚îÄ LSTM Layer 2: 32 units")
print(f"      ‚îî‚îÄ Dense Layer: 16 units (ReLU)")
print(f"      ‚îî‚îÄ Output Layer: 1 unit (Linear)")
print(f"   Total Parameters: {model.count_params():,}")

# The epoch count is the length of the recorded loss history, i.e. the number
# of epochs that actually ran.
print("\nüîß TRAINING CONFIGURATION:")
print(f"   Optimizer: Adam (lr=0.001)")
print(f"   Loss Function: MSE")
print(f"   Batch Size: {BATCH_SIZE}")
print(f"   Epochs: {len(history.history['loss'])}")
print(f"   Callbacks: EarlyStopping, ModelCheckpoint, ReduceLROnPlateau")

print("\nüìä PERFORMANCE METRICS:")
print(f"   Test MAE: {mae_original:.4f} ¬∞C")
print(f"   Test RMSE: {rmse_original:.4f} ¬∞C")
print(f"   R¬≤ Score: {r2:.4f}")

print("\nüíæ SAVED MODEL:")
print(f"   Filename: best_lstm_model.keras")
print(f"   Location: Current directory")

# NOTE: the status lines below are static text; nothing is re-checked here.
print("\n‚úÖ STATUS:")
print(f"   Training: ‚úì Completed")
print(f"   Validation: ‚úì Completed")
print(f"   Testing: ‚úì Completed")
print(f"   Model Saved: ‚úì Yes")

print("\n" + "="*80)
print("üöÄ Model siap untuk digunakan untuk prediksi suhu ruangan!")
print("="*80)