In [23]:
import pandas as pd
import numpy as np
import os
from datetime import timedelta
import warnings
from joblib import dump
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout # เพิ่ม Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import r2_score 

# Silence library warnings and widen the pandas display limits used when
# printing DataFrames.
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', 5000)
pd.set_option('display.max_columns', 50)

# --- Project Setup & Hyperparameters ---

DATA_FILE_PATH = 'data/Training_Data_Final.xlsx'
MODEL_PATH = 'models/inventory_model_lstm.pkl'
# Look-back window: number of consecutive per-SKU rows fed to the LSTM.
# NOTE(review): the original note says "12 months"; confirm that each row
# really represents one month.
TIMESTEPS = 12
# Seed shared by every random number generator involved in training.
SEED = 42

# Features used for prediction (the per-SKU Usage_Qty is also the target).
X_features = ['day_index', 'month_num', 'year_num', 'Visit_Campus_Num',
              'Patient_E', 'Patient_I', 'Patient_O', 'Total_SKU_Usage', 'Usage_Qty']

os.makedirs('models', exist_ok=True)
os.makedirs('data', exist_ok=True)
# Seed Python's `random`, NumPy and TensorFlow together; seeding TensorFlow
# alone leaves the other generators unseeded.
tf.keras.utils.set_random_seed(SEED)

print("--- 1. PROJECT SETUP ---")
print(f"Data file expected at: {DATA_FILE_PATH}")
print(f"Model will be saved to: {MODEL_PATH}")

# --- Load and Prepare Data (Same as XGBoost Part 1) ---

df_train = pd.DataFrame()
# Placeholder only; replaced by the earliest Date once the data is loaded.
base_date = pd.to_datetime('2024-01-01')

try:
    # 1. Load the final training data file and normalise the column names.
    df_raw = pd.read_excel(DATA_FILE_PATH)
    df_raw.columns = df_raw.columns.astype(str).str.strip().str.replace(' ', '_')
    df_raw['Usage_Qty'] = pd.to_numeric(df_raw['Usage_Qty'], errors='coerce')
    # The `.dt` accessors below need a datetime column; unparseable values
    # become NaT and are removed by the dropna that follows.
    df_raw['Date'] = pd.to_datetime(df_raw['Date'], errors='coerce')
    df_raw['Visit_Campus_Num'] = df_raw['Visit_Campus'].astype('category').cat.codes

    # `cat.codes` encodes a missing campus as -1 rather than NaN, so the raw
    # 'Visit_Campus' column must be listed for those rows to be dropped.
    subset_for_dropna = ['Date', 'Usage_Qty', 'SKU', 'Lead_Time_Days', 'Safety_Stock_Qty',
                         'Unit_Cost', 'Visit_Campus', 'Visit_Campus_Num',
                         'Patient_E', 'Patient_I', 'Patient_O']
    df_raw = df_raw.dropna(subset=subset_for_dropna)
    # NOTE(review): removing non-positive usage rows leaves gaps in each SKU's
    # series; confirm this is intended for a sequence model.
    df_raw = df_raw[df_raw['Usage_Qty'] > 0]

    # 2. Collapse to one row per (Date, SKU): usage is summed, every other
    #    attribute keeps the first value seen in the group.
    agg_dict = {
        'Usage_Qty': 'sum',
        'Lead_Time_Days': 'first', 'Safety_Stock_Qty': 'first', 'Unit_Cost': 'first',
        'Visit_Campus_Num': 'first', 'Patient_E': 'first', 'Patient_I': 'first', 'Patient_O': 'first',
    }
    df_raw = df_raw.groupby(['Date', 'SKU'], as_index=False).agg(agg_dict)
    # Total usage across all SKUs that share the same Date.
    df_raw['Total_SKU_Usage'] = df_raw.groupby('Date')['Usage_Qty'].transform('sum')

    # 3. Create time features relative to the earliest date in the data.
    base_date = df_raw['Date'].min()
    df_raw['day_index'] = (df_raw['Date'] - base_date).dt.days
    df_raw['month_num'] = df_raw['Date'].dt.month
    df_raw['year_num'] = df_raw['Date'].dt.year

    # 4. Sort chronologically within each SKU so the LSTM windows are ordered.
    df_raw = df_raw.sort_values(by=['SKU', 'Date']).reset_index(drop=True)
    df_train = df_raw.copy()

    final_item_list = df_raw['SKU'].unique().tolist()
    max_date = df_train['Date'].max()

    print(f"Final training shape: {df_train.shape}")
    print(f"✅ Loaded and transformed data successfully.")

except Exception as e:
    print(f"⚠️ **FATAL DATA ERROR** ({type(e).__name__}: {e}). Cannot continue.")
    # Raise SystemExit directly so the script stops here without depending on
    # the interactive `exit` helper; the original error stays chained.
    raise SystemExit(1) from e

# --- Cell 2: Scaling and Reshaping for LSTM (FIXED CONCATENATION) ---

def create_lstm_dataset(df_group, timesteps, features=None, target_col='Usage_Qty'):
    """Build sliding-window samples for an LSTM from one group of rows.

    Each sample is `timesteps` consecutive rows of the selected feature
    columns; its target is the `target_col` value of the row that
    immediately follows the window.

    Args:
        df_group: DataFrame holding the rows of one series, already in the
            order the windows should follow.
        timesteps: Number of consecutive rows per input window.
        features: Column names to use. Defaults to the module-level
            `X_features` list.
        target_col: Name of the target column; must be one of `features`.

    Returns:
        Tuple `(X, y)` where `X` has shape
        `(n_samples, timesteps, n_features)` and `y` has shape
        `(n_samples,)`, with `n_samples = max(len(df_group) - timesteps, 0)`.

    Raises:
        ValueError: If `target_col` is not in `features`.
    """
    feature_cols = list(X_features if features is None else features)
    # Resolve the target position once instead of on every loop iteration.
    target_idx = feature_cols.index(target_col)
    data = df_group[feature_cols].values

    n_samples = len(data) - timesteps
    if n_samples <= 0:
        # Too few rows for a single window: return empty arrays that still
        # have the expected number of dimensions.
        empty_X = np.empty((0, timesteps, len(feature_cols)), dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + timesteps] for start in range(n_samples)])
    # The target for the window starting at row i is row i + timesteps.
    y = data[timesteps:, target_idx].copy()
    return X, y

# 1. Scaling: min-max scale every feature before windowing.
# NOTE(review): the scaler is fitted on all rows, including those that
# model.fit later holds out through validation_split.
scaler = MinMaxScaler()
df_train[X_features] = scaler.fit_transform(df_train[X_features])

X_master, y_master = [], []
skipped_skus = []

# 2. Reshaping: build LSTM windows for every SKU that has enough history.
for sku, df_group in df_train.groupby('SKU'):
    X_sku, y_sku = create_lstm_dataset(df_group, TIMESTEPS)

    # A SKU needs more than TIMESTEPS rows to yield at least one sample.
    if X_sku.shape[0] == 0:
        skipped_skus.append(sku)
        continue

    X_master.append(X_sku)
    y_master.append(y_sku)

# Report skipped SKUs in one line so they are not dropped silently.
if skipped_skus:
    print(f"Skipped {len(skipped_skus)} SKU(s) with fewer than {TIMESTEPS + 1} data points.")

# 3. Combine the per-SKU arrays; fail loudly when nothing could be built.
if X_master:
    X_master = np.concatenate(X_master)
    y_master = np.concatenate(y_master)

    print(f"\n*** SUCCESS: TOTAL LSTM SAMPLES CREATED: {X_master.shape[0]} ***")
else:
    raise ValueError("No SKUs had enough historical data (at least TIMESTEPS + 1) to create LSTM training samples.")

print(f"\nLSTM Input Shape (X): {X_master.shape}")
print(f"LSTM Target Shape (Y): {y_master.shape}")


# --- Cell 3: Train LSTM Model (Deep Architecture + Dropout) ---

# 1. Architecture: two stacked LSTM layers, each followed by 20% dropout,
#    then a single-unit dense output layer.
n_features = X_master.shape[2]
model = Sequential([
    LSTM(units=100, activation='relu', return_sequences=True,
         input_shape=(TIMESTEPS, n_features)),
    Dropout(0.2),
    LSTM(units=50, activation='relu'),
    Dropout(0.2),
    Dense(units=1),
])

# 2. Compile with the Adam optimizer and mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')

# 3. Train for 200 epochs, holding out 10% of the samples for validation.
history = model.fit(
    X_master,
    y_master,
    epochs=200,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)

print("\nLSTM Model Trained with Deep Architecture (200 epochs + Dropout).")

# --- Cell 4: Save Model and Metadata ---

KERAS_MODEL_PATH = 'models/lstm_model_weights.h5'
R2_GOAL = 0.80  # minimum R-squared the goal check accepts


def _inverse_scale_target(scaled_values):
    """Convert scaled Usage_Qty values back to their original units.

    The scaler was fitted on every column in `X_features`, so the values are
    placed in the Usage_Qty column of a zero-filled array of that width,
    inverse-transformed, and the Usage_Qty column is returned.
    """
    target_idx = X_features.index('Usage_Qty')
    padded = np.zeros((len(scaled_values), len(X_features)))
    padded[:, target_idx] = np.asarray(scaled_values).flatten()
    return scaler.inverse_transform(padded)[:, target_idx]


# 1. Predict on every training sample (used for the R-squared below).
# NOTE(review): this scores the same data the model was fitted on, so the
# result is not a held-out estimate.
y_pred_scaled = model.predict(X_master, verbose=0)

# 2. Undo the scaling on the predictions and on the true targets.
y_pred_actual = _inverse_scale_target(y_pred_scaled)
y_actual = _inverse_scale_target(y_master)

# 3. Compute R-squared and report whether the goal was actually reached.
r_squared_score = r2_score(y_actual, y_pred_actual)
goal_status = "MET" if r_squared_score >= R2_GOAL else "NOT MET"

print(f"\nModel Trained. R-squared Score (LSTM): {r_squared_score:.4f}")
print(f"Goal Check: AI Model Accuracy is >= 80% ({R2_GOAL:.2f}): {goal_status}.")

# 4. Save the Keras model.
model.save(KERAS_MODEL_PATH)

# 5. Save the metadata needed to reuse the model, including the fitted scaler.
model_metadata = {
    'model_path': KERAS_MODEL_PATH,
    'base_date': base_date,
    'max_date': max_date,
    'item_list': final_item_list,
    'features': X_features,
    'timesteps': TIMESTEPS,
    'scaler': scaler
}
dump(model_metadata, MODEL_PATH)

print(f"Model weights saved to {KERAS_MODEL_PATH}")
print(f"Metadata saved successfully to {MODEL_PATH}")

--- 1. PROJECT SETUP ---
Data file expected at: data/Training_Data_Final.xlsx
Model will be saved to: models/inventory_model_lstm.pkl
Final training shape: (1330, 14)
✅ Loaded and transformed data successfully.

*** SUCCESS: TOTAL LSTM SAMPLES CREATED: 346 ***

LSTM Input Shape (X): (346, 12, 9)
LSTM Target Shape (Y): (346,)
Epoch 1/200
10/10 ━━━━━━━━━━━━━━━━━━━━ 3s 31ms/step - loss: 0.0164 - val_loss: 0.0257
Epoch 2/200
10/10 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0129 - val_loss: 0.0210
Epoch 3/200
10/10 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0102 - val_loss: 0.0194
Epoch 4/200
10/10 ━━━━━━━━━━━━━━━━━━━━ 0s 8ms/step - loss: 0.0110 - val_loss: 0.0214
Epoch 5/200
10/10 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step - loss: 0.0107 - val_loss: 0.0191
Epoch 6/200
10/10 ━━━━━━━━━━━━━━━━━━━━ 0s 9




Model Trained. R-squared Score (LSTM): 0.7335
Goal Check: AI Model Accuracy is >= 80% (0.80).
Model weights saved to models/lstm_model_weights.h5
Metadata saved successfully to models/inventory_model_lstm.pkl
