# LSTM Model for Cumulative Material Weight Prediction
## Predicting cumulative weight from January 1, 2025 to specified end dates

In [1]:
# Disabling GPU
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
import pandas as pd
import numpy as np
import warnings
from datetime import datetime
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings('ignore')

2025-10-30 12:24:29.133033: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-30 12:24:29.178687: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-30 12:24:29.974868: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-30 12:24:29.974868: I tensorflow/core/util/port.cc:153] oneD

## 1. Load Data and Prediction Mapping

In [3]:
# Load datasets
print("📂 Loading datasets...")
receivals = pd.read_csv('data/kernel/receivals.csv')
receivals['date_arrival'] = pd.to_datetime(receivals['date_arrival'], utc=True)

prediction_mapping = pd.read_csv('data/prediction_mapping.csv')
prediction_mapping['forecast_start_date'] = pd.to_datetime(prediction_mapping['forecast_start_date'], utc=True)
prediction_mapping['forecast_end_date'] = pd.to_datetime(prediction_mapping['forecast_end_date'], utc=True)

sample_submission = pd.read_csv('data/sample_submission.csv')

print(f"✅ Receivals: {receivals.shape}")
print(f"✅ Prediction mapping: {prediction_mapping.shape}")
print(f"✅ Sample submission: {sample_submission.shape}")

📂 Loading datasets...
✅ Receivals: (122590, 10)
✅ Prediction mapping: (30450, 4)
✅ Sample submission: (30450, 2)
✅ Receivals: (122590, 10)
✅ Prediction mapping: (30450, 4)
✅ Sample submission: (30450, 2)


## 2. Prepare Cumulative Target Variable

In [4]:
def calculate_cumulative_weight(receivals_df, rm_id, start_date, end_date):
    """Calculate cumulative weight for a material between two dates"""
    mask = (
        (receivals_df['rm_id'] == rm_id) & 
        (receivals_df['date_arrival'] >= start_date) &
        (receivals_df['date_arrival'] <= end_date)
    )
    return receivals_df.loc[mask, 'net_weight'].sum()

print("🔄 Creating training dataset with cumulative targets...")

# Create daily aggregations per material
daily_data = receivals.groupby([
    receivals['date_arrival'].dt.date, 
    'rm_id'
]).agg({
    'net_weight': ['sum', 'count', 'mean', 'std']
}).reset_index()

daily_data.columns = ['date', 'rm_id', 'total_weight', 'num_deliveries', 'avg_weight', 'std_weight']
daily_data['date'] = pd.to_datetime(daily_data['date'], utc=True)
daily_data['std_weight'] = daily_data['std_weight'].fillna(0)
daily_data = daily_data.sort_values(['rm_id', 'date'])

print(f"✅ Daily aggregations created: {daily_data.shape}")

🔄 Creating training dataset with cumulative targets...
✅ Daily aggregations created: (41933, 6)


## 3. Feature Engineering for LSTM

In [5]:
print("🔧 Feature engineering...")

# Temporal features
daily_data['year'] = daily_data['date'].dt.year
daily_data['month'] = daily_data['date'].dt.month
daily_data['dayofweek'] = daily_data['date'].dt.dayofweek
daily_data['dayofyear'] = daily_data['date'].dt.dayofyear

# Cyclical encoding
daily_data['month_sin'] = np.sin(2 * np.pi * daily_data['month'] / 12)
daily_data['month_cos'] = np.cos(2 * np.pi * daily_data['month'] / 12)
daily_data['dayofweek_sin'] = np.sin(2 * np.pi * daily_data['dayofweek'] / 7)
daily_data['dayofweek_cos'] = np.cos(2 * np.pi * daily_data['dayofweek'] / 7)

# Lag features
for lag in [1, 7, 30]:
    daily_data[f'weight_lag_{lag}'] = daily_data.groupby('rm_id')['total_weight'].shift(lag)

# Rolling statistics
for window in [7, 30]:
    daily_data[f'weight_roll_mean_{window}'] = daily_data.groupby('rm_id')['total_weight'].transform(
        lambda x: x.rolling(window, min_periods=1).mean()
    )
    daily_data[f'weight_roll_std_{window}'] = daily_data.groupby('rm_id')['total_weight'].transform(
        lambda x: x.rolling(window, min_periods=1).std().fillna(0)
    )

# Fill NaN
daily_data = daily_data.fillna(0)

print(f"✅ Features engineered: {daily_data.shape[1]} columns")

🔧 Feature engineering...
✅ Features engineered: 21 columns


## 4. Create Sequences for LSTM

In [6]:
def create_cumulative_sequences(data, sequence_length=30):
    """Create sequences predicting cumulative future weights"""
    
    feature_cols = [col for col in data.columns if col not in ['date', 'rm_id', 'total_weight']]
    
    X_sequences = []
    y_cumulative = []
    metadata = []
    
    for rm_id in data['rm_id'].unique():
        material_data = data[data['rm_id'] == rm_id].sort_values('date').copy()
        
        if len(material_data) < sequence_length + 30:  # Need history + future
            continue
        
        for i in range(sequence_length, len(material_data) - 30):
            # Sequence of historical features
            seq = material_data[feature_cols].iloc[i-sequence_length:i].values
            
            # Target: cumulative weight for next 30 days
            future_weights = material_data['total_weight'].iloc[i:i+30].sum()
            
            X_sequences.append(seq)
            y_cumulative.append(future_weights)
            metadata.append({
                'rm_id': rm_id,
                'end_date': material_data['date'].iloc[i+29]
            })
    
    return np.array(X_sequences), np.array(y_cumulative), metadata

print("🔄 Creating LSTM sequences...")
sequence_length = 30

X_train, y_train, train_metadata = create_cumulative_sequences(daily_data, sequence_length)

print(f"✅ Training sequences: {X_train.shape}")
print(f"✅ Target shape: {y_train.shape}")

🔄 Creating LSTM sequences...
✅ Training sequences: (36366, 30, 18)
✅ Target shape: (36366,)
✅ Training sequences: (36366, 30, 18)
✅ Target shape: (36366,)


## 5. Scale Features

In [7]:
# Reshape for scaling
n_samples, n_timesteps, n_features = X_train.shape
X_train_reshaped = X_train.reshape(-1, n_features)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_reshaped)
X_train_scaled = X_train_scaled.reshape(n_samples, n_timesteps, n_features)

# Scale target
target_scaler = StandardScaler()
y_train_scaled = target_scaler.fit_transform(y_train.reshape(-1, 1)).flatten()

print(f"✅ Features scaled")

✅ Features scaled


## 6. Custom Quantile Loss

In [8]:
def quantile_loss(alpha=0.2):
    """Quantile loss for α=0.2"""
    def loss(y_true, y_pred):
        error = y_true - y_pred
        return tf.reduce_mean(tf.maximum(alpha * error, (alpha - 1) * error))
    return loss

## 7. Build LSTM Model

In [9]:
print("🏗️ Building LSTM model...")

model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(n_timesteps, n_features)),
    Dropout(0.2),
    BatchNormalization(),
    
    LSTM(32),
    Dropout(0.2),
    BatchNormalization(),
    
    Dense(16, activation='relu'),
    Dropout(0.2),
    
    Dense(1, activation='linear')
])

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss=quantile_loss(alpha=0.2),
    metrics=['mae']
)

model.summary()

🏗️ Building LSTM model...


2025-10-30 12:24:38.285752: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2025-10-30 12:24:38.285786: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:160] env: CUDA_VISIBLE_DEVICES="-1"
2025-10-30 12:24:38.285790: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:163] CUDA_VISIBLE_DEVICES is set to -1 - this hides all GPUs from CUDA
2025-10-30 12:24:38.285793: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:171] verbose logging is disabled. Rerun with verbose logging (usually --v=1 or --vmodule=cuda_diagnostics=1) to get more diagnostic output from this module
2025-10-30 12:24:38.285796: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:176] retrieving CUDA diagnostic information for host: tuf
2025-10-30 12:24:38.285799: I external/local_xla/xla/stream_executor/cuda/cuda_diag

## 8. Train Model

In [10]:
print("🚀 Training model...")

callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)
]

history = model.fit(
    X_train_scaled, y_train_scaled,
    validation_split=0.2,
    batch_size=32,
    epochs=50,
    callbacks=callbacks,
    verbose=1
)

print("✅ Training completed!")

🚀 Training model...
Epoch 1/50
Epoch 1/50
[1m910/910[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7ms/step - loss: 0.1182 - mae: 0.4569 - val_loss: 0.0551 - val_mae: 0.2359 - learning_rate: 0.0010
Epoch 2/50
[1m910/910[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7ms/step - loss: 0.1182 - mae: 0.4569 - val_loss: 0.0551 - val_mae: 0.2359 - learning_rate: 0.0010
Epoch 2/50
[1m910/910[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - loss: 0.0755 - mae: 0.2999 - val_loss: 0.0555 - val_mae: 0.2283 - learning_rate: 0.0010
Epoch 3/50
[1m910/910[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - loss: 0.0755 - mae: 0.2999 - val_loss: 0.0555 - val_mae: 0.2283 - learning_rate: 0.0010
Epoch 3/50
[1m910/910[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - loss: 0.0665 - mae: 0.2716 - val_loss: 0.0573 - val_mae: 0.2027 - learning_rate: 0.0010
Epoch 4/50
[1m910/910[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - l

## 9. Generate Predictions for Submission

In [12]:
print("🎯 Generating submission predictions...")

# Define feature columns
feature_cols = [col for col in daily_data.columns if col not in ['date', 'rm_id', 'total_weight']]
print(f"Using {len(feature_cols)} features for prediction")

# Group prediction mapping by material
predictions_dict = {}

for rm_id in prediction_mapping['rm_id'].unique():
    material_history = daily_data[daily_data['rm_id'] == rm_id].copy()
    
    if len(material_history) < sequence_length:
        # Fallback: use historical median
        predictions_dict[rm_id] = receivals[receivals['rm_id'] == rm_id]['net_weight'].median()
        continue
    
    # Get last sequence
    last_sequence = material_history[feature_cols].iloc[-sequence_length:].values
    last_sequence_scaled = scaler.transform(last_sequence.reshape(-1, n_features)).reshape(1, sequence_length, n_features)
    
    # Predict
    pred_scaled = model.predict(last_sequence_scaled, verbose=0)
    pred_cumulative = target_scaler.inverse_transform(pred_scaled)[0, 0]
    
    predictions_dict[rm_id] = max(0, pred_cumulative)

# Create submission
submission = sample_submission.copy()
submission['predicted_weight'] = 0

for idx, row in prediction_mapping.iterrows():
    rm_id = row['rm_id']
    end_date = row['forecast_end_date']
    start_date = row['forecast_start_date']
    
    # Calculate days from start to end
    days_diff = (end_date - start_date).days
    
    # Scale prediction by time period
    base_prediction = predictions_dict.get(rm_id, receivals['net_weight'].median())
    scaled_prediction = base_prediction * (days_diff / 30)  # Scale from 30-day prediction
    
    submission.loc[submission['ID'] == row['ID'], 'predicted_weight'] = max(0, scaled_prediction)

# Ensure cumulative property: predictions should not decrease for same material
submission = submission.merge(prediction_mapping[['ID', 'rm_id', 'forecast_end_date']], on='ID')
submission = submission.sort_values(['rm_id', 'forecast_end_date'])

for rm_id in submission['rm_id'].unique():
    mask = submission['rm_id'] == rm_id
    submission.loc[mask, 'predicted_weight'] = submission.loc[mask, 'predicted_weight'].cummax()

submission = submission[['ID', 'predicted_weight']].sort_values('ID')

# Save submission
submission.to_csv('lstm_submission.csv', index=False)

print(f"✅ Submission saved: lstm_submission.csv")
print(f"📊 Predictions: {len(submission)}")
print(f"📈 Stats:\n{submission['predicted_weight'].describe()}")

🎯 Generating submission predictions...
Using 18 features for prediction
✅ Submission saved: lstm_submission.csv
📊 Predictions: 30450
📈 Stats:
count    3.045000e+04
mean     6.416033e+05
std      1.541047e+06
min      2.966667e+01
25%      1.451258e+04
50%      6.358117e+04
75%      5.218914e+05
max      1.952824e+07
Name: predicted_weight, dtype: float64
✅ Submission saved: lstm_submission.csv
📊 Predictions: 30450
📈 Stats:
count    3.045000e+04
mean     6.416033e+05
std      1.541047e+06
min      2.966667e+01
25%      1.451258e+04
50%      6.358117e+04
75%      5.218914e+05
max      1.952824e+07
Name: predicted_weight, dtype: float64
