In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

In [157]:
df = pd.read_csv('Model  2 dataset.csv')

In [158]:

# Convert Order_Date to datetime
df['Order_Date'] = pd.to_datetime(df['Order_Date'])

# Sort the data by product and date
df.sort_values(by=['Product_ID', 'Order_Date'], inplace=True)

In [159]:
print(df['Order_Date'].head())  

0   2021-03-20
1   2020-09-28
2   2023-02-06
3   2022-04-18
4   2022-11-22
Name: Order_Date, dtype: datetime64[ns]


In [160]:
# label_encoder = LabelEncoder()
label_encoder = LabelEncoder()
df['Product_ID'] = label_encoder.fit_transform(df['Product_ID'])
df['Category'] = label_encoder.fit_transform(df['Category'])
df['Sales_Channel'] = label_encoder.fit_transform(df['Sales_Channel'])
df['Customer_Segment'] = label_encoder.fit_transform(df['Customer_Segment'])
df['Seasonality'] = label_encoder.fit_transform(df['Seasonality'])
df['Holiday_Indicator'] = label_encoder.fit_transform(df['Holiday_Indicator'])
df['Weather_Conditions'] = label_encoder.fit_transform(df['Weather_Conditions'])
df['Return_Reason'] = label_encoder.fit_transform(df['Return_Reason'])
df['Region'] = label_encoder.fit_transform(df['Region'])
df['Warehouse_Location'] = label_encoder.fit_transform(df['Warehouse_Location'])

In [166]:
features = ['Price', 'Cost','Sales_Channel', 'Customer_Segment', 
            'Stock_Level', 'Safety_Stock_Level', 'Reorder_Point', 'Lead_Time', 'Supplier_Reliability', 
            'Seasonality', 'Holiday_Indicator', 'Economic_Indicators', 'Weather_Conditions', 
            'Promotion_Flag', 'Discount_Rate', 'Marketing_Spend', 'Competitor_Price', 
            'Competitor_Promotion', 'Return_Quantity', 'Average_Order_Interval', 'Region', 'Warehouse_Location']

In [167]:
df['Order_Date_Ordinal'] = df['Order_Date'].map(lambda x: x.toordinal())
features.append('Order_Date_Ordinal')

In [169]:
target = 'Order_Quantity'

In [170]:
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(df[features])
y = df[target].values

In [171]:
# Convert data into time series format
def create_dataset(X, y, time_step=1):
    X_data, y_data = [], []
    for i in range(len(X) - time_step):
        X_data.append(X[i:(i + time_step)])
        y_data.append(y[i + time_step])
    return np.array(X_data), np.array(y_data)

In [172]:
# Define the time_step for LSTM
time_step = 10


In [173]:
# Prepare data for LSTM
X_lstm, y_lstm = create_dataset(X_scaled, y, time_step)

In [174]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_lstm, y_lstm, test_size=0.2, random_state=42)


In [175]:
# Build LSTM model
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(32, return_sequences=False),
    Dense(1)
])

  super().__init__(**kwargs)


In [176]:
model.compile(optimizer='adam', loss='mean_squared_error')



In [177]:
# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)


In [178]:
# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=64, validation_data=(X_test, y_test), callbacks=[early_stopping])


Epoch 1/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - loss: 657.6575 - val_loss: 420.4468
Epoch 2/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 411.1716 - val_loss: 324.7046
Epoch 3/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 319.5211 - val_loss: 267.1159
Epoch 4/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 265.2523 - val_loss: 234.0441
Epoch 5/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 234.1734 - val_loss: 216.3078
Epoch 6/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 217.6821 - val_loss: 207.8256
Epoch 7/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 213.5384 - val_loss: 204.2733
Epoch 8/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 209.9483 - val_loss: 202.9695

<keras.src.callbacks.history.History at 0x164a7b09520>

In [185]:
def predict_order_quantity(order_date, product_id):
    """
    Predict the order quantity based on the given order date and product ID using the trained model.
    
    Parameters:
    - order_date (str): The order date in 'YYYY-MM-DD' format.
    - product_id (int): The product ID (integer).
    
    Returns:
    - float: The predicted order quantity.
    """
    
    # Convert Order_Date to ordinal format
    input_date = pd.to_datetime(order_date)
    input_date_ordinal = input_date.toordinal()
    
    # Fill in other feature values with historical averages or defaults
    default_features = {
        'Price': 100,                # Example average price
        'Cost': 50,                  # Example average cost
        'Sales_Channel': 1,          # Default encoded value
        'Customer_Segment': 1,
        'Stock_Level': 500,
        'Safety_Stock_Level': 50,
        'Reorder_Point': 100,
        'Lead_Time': 5,
        'Supplier_Reliability': 0.95,
        'Seasonality': 1,
        'Holiday_Indicator': 0,
        'Economic_Indicators': 1.0,
        'Weather_Conditions': 1,
        'Promotion_Flag': 0,
        'Discount_Rate': 0.1,
        'Marketing_Spend': 200,
        'Competitor_Price': 110,
        'Competitor_Promotion': 0,
        'Return_Quantity': 10,
        'Average_Order_Interval': 30,
        'Region': 1,
        'Warehouse_Location': 1,
        
    }
    
    # Build input features (including Product_ID)
    input_features = default_features
    input_features['Product_ID'] = product_id  # Product ID should be included
    
    # Convert input features to array
    input_array = list(input_features.values())
    
    # Check if the number of features is correct
    # if len(input_array) != 24:
    #     print(f"Error: Expected 24 features, but found {len(input_array)}.")
    #     return None
    
    # Normalize the input features using the previously fitted scaler
    input_array_scaled = scaler.transform([input_array])  # Scaling with 24 features
    
    # Format input for LSTM (time-step, features)
    input_lstm = np.expand_dims(input_array_scaled, axis=0)  # Shape: (1, time_step, features)
    
    # Predict order quantity
    predicted_quantity = model.predict(input_lstm)
    return predicted_quantity[0][0]*100

In [186]:
predicted_quantity = predict_order_quantity(
    order_date='2024-02-14',
    product_id=1005 
)

print(f'Predicted Order Quantity: {predicted_quantity}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
Predicted Order Quantity: 43.68339920043945


