In [1]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [2]:
def prepare_data(series, look_back=3):
    """Convert series into X, y pairs for LSTM."""
    dataX, dataY = [], []
    for i in range(len(series) - look_back):
        dataX.append(series[i:(i + look_back)])
        dataY.append(series[i + look_back])
    return np.array(dataX), np.array(dataY)

In [3]:
# Initialize scalers and models
scalers = {}
models = {}

def train_lstm(df, look_back=3, epochs=50, batch_size=32):
    """Train LSTM models for each category-item."""
    global scalers, models
    for (cat, item), group in df.groupby(['category_code', 'item_code']):
        if len(group) < look_back + 1:
            continue
        
        group = group.sort_values('month')
        prices = group['price'].values.reshape(-1, 1)
        
        # Scale prices
        scaler = MinMaxScaler()
        scaled_prices = scaler.fit_transform(prices)
        scalers[(cat, item)] = scaler
        
        # Prepare data
        X, y = prepare_data(scaled_prices, look_back)
        X = X.reshape((X.shape[0], X.shape[1], 1))  # Add the time-step dimension
        
        # Train-test split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        
        # Build LSTM model
        model = Sequential([
            LSTM(50, input_shape=(look_back, 1), return_sequences=True),
            LSTM(50),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss='mean_squared_error')
        
        # Train
        model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test), verbose=1)
        models[(cat, item)] = model


In [4]:
def predict_lstm(df, forecast_periods=1, look_back=3):
    """Make predictions using trained LSTM models."""
    results = []
    for (cat, item), model in models.items():
        scaler = scalers[(cat, item)]
        
        # Get historical data
        group = df[(df['category_code'] == cat) & (df['item_code'] == item)].sort_values('month')
        prices = group['price'].values.reshape(-1, 1)
        scaled_prices = scaler.transform(prices)
        
        # Use the last `look_back` prices for forecasting
        inputs = scaled_prices[-look_back:]
        forecast = []
        for _ in range(forecast_periods):
            prediction = model.predict(inputs.reshape(1, look_back, 1))[0][0]
            forecast.append(prediction)
            inputs = np.append(inputs[1:], [[prediction]], axis=0)
        
        # Scale predictions back to original values
        forecast = scaler.inverse_transform(np.array(forecast).reshape(-1, 1)).flatten()
        
        # Get future dates
        last_date = group['month'].max()
        future_dates = pd.date_range(last_date, periods=forecast_periods, freq='M')
        
        # Store predictions
        predictions = pd.DataFrame({
            'category_code': cat,
            'item_code': item,
            'month': future_dates,
            'price': forecast
        })
        results.append(predictions)
    
    return pd.concat(results, ignore_index=True) if results else pd.DataFrame()


In [5]:
# Create a sample series with category_code, item_code, month, and price columns
np.random.seed(42)
dates = pd.date_range(start="2024-01-01", end="2024-12-31", freq='M')
categories = ['cat_A', 'cat_B']
items = ['item_1', 'item_2']
data = []

for category in categories:
    for item in items:
        # Generate random price data with a trend and some noise
        x = random.randint(10, 99)
        number_with_zeros = int(f"{x}00")
        y = 10
        prices = [number_with_zeros + (10 * i) for i in range(y)]
        prices = np.round(prices, 2)  # Round to 2 decimal places
        
        for date, price in zip(dates, prices):
            data.append({'category_code': category, 'item_code': item, 'month': date, 'price': price})

# Convert to DataFrame
sample_df = pd.DataFrame(data)

# Display the first few rows of the DataFrame
sample_df

  dates = pd.date_range(start="2024-01-01", end="2024-12-31", freq='M')


Unnamed: 0,category_code,item_code,month,price
0,cat_A,item_1,2024-01-31,1900
1,cat_A,item_1,2024-02-29,1910
2,cat_A,item_1,2024-03-31,1920
3,cat_A,item_1,2024-04-30,1930
4,cat_A,item_1,2024-05-31,1940
5,cat_A,item_1,2024-06-30,1950
6,cat_A,item_1,2024-07-31,1960
7,cat_A,item_1,2024-08-31,1970
8,cat_A,item_1,2024-09-30,1980
9,cat_A,item_1,2024-10-31,1990


In [6]:
train_lstm(sample_df, look_back=3, epochs=50, batch_size=32)

Epoch 1/50


  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 0.6588 - val_loss: 0.1510
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.6350 - val_loss: 0.1441
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.6120 - val_loss: 0.1374
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.5896 - val_loss: 0.1309
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.5676 - val_loss: 0.1245
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.5459 - val_loss: 0.1181
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.5242 - val_loss: 0.1117
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.5026 - val_loss: 0.1053
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms

  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 0.6294 - val_loss: 0.1474
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.6062 - val_loss: 0.1406
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.5834 - val_loss: 0.1338
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.5608 - val_loss: 0.1271
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.5383 - val_loss: 0.1205
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.5159 - val_loss: 0.1139
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.4933 - val_loss: 0.1072
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.4705 - val_loss: 0.1005
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms

  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 0.6441 - val_loss: 0.1487
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.6194 - val_loss: 0.1415
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.5952 - val_loss: 0.1343
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.5714 - val_loss: 0.1273
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - loss: 0.5477 - val_loss: 0.1202
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.5240 - val_loss: 0.1132
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.5002 - val_loss: 0.1062
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.4761 - val_loss: 0.0991
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms

  super().__init__(**kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - loss: 0.6096 - val_loss: 0.1445
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.5867 - val_loss: 0.1374
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.5640 - val_loss: 0.1304
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.5412 - val_loss: 0.1234
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 0.5183 - val_loss: 0.1164
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 0.4952 - val_loss: 0.1093
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 0.4717 - val_loss: 0.1021
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.4478 - val_loss: 0.0949
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms

In [7]:
forecast = predict_lstm(sample_df, forecast_periods=12, look_back=3)
print(forecast)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[

  future_dates = pd.date_range(last_date, periods=forecast_periods, freq='M')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


  future_dates = pd.date_range(last_date, periods=forecast_periods, freq='M')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/s

  future_dates = pd.date_range(last_date, periods=forecast_periods, freq='M')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
   category_code item_code      month        price
0          cat_A    item_1 2024-10-31  1986.511597
1          cat_A    item_1 2024-11-30  1992.332275
2          cat_A    item_1 2024-12-31  1995.933472
3          cat_A    item_1 2025-01-31  1996.500732
4          cat_A    item_1 2025-02-28  1999.670166
5          cat_A    item_1 2025-03-31  2001.556396
6          cat_A    item_1 2025-04-30  2002.789917
7          cat_A    

  future_dates = pd.date_range(last_date, periods=forecast_periods, freq='M')


In [8]:
from sklearn.metrics import mean_squared_error

def evaluate_lstm(df, look_back=3):
    """Evaluate LSTM models."""
    for (cat, item), model in models.items():
        scaler = scalers[(cat, item)]
        
        group = df[(df['category_code'] == cat) & (df['item_code'] == item)].sort_values('month')
        prices = group['price'].values.reshape(-1, 1)
        scaled_prices = scaler.transform(prices)
        
        # Prepare test data
        X, y = prepare_data(scaled_prices, look_back)
        X = X.reshape((X.shape[0], X.shape[1], 1))
        predictions = model.predict(X)
        predictions = scaler.inverse_transform(predictions)
        actuals = scaler.inverse_transform(y.reshape(-1, 1))
        
        rmse = np.sqrt(mean_squared_error(actuals, predictions))
        print(f"Category: {cat}, Item: {item}, RMSE: {rmse}")

In [9]:
evaluate_lstm(sample_df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step
Category: cat_A, Item: item_1, RMSE: 6.392275315847198
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 211ms/step
Category: cat_A, Item: item_2, RMSE: 6.314882446651608
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step
Category: cat_B, Item: item_1, RMSE: 6.52712572713184
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step
Category: cat_B, Item: item_2, RMSE: 6.550692044164192


In [10]:
predict_lstm(sample_df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


  future_dates = pd.date_range(last_date, periods=forecast_periods, freq='M')
  future_dates = pd.date_range(last_date, periods=forecast_periods, freq='M')
  future_dates = pd.date_range(last_date, periods=forecast_periods, freq='M')
  future_dates = pd.date_range(last_date, periods=forecast_periods, freq='M')


Unnamed: 0,category_code,item_code,month,price
0,cat_A,item_1,2024-10-31,1986.511597
1,cat_A,item_2,2024-10-31,2786.765625
2,cat_B,item_1,2024-10-31,5586.171387
3,cat_B,item_2,2024-10-31,6586.04541
