# Batch Inference Pipeline

In this notebook, we will do the following tasks:
1. Create a batch inference pipeline using the pre-trained model.
2. Run the pipeline and get the predictions.


In [1]:
import json
import os

import hopsworks
import numpy as np
import pandas as pd
import torch
from dotenv import load_dotenv

# Load the variables defined in the .env file into the process environment
load_dotenv()

# Read the Hopsworks API key from the environment (None when it is not set)
hopsworks_api_key = os.environ.get("HOPSWORKS_API_KEY")

In [2]:
# Fail fast with a clear message when the key is missing: wrapping a missing
# key in str() would send the literal string "None" to Hopsworks as the API key.
if not hopsworks_api_key:
    raise RuntimeError(
        "HOPSWORKS_API_KEY is not set; define it in the environment or in the .env file."
    )

# Log in to the Hopsworks project and get a handle to its feature store
project = hopsworks.login(api_key_value=hopsworks_api_key)
fs = project.get_feature_store()


2025-02-22 12:57:07,116 INFO: Initializing external client
2025-02-22 12:57:07,125 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-02-22 12:57:10,615 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1212597


### Get the model from the model registry

In [3]:
# Handle to the project's model registry
mr = project.get_model_registry()

# Criteria used to rank the registered model versions
EVALUATION_METRIC = "mean_squared_error"
SORT_METRICS_BY = "min"  # your sorting criteria

# Pick the best registered version of the model according to the criteria above
MODEL_NAME = "amazon_stock_price_prediction_model_torch"
best_model = mr.get_best_model(MODEL_NAME, EVALUATION_METRIC, SORT_METRICS_BY)


In [4]:
# Download the selected model's artifact into a local directory
model_dir = "../models/amazon_stock_price_prediction_model_torch"
best_model.download(model_dir)

# map_location="cpu" remaps tensors that were saved from a GPU onto the CPU,
# so the checkpoint also loads on machines without CUDA (this notebook runs
# the model on CPU). weights_only=True restricts unpickling to tensors and
# plain containers instead of arbitrary Python objects.
state_dict = torch.load(
    f"{model_dir}/model.pt",
    map_location="cpu",
    weights_only=True,
)

# Hyper-parameters stored next to the preprocessor; they describe the network
# dimensions needed to rebuild the architecture before loading the weights.
with open("../preprocessor/hyper_params.json", "r") as f:
    hyper_params = json.load(f)

Downloading model artifact (0 dirs, 1 files)... DONE

In [5]:
# Define the model architecture
import torch
from torch import nn
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head applied to the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of values produced by the linear head.
        num_layers: Number of stacked LSTM layers.
        device: Kept on the instance for backward compatibility. The forward
            pass does not rely on it; initial states follow the input tensor.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int, num_layers: int, device: str = 'cpu'):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.device = device

        # LSTM layer; batch_first=True means inputs are (batch, seq_len, input_dim)
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        # Linear head mapping the hidden features to the output values
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the LSTM over ``x`` and project the last time step.

        Args:
            x: Input of shape (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, output_dim).
        """
        # Allocate the zero initial states from the input itself so they always
        # share its device and dtype. Building them from the stored `device`
        # string breaks as soon as the module is moved with `.to(...)` or cast
        # with `.double()` after construction.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_dim)
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_dim)

        out, _ = self.lstm(x, (h0, c0))
        # Only the features of the final time step feed the linear head
        return self.fc(out[:, -1, :])
        


In [6]:
# Rebuild the network on the CPU using the saved hyper-parameters
model = LSTMModel(
    input_dim=hyper_params['input_size'],
    hidden_dim=hyper_params['hidden_size'],
    output_dim=hyper_params['forecast_steps'],
    num_layers=hyper_params['num_layers'],
    device='cpu',
).to('cpu')

# Restore the trained weights; the returned key-matching report is the cell output
model.load_state_dict(state_dict)


<All keys matched successfully>

### Get the feature view

In [7]:
# Look up the feature view named "amazon_fv" in the feature store (no version is passed)
amazon_fv = fs.get_feature_view("amazon_fv")





In [8]:
# Number of most recent rows fed to the model as a single input window
WINDOW_SIZE = 24

batch_data = amazon_fv.get_batch_data()

# Parse `date` before sorting so the order is chronological whatever the raw
# dtype of the column is (the rows do not come back sorted).
batch_data['date'] = pd.to_datetime(batch_data['date'], utc=True)

assert len(batch_data) >= WINDOW_SIZE, (
    f"need at least {WINDOW_SIZE} rows to build the input window, got {len(batch_data)}"
)

# Keep the last WINDOW_SIZE rows (most recent observations) as the model input;
# `date` is dropped so only the feature columns remain.
sample = batch_data.sort_values('date').drop('date', axis=1).tail(WINDOW_SIZE)

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.52s) 


In [9]:
import pandas as pd
# Parse `date` as timezone-aware UTC timestamps and order the rows chronologically
batch_data = (
    batch_data
    .assign(date=pd.to_datetime(batch_data['date'], utc=True))
    .sort_values('date')
)

# Show the six most recent rows
batch_data.tail(6)

Unnamed: 0,date,open,high,close,low
4240,2025-02-21 15:00:00+00:00,220.83,221.16,219.395,218.075
4241,2025-02-21 16:00:00+00:00,219.395,219.76,218.875,218.54
4239,2025-02-21 17:00:00+00:00,218.875,218.9,216.965,216.55
4238,2025-02-21 18:00:00+00:00,216.955,217.05,215.77,215.52
4237,2025-02-21 19:00:00+00:00,215.79,216.15,215.465,214.755
4236,2025-02-21 20:00:00+00:00,215.46,216.83,216.61,215.355


In [10]:
# Switch the model to evaluation mode before predicting
model.eval()

with torch.inference_mode():
    # Turn the window into a float tensor and add a leading batch dimension
    window = np.array(sample)
    model_input = torch.tensor(window).float().unsqueeze(0).to('cpu')
    outputs = model(model_input)

In [17]:
# Reshape the predictions into a single column, one row per predicted value
outputs = outputs.reshape(-1, 1)

In [21]:
import dataframe_image as dfi
import pandas as pd

# One index label per predicted value: derive the count from the predictions
# instead of hard-coding it, so the cell keeps working if the forecast horizon changes.
n_steps = outputs.numel()

# NOTE(review): these labels are the times of the last `n_steps` *observed*
# rows, in reverse order, not timestamps of the forecast horizon. Confirm that
# this is the intended labelling for the predictions.
time_stamps = batch_data.tail(n_steps)['date'].dt.time.values[::-1]

# Convert the tensor to NumPy explicitly and force a single column so the
# table is built the same way whether or not the predictions were already reshaped.
predictions = outputs.reshape(-1, 1).numpy()
df = pd.DataFrame(predictions, columns=["Predicted"])
df = df.set_index(time_stamps)

In [22]:
# Display the predictions table
df

Unnamed: 0,Predicted
20:00:00,180.487335
19:00:00,180.502701
18:00:00,180.586243
17:00:00,180.482452
16:00:00,180.519226
15:00:00,180.586624
