# <span style="font-weight:bold; font-size: 3rem; color:#1EB182;"><img src="../images/icon102.png" width="38px" /> **Hopsworks Feature Store** </span><span style="font-weight:bold; font-size: 3rem; color:#333;">- Part 04: Batch Inference</span>

## 🗒️ This notebook is divided into the following sections:

1. Load batch data.
2. Predict using model from Model Registry.

## <span style='color:#ff5f27'> 📝 Imports</span>

In [2]:
import joblib
import datetime
import time
import pandas as pd
from functions import *


In [3]:
# Today's calendar date in local time, taken from the current timestamp
today = datetime.datetime.now().date()

## <span style="color:#ff5f27;"> 📡 Connecting to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# Log in to Hopsworks; the returned project handle is reused below to reach the model registry
project = hopsworks.login()

# Feature store handle of the project, used below to fetch feature views and feature groups
fs = project.get_feature_store()

## <span style="color:#ff5f27;"> ⚙️ Feature View Retrieval</span>


In [None]:
# Look up version 1 of the 'air_quality_fv' feature view in the feature store
feature_view = fs.get_feature_view(name='air_quality_fv', version=1)

## <span style="color:#ff5f27;">🗄 Model Registry</span>


In [None]:
# Retrieve the project's model registry handle (used below to fetch the trained model)
mr = project.get_model_registry()

## <span style="color:#ff5f27;">🪝 Retrieving model from Model Registry</span>

In [None]:
# Fetch version 1 of 'air_quality_xgboost_model' from the model registry
retrieved_model = mr.get_model(name="air_quality_xgboost_model", version=1)

# Download the model artifacts; the returned directory path is what the pickles are loaded from
saved_model_dir = retrieved_model.download()

In [None]:
# Load the pickled XGBoost regressor and label encoder from the downloaded model directory
# NOTE(review): joblib.load unpickles arbitrary objects; only load artifacts from a trusted registry
retrieved_xgboost_model = joblib.load(f"{saved_model_dir}/xgboost_regressor.pkl")
retrieved_encoder = joblib.load(f"{saved_model_dir}/label_encoder.pkl")

# Last expression of the cell, so the notebook renders the regressor
retrieved_xgboost_model

## <span style="color:#ff5f27;">✨ Get Weather Forecast Features with Feature View   </span>



In [None]:
# Current local date, derived from the wall-clock timestamp
today = datetime.date.fromtimestamp(time.time())

In [None]:
# Initializing batch scoring (currently disabled)
# feature_view.init_batch_scoring(1)

# Retrieving batch data from the feature view, using today's date as the start time
batch_data = feature_view.get_batch_data(start_time=today)

### <span style="color:#ff5f27;">🤖 Making the predictions</span>

In [None]:
# Transforming the 'city_name' column in the batch data using the retrieved label encoder
# encoded = retrieved_encoder.transform(batch_data['city_name'])

# # Concatenating the label-encoded 'city_name' with the original batch data
# X_batch = pd.concat([batch_data, pd.DataFrame(encoded)], axis=1)

# # Dropping unnecessary columns ('date', 'city_name', 'unix_time') from the batch data
# X_batch = X_batch.drop(columns=['date', 'city_name', 'unix_time'])

# # Renaming the newly added column with label-encoded city names to 'city_name_encoded'
# X_batch = X_batch.rename(columns={0: 'city_name_encoded'})

# # Extracting the target variable 'pm2_5' from the batch data
# y_batch = X_batch.pop('pm2_5')

In [None]:
# Run the retrieved XGBoost regressor over the batch and store its output in the 'pm25' column
batch_data['pm25'] = retrieved_xgboost_model.predict(batch_data)

# Show the first five rows, now including the predictions
batch_data.head(5)

In [None]:
# TODO: draw a graph of the predictions by date, save it as a PNG to the GitHub repo,
# and show it on GitHub Pages

In [None]:
# Fetch version 1 of the 'monitoring' feature group, creating it if it does not exist yet
monitor_fg = fs.get_or_create_feature_group(
    name='monitoring',
    version=1,
    description='Air Quality prediction monitoring',
    primary_key=['city', 'street', 'date'],
    event_time="date",
)

In [None]:
# Write the batch (including the predicted 'pm25' column) into the monitoring feature group
monitor_fg.insert(batch_data)

In [None]:
# Read the stored predictions back from the monitoring feature group.
# The read must go through `monitor_fg`: `monitoring_df` does not exist before this line.
monitoring_df = monitor_fg.read()
# the batch_data won't have been sync'd yet, so we can just append it to the historical data
# monitoring_df = pd.concat([monitoring_df, batch_data], ignore_index=True)

In [None]:
# Historical 'air_quality' feature group (version 1); its 'pm25' values serve as the outcomes
air_quality_fg = fs.get_feature_group(
    name='air_quality',
    version=1,
)
air_quality_df = air_quality_fg.read()

# Keep only date + value from each side and give the two 'pm25' columns distinct names,
# so they do not collide when merged
outcome_df = air_quality_df[['date', 'pm25']].rename(columns={'pm25': 'outcome_pm25'})
preds_df = monitoring_df[['date', 'pm25']].rename(columns={'pm25': 'prediction_pm25'})

# Inner join on date: keeps only dates that have both a stored prediction and an outcome
hindcast_df = pd.merge(preds_df, outcome_df, on="date")

In [None]:
# draw a hindcast graph of historical monitoring predictions vs outcomes
import plotly.express as px
fig = px.line(hindcast_df, x = "date" 
              #markers=True,  y = "pm25",
              title = f"Air Quality Forecast for {city}"
             )
fig.update_layout(
#    plot_bgcolor="white",
    margin=dict(t=50,l=10,b=10,r=10)
)
fig.update_layout(
    xaxis_tickformat = '%d/%m (%a)<br>Time %h:%m <br> %Y'
)

fig.update_layout()

fig.update_xaxes(
        ticks="outside", 
        tickwidth=2,
        tickcolor='black',
        ticklen=10,
        title_text = "Time",
        title_font = {"size": 36},
        title_standoff = 25)
fig.update_yaxes(
        title_text = "PM_25",
        title_font = {"size": 200},
        title_standoff = 25,
)

fig.update_layout(
        font=dict(
          family="Time",
          size=24,
          color="black"
        )
)

filename="latest_lahinch.png"
fig.write_image(file=filename, format="png", width=1920, height=1280)
#dataset_api = project.get_dataset_api()
#uploaded_file_path = dataset_api.upload(filename, "Resources", overwrite=True)

fig.show()
# 

In [None]:
# Create figures for app
# NOTE(review): `plt`, `df`, `x1`, `x2`, `prediction`, `prediction_eval` and `dataset_api` are
# not defined in the visible cells of this notebook — confirm they exist before running this cell.
import os

fig_dir = 'app_figures'
# Create the output directory if needed; exist_ok avoids a separate check-then-create step
os.makedirs(fig_dir, exist_ok=True)

# One pair of figures per column in positions 1..8 of df
for i, col in enumerate(df.columns[1:9]):
    pred_path = f'{fig_dir}/pred_{col}.png'
    prev_path = f'{fig_dir}/prev_{col}.png'

    # Figure 1: the i-th prediction series plotted over x1
    plt.plot(x1, prediction[:, i])
    plt.gcf().autofmt_xdate()
    plt.xlabel('Date')
    plt.ylabel(col)
    plt.title('Prediction of ' + col)
    plt.savefig(pred_path)
    plt.clf()  # reset the current figure so the next plot starts empty

    # Upload the images to the dataset api in hopsworks
    dataset_api.upload(pred_path, 'Resources/predictions', overwrite=True)

    # Figure 2: the i-th evaluation prediction vs the last 24 rows of df for this column
    plt.plot(x2, prediction_eval[:, i], label='Prediction')
    plt.plot(x2, df.iloc[-24:][col], label='Outcome')
    plt.xlabel('Date')
    plt.ylabel(col)
    plt.title('Prediction of ' + col)
    plt.gcf().autofmt_xdate()
    plt.legend()
    plt.savefig(prev_path)
    plt.clf()

# 



---
## <span style="color:#ff5f27;">👾 Now try out the Streamlit App!</span>

In [None]:
# !python3 -m streamlit run streamlit_app.py

---

### <span style="color:#ff5f27;">🥳 <b> Next Steps  </b> </span>
Congratulations, you've now completed the Air Quality tutorial for Managed Hopsworks.

Check out our other tutorials on ➡ https://github.com/logicalclocks/hopsworks-tutorials

Or documentation at ➡ https://docs.hopsworks.ai