## Runs the batch prediction pipeline 

Writes predictions to `wave_predictions` feature group, creates a PNG with the surf height predictions, and uploads it to Hopsworks.

### Requirements

 * hopsworks

In [None]:
import hopsworks
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import pandas as pd
import datetime
import os
import joblib

### Not app.hopsworks.ai ?

If you are running your own Hopsworks cluster (not app.hopsworks.ai):

 * uncomment the cell below
 * fill in details for your cluster
 * run the cell

In [None]:
# Uncomment this cell and fill in details if you are running your own Hopsworks cluster
# key=""
# with open("api-key.txt", "r") as f:
#     key = f.read().rstrip()
# os.environ['HOPSWORKS_PROJECT']="cjsurf"
# os.environ['HOPSWORKS_HOST']="35.187.178.84"
# os.environ['HOPSWORKS_API_KEY']=key

### Connect to your Hopsworks cluster

If you only set the HOPSWORKS_API_KEY, it will assume you are connecting to app.hopsworks.ai.
Set HOPSWORKS_HOST and HOPSWORKS_PROJECT environment variables to connect to a different Hopsworks cluster.

In [None]:
# Log in to Hopsworks and keep the returned project handle; the later cells
# obtain the feature store, model registry and dataset API from it.
project = hopsworks.login()
# Feature store handle used to look up feature views and feature groups.
fs = project.get_feature_store()

### Download the model from the model registry

Then read the pickled file and unpickle the serialized object into the model object.

In [None]:
mr = project.get_model_registry()
# Keep the registry entry under its own name so that `model` only ever refers
# to the loaded estimator used for predictions.
registry_model = mr.get_model("cjsurf", version=1)
# download() returns the local directory that holds the model artifacts.
model_dir = registry_model.download()
# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code. Only load artifacts from a model registry you trust.
model = joblib.load(os.path.join(model_dir, "knn_model.pkl"))

### Get a reference to the feature view

You need to initialize the feature view with a training data version if the feature view has transformations, as transformations need to know which training data statistics to use for normalization, encoding, etc.

In [None]:
# Fetch version 1 of the "lahinch_surf" feature view.
fv = fs.get_feature_view("lahinch_surf", version=1)
# Initialize serving with training data version 1, so that any transformations
# on the feature view know which training data statistics to use.
fv.init_serving(1)

In [None]:
# Show the feature view's `keys` attribute (presumably the key(s) a feature
# vector lookup must supply -- confirm against the Hopsworks API).
print(fv.keys)

### Retrieve the feature vector

It comes back as a Python array

In [None]:
# Lookup key for the feature vector: the entry with beach_id 1.
key = {"beach_id" : 1}
vector = fv.get_feature_vector(key)
# Inspect the raw vector; the following cell slices it purely by position.
print(vector)

### Refactor the feature vector

We are going to make 119 predictions with the vector for hour=0, hour=2, ..., hour=338.

Each prediction for hour=X is made using a feature vector containing features from the swell:

 * height
 * period
 * direction
 
Here, we have to extract the 119 feature vectors from the Python array returned from the feature store.


In [None]:
# The flat vector is consumed in strides of four values: the first three of
# each stride form one model input, and the fourth slot is skipped.
# Index 3 (the skipped slot of the first stride) holds 'pred_dtime'.
dt = vector[3]  # 'pred_dtime'

feature_vectors = [vector[:3]]
feature_vectors.extend(
    vector[start:start + 3] for start in range(4, len(vector), 4)
)
feature_vectors

### Make surf height predictions

119 predictions for hour=0, hour=2, ..., hour=338.

Store the predictions in `row_list`.

In [None]:
columns = ["beach_id", "hits_at", "wave_height"]
beach_id = 1  # every prediction row is written for this beach
hours_between_predictions = 2  # consecutive feature vectors are 2 hours apart

# Predict all rows in a single call instead of one call per feature vector.
# predict() is skipped for an empty input so the result is an empty DataFrame,
# as it was with the original per-row loop.
predictions = model.predict(feature_vectors) if len(feature_vectors) > 0 else []

# Each row is [beach_id, hits_at, wave_height]; hits_at is the base timestamp
# `dt` plus the row's offset, formatted to minute precision.
row_list = [
    [
        beach_id,
        (dt + datetime.timedelta(hours=hours_between_predictions * step)).strftime(
            "%Y-%m-%d %H:%M"
        ),
        wave_height,
    ]
    for step, wave_height in enumerate(predictions)
]

df = pd.DataFrame(row_list, columns=columns)
df

### Insert the predictions into the feature group

Store the predictions for later analysis, feature monitoring, and model performance analysis.

In [None]:
fs = project.get_feature_store()

# Definition of the feature group that stores the predictions; it is fetched
# if it already exists and created otherwise.
wave_predictions_spec = {
    "name": "wave_predictions",
    "version": 1,
    "primary_key": ["beach_id", "hits_at"],
    "description": "Lahinch surf height predictions",
    "online_enabled": True,
}
preds_fg = fs.get_or_create_feature_group(**wave_predictions_spec)

# Write the prediction rows held in `df` to the feature group.
preds_fg.insert(df)

### Create PNG file with the surf height predictions

Use plotly to create a chart with the surf height predictions. Upload it to the Resources directory in your project. 

In [None]:
import plotly.express as px

# Line chart of predicted wave height against the time each wave hits.
fig = px.line(
    df,
    x="hits_at",
    y="wave_height",
    title="Wave Heights at Lahinch",
)

fig.update_layout(
    margin=dict(t=50, l=10, b=10, r=10),
    # Tick labels show day/month (weekday), then the time of day, then the year.
    # The time must use %H:%M (hour:minute); in Plotly's date format %h is the
    # half of the year and %m is the month number, so '%h:%m' never showed
    # the time.
    xaxis_tickformat="%d/%m (%a)<br>Time %H:%M <br> %Y",
    font=dict(
        # NOTE(review): "Time" is not a standard font family name; this may be
        # intended to be "Times" or "Times New Roman" -- confirm.
        family="Time",
        size=24,
        color="black",
    ),
)

fig.update_xaxes(
    ticks="outside",
    tickwidth=2,
    tickcolor="black",
    ticklen=10,
    title_text="Time",
    title_font={"size": 36},
    title_standoff=25,
)
fig.update_yaxes(
    title_text="Wave Height (ft)",
    title_font={"size": 36},
    title_standoff=25,
)

# Render the chart to a PNG file, then upload that file to the project's
# "Resources" dataset, replacing any previous upload of the same name.
filename = "../latest_lahinch.png"
fig.write_image(file=filename, format="png", width=1920, height=1280)
dataset_api = project.get_dataset_api()
uploaded_file_path = dataset_api.upload(filename, "Resources", overwrite=True)

fig.show()

In [None]:
# TODO: update images with predictions of previous week and outcomes