Import libraries

In [1]:
import xgboost as xgb
import pandas as pd
from sklearn.metrics import accuracy_score

import hopsworks

  from .autonotebook import tqdm as notebook_tqdm


Get model from model registry

In [2]:
project = hopsworks.login()

# Get the model registry
mr = project.get_model_registry()

2025-01-06 23:37:24,672 INFO: Initializing external client
2025-01-06 23:37:24,673 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-06 23:37:26,702 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1149079


In [3]:
# Retrieve the model from the model registry
retrieved_model = mr.get_model(
    name="news_propagation_model",
    version=4,
)

# Download the saved model files to a local directory
saved_model_dir = retrieved_model.download()

Downloading model artifact (0 dirs, 1 files)... DONE

In [4]:
# Initialize the model
model = xgb.XGBClassifier()

# Load the model from a saved JSON file
model.load_model(saved_model_dir + "/model.json")
model

Get features to predict

In [5]:
fs = project.get_feature_store()

In [6]:
user_query_fg = fs.get_feature_group(
    name="user_query",
    version=1,
)

In [7]:
user_query = user_query_fg.select_all().read()
user_query

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.93s) 


Unnamed: 0,news_id,average_followers,average_follows,repost_total,post_total,repost_percentage,average_repost,average_favorite,news_lifetime,nb_users_10_hours,average_time_difference,retweet_percentage_1_hour
0,macron friends trump,3356.566667,2092.5,92,16,0.851852,5.796296,28.966667,35914710.0,1,0.0,0.009259
1,mbappe in italy,6694.857143,219.285714,2,6,0.25,0.25,2.142857,24294410.0,1,0.0,0.125
2,israel defeat,2381.449541,1010.926606,54,96,0.36,1.36,2.266055,1486304.0,2,0.0,0.013333
3,france victory japan,7171.285714,501.142857,92,11,0.893204,7.883495,23.952381,43633080.0,1,0.0,0.009709


In [8]:
selected_features = user_query_fg.select_except(["news_id"])

In [9]:
features_to_predict = selected_features.read()

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.52s) 


In [11]:
predictions_fg = fs.get_or_create_feature_group(
    name="news_propagation_predictions",
    version=1,
    description="News propagation prediction results",
    primary_key=["user_query"],
)

In [12]:
try :
    predictions_df = predictions_fg.read()
except:
    predictions = model.predict(features_to_predict)
    predictions_df = user_query["news_id"].to_frame().rename(columns={"news_id": "user_query"})
    predictions_df["prediction"] = predictions

In [13]:
predictions_df

Unnamed: 0,user_query,prediction
0,macron friends trump,1
1,mbappe in italy,1
2,israel defeat,0
3,france victory japan,1


In [14]:
predictions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   user_query  4 non-null      object
 1   prediction  4 non-null      int32 
dtypes: int32(1), object(1)
memory usage: 176.0+ bytes


In [15]:
prediction = model.predict(features_to_predict.tail(1))
prediction

array([1])

In [16]:
new_prediction = pd.DataFrame({"user_query": user_query.tail(1)["news_id"].values[0], "prediction": prediction})
new_prediction

Unnamed: 0,user_query,prediction
0,france victory japan,1


In [17]:
new_prediction.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   user_query  1 non-null      object
 1   prediction  1 non-null      int32 
dtypes: int32(1), object(1)
memory usage: 140.0+ bytes


In [18]:
predictions_df = predictions_df._append(new_prediction, ignore_index=True)

In [19]:
predictions_df

Unnamed: 0,user_query,prediction
0,macron friends trump,1
1,mbappe in italy,1
2,israel defeat,0
3,france victory japan,1
4,france victory japan,1


In [20]:
predictions_fg.insert(predictions_df, write_options={"wait_for_job": True})

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1149079/fs/1139782/fg/1394685


Uploading Dataframe: 100.00% |██████████| Rows 5/5 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: news_propagation_predictions_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai:443/p/1149079/jobs/named/news_propagation_predictions_1_offline_fg_materialization/executions
2025-01-06 23:37:50,944 INFO: Waiting for execution to finish. Current state: SUBMITTED. Final status: UNDEFINED
2025-01-06 23:37:54,133 INFO: Waiting for execution to finish. Current state: RUNNING. Final status: UNDEFINED
2025-01-06 23:39:29,683 INFO: Waiting for execution to finish. Current state: AGGREGATING_LOGS. Final status: SUCCEEDED
2025-01-06 23:39:29,856 INFO: Waiting for log aggregation to finish.
2025-01-06 23:39:38,458 INFO: Execution finished successfully.


(Job('news_propagation_predictions_1_offline_fg_materialization', 'SPARK'),
 None)