In [12]:
import os
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from xgboost import plot_importance
from sklearn.metrics import mean_squared_error, r2_score
import hopsworks


import warnings
warnings.filterwarnings("ignore")

# 1. Read your Hopsworks API Key from a file or environment variable
#    For example, if stored in 'hopsworks-api-key.txt'
with open('../data/hopsworks-api-key.txt', 'r') as f:
    api_key = f.read().strip()

# 2. Set the environment variable for Hopsworks
os.environ["HOPSWORKS_API_KEY"] = api_key

project = hopsworks.login()
fs = project.get_feature_store()

# Retrieve feature groups for Clash Royale
player_stats_fg = fs.get_feature_group(
    name='clash_royale_onehotencoding',
    version=4,
)
game_events_fg = fs.get_feature_group(
    name='clash_royale_dataset_onehotencoding',
    version=4,
)

df = player_stats_fg.read()




'''
# Retrieve features from feature groups
player_stats = player_stats_fg.select_all()
game_events = game_events_fg.select_all()

# Explicitly prefix column names to avoid ambiguity
player_stats = player_stats.with_column_renamed("player_name", "player_stats_player_name")
game_events = game_events.with_column_renamed("player_name", "game_events_player_name")

# Join the feature groups on the prefixed columns
selected_features = player_stats.join(
    game_events,
    on=["player_stats_player_name", "game_events_player_name"]
)

# Display the first 10 rows of the joined features
selected_features.show(10)


feature_view = fs.get_or_create_feature_view(
    name='clash_royale_fv',
    description="Clash Royale game prediction features",
    version=1,
    labels=['player_name'],
    #query=selected_features,
)
'''

2025-01-05 22:46:54,847 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-01-05 22:46:54,859 INFO: Initializing external client
2025-01-05 22:46:54,859 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-05 22:46:56,714 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1164438
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.61s) 


'\n# Retrieve features from feature groups\nplayer_stats = player_stats_fg.select_all()\ngame_events = game_events_fg.select_all()\n\n# Explicitly prefix column names to avoid ambiguity\nplayer_stats = player_stats.with_column_renamed("player_name", "player_stats_player_name")\ngame_events = game_events.with_column_renamed("player_name", "game_events_player_name")\n\n# Join the feature groups on the prefixed columns\nselected_features = player_stats.join(\n    game_events,\n    on=["player_stats_player_name", "game_events_player_name"]\n)\n\n# Display the first 10 rows of the joined features\nselected_features.show(10)\n\n\nfeature_view = fs.get_or_create_feature_view(\n    name=\'clash_royale_fv\',\n    description="Clash Royale game prediction features",\n    version=1,\n    labels=[\'player_name\'],\n    #query=selected_features,\n)\n'

In [13]:
dfstats = player_stats_fg.read()

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.49s) 


In [14]:
dfdata = game_events_fg.read()

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.68s) 


In [15]:
df_1 = dfdata.iloc[:500,:]
df_2 = dfdata.iloc[500:,:]

In [16]:
selected_features = game_events_fg.select(['deck','deck2','result','result2'])

In [17]:
feature_view = fs.get_or_create_feature_view(
    name='clashroyale_fv',
    description="deck features",
    version=1,
    labels=['result'],
    query=selected_features,
)

Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/1164438/fs/1155141/fv/clashroyale_fv/version/1


In [22]:
# Define the feature columns (all except the target columns)
feature_columns = [col for col in df_1.columns if col not in ['Result1', 'Result2']]

# Extract features and multiple targets for train and test sets
X_train = df_1[feature_columns]  # Features from df_1
y_train = df_1[['result', 'result2']]  # Two target variables from df_1

X_test = df_2[feature_columns]   # Features from df_2
y_test = df_2[['result', 'result2']]   # Two target variables from df_2

# Display the shapes of the resulting splits
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


X_train shape: (500, 6), y_train shape: (500, 2)
X_test shape: (125, 6), y_test shape: (125, 2)


In [21]:


X_train, X_test, y_train, y_test = feature_view.train_test_split(
    test_start=300
)

# Drop unnecessary columns
train_features = X_train.drop(['date', 'player_id'], axis=1)
test_features = X_test.drop(['date', 'player_id'], axis=1)

# Create and train the XGBoost Regressor
xgb_regressor = XGBRegressor()
xgb_regressor.fit(train_features, y_train)

# Predict game outcomes on the test set
y_pred = xgb_regressor.predict(test_features)

# Calculate and print metrics
mse = mean_squared_error(y_test.iloc[:, 0], y_pred)
print("MSE:", mse)

r2 = r2_score(y_test.iloc[:, 0], y_pred)
print("R squared:", r2)

# Save predictions to DataFrame
df = y_test
df['predicted_game_outcome'] = y_pred

# Create directory for model artifacts
model_dir = "clash_royale_model"
if not os.path.exists(model_dir):
    os.mkdir(model_dir)
images_dir = model_dir + "/images"
if not os.path.exists(images_dir):
    os.mkdir(images_dir)

# Visualize and save feature importance
plot_importance(xgb_regressor, max_num_features=4)
feature_importance_path = images_dir + "/feature_importance.png"
plt.savefig(feature_importance_path)
plt.show()

from hsml.schema import Schema
from hsml.model_schema import ModelSchema

# Define input and output schemas
input_schema = Schema(X_train)
output_schema = Schema(y_train)
model_schema = ModelSchema(input_schema=input_schema, output_schema=output_schema)
schema_dict = model_schema.to_dict()

# Save the XGBoost model
xgb_regressor.save_model(model_dir + "/model.json")
mr = project.get_model_registry()

# Register the model in the Hopsworks model registry
cr_model = mr.python.create_model(
    name="clash_royale_xgboost_model",
    metrics={"mse": mse, "r2": r2},
    model_schema=model_schema,
    input_example=X_test.sample().values,
    description="Clash Royale game outcome predictor",
)

# Save the model artifacts to the registry
cr_model.save(model_dir)


RestAPIError: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/1164438/featurestores/1155141/featureview/clashroyale_fv/version/1/trainingdatasets). Server response: 
HTTP code: 400, HTTP reason: Bad Request, body: b'{"errorCode":270172,"usrMsg":"Failed to define time series split because event time column is not available in one or more feature groups.","errorMsg":"Event time feature not found"}', error code: 270172, error msg: Event time feature not found, user msg: Failed to define time series split because event time column is not available in one or more feature groups.