In [189]:
import os
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from xgboost import plot_importance
from sklearn.metrics import mean_squared_error, r2_score
import hopsworks


import warnings
warnings.filterwarnings("ignore")

# 1. Read your Hopsworks API Key from a file or environment variable
#    For example, if stored in 'hopsworks-api-key.txt'
with open('../data/hopsworks-api-key.txt', 'r') as f:
    api_key = f.read().strip()

# 2. Set the environment variable for Hopsworks
os.environ["HOPSWORKS_API_KEY"] = api_key

project = hopsworks.login()
fs = project.get_feature_store()

# Retrieve feature groups for Clash Royale
player_stats_fg = fs.get_feature_group(
    name='clash_royale_onehotencoding',
    version=5,
)
game_events_fg = fs.get_feature_group(
    name='clash_royale_dataset_onehotencoding',
    version=5,
)

df = player_stats_fg.read()




'''
# Retrieve features from feature groups
player_stats = player_stats_fg.select_all()
game_events = game_events_fg.select_all()

# Explicitly prefix column names to avoid ambiguity
player_stats = player_stats.with_column_renamed("player_name", "player_stats_player_name")
game_events = game_events.with_column_renamed("player_name", "game_events_player_name")

# Join the feature groups on the prefixed columns
selected_features = player_stats.join(
    game_events,
    on=["player_stats_player_name", "game_events_player_name"]
)

# Display the first 10 rows of the joined features
selected_features.show(10)


feature_view = fs.get_or_create_feature_view(
    name='clash_royale_fv',
    description="Clash Royale game prediction features",
    version=1,
    labels=['player_name'],
    #query=selected_features,
)
'''

2025-01-06 16:21:28,345 INFO: Closing external client and cleaning up certificates.
Connection closed.
2025-01-06 16:21:28,354 INFO: Initializing external client
2025-01-06 16:21:28,355 INFO: Base URL: https://c.app.hopsworks.ai:443
2025-01-06 16:21:31,036 INFO: Python Engine initialized.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1164438
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.56s) 


'\n# Retrieve features from feature groups\nplayer_stats = player_stats_fg.select_all()\ngame_events = game_events_fg.select_all()\n\n# Explicitly prefix column names to avoid ambiguity\nplayer_stats = player_stats.with_column_renamed("player_name", "player_stats_player_name")\ngame_events = game_events.with_column_renamed("player_name", "game_events_player_name")\n\n# Join the feature groups on the prefixed columns\nselected_features = player_stats.join(\n    game_events,\n    on=["player_stats_player_name", "game_events_player_name"]\n)\n\n# Display the first 10 rows of the joined features\nselected_features.show(10)\n\n\nfeature_view = fs.get_or_create_feature_view(\n    name=\'clash_royale_fv\',\n    description="Clash Royale game prediction features",\n    version=1,\n    labels=[\'player_name\'],\n    #query=selected_features,\n)\n'

In [190]:
dfstats = player_stats_fg.read()

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (0.51s) 


In [191]:
dfdata = game_events_fg.read()
dfdata

Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.31s) 


Unnamed: 0,player_name,player_name2,deck1,deck2,result
0,#PRG9UCG8C,#2J9JGLUJP,"[0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
1,#QY8Q9LGY,#2R0PJUPVU,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
2,#URQJURR0,#YPG29JVR,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
3,#2V8G9RPQ8,#20GL9LPLC,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...",1
4,#PUVYRQRC,#2JC9PQ2L,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
...,...,...,...,...,...
620,#YYRPRU82V,#8R2YU8LPV,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
621,#LRUU0L22,#2C9CJVYQ8,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
622,#8PVGJCRQR,#CY00CGYV,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1
623,#2VGU0RU2,#82PJ0L0CR,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1


In [192]:
df_1 = dfdata.iloc[:500,:]
df_2 = dfdata.iloc[500:,:]

In [193]:
selected_features = game_events_fg.select(['deck1','deck2','result'])

In [194]:
feature_view = fs.get_or_create_feature_view(
    name='clashroyale_fv',
    description="deck features",
    version=3,
    labels=['result'],
    query=selected_features,
)

In [195]:
#feature_view.read()

In [196]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = feature_view.train_test_split(test_size=0.2)


Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.50s) 


In [197]:
X_train

Unnamed: 0,deck1,deck2
1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
4,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
5,"[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...
620,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
621,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
622,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
623,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [198]:
import pandas as pd

# Assuming x_train is already loaded into memory, we'll convert the deck1 and deck2 columns into arrays of numbers.
# Simulating an example since the actual x_train content is not directly available.

# Example data simulation


# Transform columns into arrays of numbers
X_train["deck1"] = X_train["deck1"].apply(lambda x: list(x))
X_train["deck2"] = X_train["deck2"].apply(lambda x: list(x))

X_train


Unnamed: 0,deck1,deck2
1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
4,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
5,"[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...
620,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
621,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
622,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
623,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [199]:
X_test = X_test.dropna()

X_train = X_train.dropna()




In [200]:
y_train = y_train.dropna()

y_test = y_test.dropna()

In [201]:
# Example of flattening columns
deck_flat = pd.DataFrame(data=list(X_train['deck1']), index=X_train.index)
deck2_flat = pd.DataFrame(data=list(X_train['deck2']), index=X_train.index)

# Merge flattened columns into the dataset
#X_train = pd.concat([X_train.drop(['deck1', 'deck2'], axis=1), deck_flat, deck2_flat], axis=1)
#X_test = pd.concat([X_test.drop(['deck1', 'deck2'], axis=1), 
                #    pd.DataFrame(data=list(X_test['deck1']), index=X_test.index), 
                 #   pd.DataFrame(data=list(X_test['deck2']), index=X_test.index)], axis=1)


In [202]:
print(X_test.dtypes)


deck1    object
deck2    object
dtype: object


In [203]:
X_train


Unnamed: 0,deck1,deck2
1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
4,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
5,"[0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...
620,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
621,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
622,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
623,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [204]:
y_train

Unnamed: 0,result
1,1
2,1
3,1
4,1
5,1
...,...
620,1
621,1
622,1
623,1


In [205]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

# Creare il modello
model = Sequential([
    Input(shape=(624,)),  # Dimensione del vettore d'ingresso
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # Per classificazione binaria
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Addestrare il modello
model.fit(X_train, y_train, epochs=10, batch_size=32)


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).

In [None]:
# Drop unnecessary columns
#train_features = X_train.drop(['date', 'player_id'], axis=1)
#test_features = X_test.drop(['date', 'player_id'], axis=1)

# Create and train the XGBoost Regressor
#xgb_regressor = XGBRegressor()
xgb_regressor = XGBRegressor(enable_categorical=True)
xgb_regressor.fit(X_train, y_train)

# Predict game outcomes on the test set
y_pred = xgb_regressor.predict(X_test)

# Calculate and print metrics
mse = mean_squared_error(y_test.iloc[:, 0], y_pred)
print("MSE:", mse)

r2 = r2_score(y_test.iloc[:, 0], y_pred)
print("R squared:", r2)

# Save predictions to DataFrame
df = y_test
df['predicted_game_outcome'] = y_pred

# Create directory for model artifacts
model_dir = "clash_royale_model"
if not os.path.exists(model_dir):
    os.mkdir(model_dir)
images_dir = model_dir + "/images"
if not os.path.exists(images_dir):
    os.mkdir(images_dir)

# Visualize and save feature importance
plot_importance(xgb_regressor, max_num_features=4)
feature_importance_path = images_dir + "/feature_importance.png"
plt.savefig(feature_importance_path)
plt.show()

from hsml.schema import Schema
from hsml.model_schema import ModelSchema

# Define input and output schemas
input_schema = Schema(X_train)
output_schema = Schema(y_train)
model_schema = ModelSchema(input_schema=input_schema, output_schema=output_schema)
schema_dict = model_schema.to_dict()

# Save the XGBoost model
xgb_regressor.save_model(model_dir + "/model.json")
mr = project.get_model_registry()

# Register the model in the Hopsworks model registry
cr_model = mr.python.create_model(
    name="clash_royale_xgboost_model",
    metrics={"mse": mse, "r2": r2},
    model_schema=model_schema,
    input_example=X_test.sample().values,
    description="Clash Royale game outcome predictor",
)

# Save the model artifacts to the registry
cr_model.save(model_dir)


ValueError: DataFrame.dtypes for data must be int, float, bool or category. When categorical type is supplied, the experimental DMatrix parameter`enable_categorical` must be set to `True`.  Invalid columns:deck1: object, deck2: object

In [181]:
import pandas as pd
import numpy as np

# Example DataFrame
X_train = pd.DataFrame({
    "deck1": [[0, 1, 0, 1, 0], [0, 0, 1, 0, 1], [1, 0, 0, 0, 0]],
    "deck2": [[1, 0, 0, 1, 0], [0, 0, 1, 0, 0], [1, 1, 0, 0, 1]],
    "other_feature": [5, 10, 15]
})

# Expand `deck1` and `deck2` arrays into multiple numerical columns
deck1_expanded = pd.DataFrame(X_train["deck1"].to_list(), columns=[f"deck1_{i}" for i in range(len(X_train["deck1"].iloc[0]))])
deck2_expanded = pd.DataFrame(X_train["deck2"].to_list(), columns=[f"deck2_{i}" for i in range(len(X_train["deck2"].iloc[0]))])

# Combine expanded decks with other features
X_train_transformed = pd.concat([deck1_expanded, deck2_expanded, X_train.drop(columns=["deck1", "deck2"])], axis=1)

X_train_transformed.head()


Unnamed: 0,deck1_0,deck1_1,deck1_2,deck1_3,deck1_4,deck2_0,deck2_1,deck2_2,deck2_3,deck2_4,other_feature
0,0,1,0,1,0,1,0,0,1,0,5
1,0,0,1,0,1,0,0,1,0,0,10
2,1,0,0,0,0,1,1,0,0,1,15
