In [None]:
import json
import time

import mlflow
import pandas as pd
import requests
from pycaret.classification import setup, create_model, tune_model, predict_model
from sklearn.metrics import log_loss, f1_score

# Select the MLflow experiment that the runs below are logged under.
mlflow.set_experiment("Training")

# Read the raw dataset and the pre-split modelling tables from disk.
original_data = pd.read_csv("./../Data/Raw/kobe_dataset.csv")
train_data = pd.read_parquet("./../Data/Modeling/base_train.parquet")
test_data = pd.read_parquet("./../Data/Modeling/base_test.parquet")

# Separate the label column from the feature columns in each split.
y_train = train_data["shot_made_flag"]
X_train = train_data.drop(columns="shot_made_flag")

y_test = test_data["shot_made_flag"]
X_test = test_data.drop(columns="shot_made_flag")

# Configure the PyCaret session on the training frame (features first, label
# last) with experiment logging switched on.
clf_setup = setup(
    data=pd.concat([X_train, y_train], axis=1),
    target="shot_made_flag",
    experiment_name="Training",
    log_experiment=True,
    fold_shuffle=True,
    n_jobs=-2,
)

# Create and train a Logistic Regression model with 5-fold cross-validation.
lr_model = create_model("lr", cross_validation=True, fold=5, verbose=False)

# Make predictions on the held-out test set.
preds = predict_model(lr_model, data=X_test)

# `prediction_score` is the probability of the *predicted* label, whereas
# log_loss expects the probability of the positive class. Flip the score for
# rows predicted as the negative class before scoring.
# NOTE(review): assumes a binary target whose positive label is 1 - confirm.
positive_proba = preds["prediction_score"].where(
    preds["prediction_label"] == 1,
    1 - preds["prediction_score"],
)
loss = log_loss(y_test, positive_proba)

# Log the test log loss to MLflow. The context manager ends the run on exit,
# so no explicit end_run() is needed (an extra call could close a parent run).
with mlflow.start_run(nested=True):
    mlflow.log_metric("log_loss", loss)

# Create and train a Random Forest classifier with PyCaret's default settings.
rf_model = create_model("rf")

# Make predictions on the held-out test set.
preds = predict_model(rf_model, data=X_test)

# `prediction_score` is the probability of the *predicted* label, whereas
# log_loss expects the probability of the positive class. Flip the score for
# rows predicted as the negative class before scoring.
# NOTE(review): assumes a binary target whose positive label is 1 (the same
# assumption f1_score's default pos_label makes) - confirm.
positive_proba = preds["prediction_score"].where(
    preds["prediction_label"] == 1,
    1 - preds["prediction_score"],
)
loss = log_loss(y_test, positive_proba)
f1 = f1_score(y_test, preds["prediction_label"])

# Log both test metrics to MLflow. The context manager ends the run on exit,
# so no explicit end_run() is needed (an extra call could close a parent run).
with mlflow.start_run(nested=True):
    mlflow.log_metric("log_loss", loss)
    mlflow.log_metric("f1_score", f1)

# Register the model in MLflow
mlflow.sklearn.log_model(rf_model, "classifier")
print("Model saved in run %s" % mlflow.active_run().info.run_uuid)

# Serve the model using MLflow
!mlflow models serve -m {model_uri} -p 1234 --no-conda &

# Keep only the rows of the raw dataset whose shot type is '3PT Field Goal'.
is_three_pointer = original_data["shot_type"] == "3PT Field Goal"
kobe_data_3pt = original_data[is_three_pointer]

# Prepare the new data: split off the label, then persist the features.
y_new = kobe_data_3pt["shot_made_flag"]
X_new = kobe_data_3pt.drop(columns="shot_made_flag")
X_new.to_parquet("./../Data/Modeling/base_3pt.parquet")

# API
# The model server is started in the background, so it may not be accepting
# connections yet. Poll its health endpoint for a bounded time before sending
# the real request; if it never comes up, the POST below raises as before.
for _ in range(30):
    try:
        requests.get("http://127.0.0.1:1234/ping", timeout=2)
        break
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        time.sleep(1)

# NOTE(review): the accepted payload shape depends on the MLflow version
# (newer servers expect the frame wrapped under a "dataframe_split" key and
# wrap the reply under "predictions") - confirm against the installed version.
data = X_new.to_json(orient="split")
headers = {"Content-Type": "application/json"}
response = requests.post(
    "http://127.0.0.1:1234/invocations",
    data=data,
    headers=headers,
    timeout=120,
)
# Fail loudly on an HTTP error instead of parsing the error body as predictions.
response.raise_for_status()

predictions = pd.Series(response.json())

# Apply the trained model and calculate the log loss and F1 score.
# NOTE(review): if the endpoint returns hard class labels rather than
# probabilities, this log loss is not meaningful; also presumes every row of
# y_new carries a label - confirm both.
log_loss_result = log_loss(y_new, predictions)
f1_score_result = f1_score(y_new, predictions.round())

print(f"Log Loss: {log_loss_result}")
print(f"F1 Score: {f1_score_result}")