In [1]:
import os

import streamlit as st
import hopsworks
import joblib
import pandas as pd
import numpy as np
import json
import time
from datetime import timedelta, datetime
import xgboost as xgb

from src.hopsworks_utils import (
    convert_feature_names,
)

from src.feature_engineering import (
    fix_datatypes,
    remove_non_rolling,
)


# Load hopsworks API key from .env file

from dotenv import load_dotenv

load_dotenv()

try:
    HOPSWORKS_API_KEY = os.environ['HOPSWORKS_API_KEY']
except:
    raise Exception('Set environment variable HOPSWORKS_API_KEY')



def fancy_header(text, font_size=24):
    res = f'<span style="color:#ff5f27; font-size: {font_size}px;">{text}</span>'
    st.markdown(res, unsafe_allow_html=True )




# dictionary to convert team ids to team names

nba_team_names = {
    1610612737: "Atlanta Hawks",
    1610612738: "Boston Celtics",
    1610612739: "Cleveland Cavaliers",
    1610612740: "New Orleans Pelicans",
    1610612741: "Chicago Bulls",
    1610612742: "Dallas Mavericks",
    1610612743: "Denver Nuggets",
    1610612744: "Golden State Warriors",
    1610612745: "Houston Rockets",
    1610612746: "LA Clippers",
    1610612754: "Indiana Pacers",
    1610612747: "Los Angeles Lakers",
    1610612763: "Memphis Grizzlies",
    1610612748: "Miami Heat",
    1610612749: "Milwaukee Bucks",
    1610612750: "Minnesota Timberwolves",
    1610612751: "Brooklyn Nets",
    1610612752: "New York Knicks",
    1610612753: "Orlando Magic",
    1610612755: "Philadelphia 76ers",
    1610612756: "Phoenix Suns",
    1610612757: "Portland Trail Blazers",
    1610612758: "Sacramento Kings",
    1610612759: "San Antonio Spurs",
    1610612760: "Oklahoma City Thunder",
    1610612761: "Toronto Raptors",
    1610612762: "Utah Jazz",
    1610612764: "Washington Wizards",
    1610612765: "Detroit Pistons",
    1610612766: "Charlotte Hornets",
}





# Connect to Hopsworks Feature Store and get Feature Group
project = hopsworks.login(api_key_value=HOPSWORKS_API_KEY)
fs = project.get_feature_store()

rolling_stats_fg = fs.get_feature_group(
    name="rolling_stats",
    version=2,
)



# filter new games that are scheduled for today
# these are games where no points have been scored yet
ds_query = rolling_stats_fg.filter(rolling_stats_fg.pts_home == 0)
df_todays_matches = ds_query.read()

print(df_todays_matches.head(5))



# convert feature names back to mixed case
df_todays_matches = convert_feature_names(df_todays_matches)

# Add a column that displays the matchup using the team names 
# this will make the display more meaningful
df_todays_matches['MATCHUP'] = df_todays_matches['VISITOR_TEAM_ID'].map(nba_team_names) + " @ " + df_todays_matches['HOME_TEAM_ID'].map(nba_team_names)

# fix date and other types
df_todays_matches = fix_datatypes(df_todays_matches)

# remove features not used by model
drop_columns = ['TARGET', 'GAME_DATE_EST', 'GAME_ID', ] 
df_todays_matches = df_todays_matches.drop(drop_columns, axis=1)

# remove stats from today's games - these are blank (the game hasn't been played) and are not used by the model
use_columns = remove_non_rolling(df_todays_matches)
X = df_todays_matches[use_columns]

# MATCHUP is just for informational display, not used by model
X = X.drop('MATCHUP', axis=1) 

#X_dmatrix = xgb.DMatrix(X) # convert to DMatrix for XGBoost

print(df_todays_matches['MATCHUP'])



model = get_model(project=project,
                  model_name="xgboost",
                  evaluation_metric="AUC",
                  sort_metrics_by="max")




#preds = model.predict(X_dmatrix)
preds = model.predict_proba(X)[:,1]

df_todays_matches['HOME_TEAM_WIN_PROBABILITY'] = preds

df_todays_matches[['MATCHUP', 'HOME_TEAM_WIN_PROBABILITY']]



Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/3350




Connected. Call `.close()` to terminate connection gracefully.


2023-03-04 08:39:42.288 INFO    pyhive.hive: USE `nba_predictor_featurestore`
2023-03-04 08:39:42.761 INFO    pyhive.hive: SELECT `fg0`.`game_date_est` `game_date_est`, `fg0`.`game_id` `game_id`, `fg0`.`home_team_id` `home_team_id`, `fg0`.`visitor_team_id` `visitor_team_id`, `fg0`.`season` `season`, `fg0`.`pts_home` `pts_home`, `fg0`.`fg_pct_home` `fg_pct_home`, `fg0`.`ft_pct_home` `ft_pct_home`, `fg0`.`fg3_pct_home` `fg3_pct_home`, `fg0`.`ast_home` `ast_home`, `fg0`.`reb_home` `reb_home`, `fg0`.`pts_away` `pts_away`, `fg0`.`fg_pct_away` `fg_pct_away`, `fg0`.`ft_pct_away` `ft_pct_away`, `fg0`.`fg3_pct_away` `fg3_pct_away`, `fg0`.`ast_away` `ast_away`, `fg0`.`reb_away` `reb_away`, `fg0`.`home_team_wins` `home_team_wins`, `fg0`.`target` `target`, `fg0`.`month` `month`, `fg0`.`home_team_win_streak` `home_team_win_streak`, `fg0`.`home_team_wins_avg_last_3_home` `home_team_wins_avg_last_3_home`, `fg0`.`home_team_wins_avg_last_7_home` `home_team_wins_avg_last_7_home`, `fg0`.`home_team_wins_a

  game_date_est   game_id  home_team_id  visitor_team_id  season  pts_home  \
0    2023-03-04  22200957    1610612739       1610612765    2022         0   
1    2023-03-04  22200959    1610612759       1610612745    2022         0   
2    2023-03-04  22200956    1610612764       1610612761    2022         0   
3    2023-03-04  22200961    1610612758       1610612750    2022         0   
4    2023-03-04  22200960    1610612749       1610612755    2022         0   

   fg_pct_home  ft_pct_home  fg3_pct_home  ast_home  ...  \
0          0.0          0.0           0.0         0  ...   
1          0.0          0.0           0.0         0  ...   
2          0.0          0.0           0.0         0  ...   
3          0.0          0.0           0.0         0  ...   
4          0.0          0.0           0.0         0  ...   

   fg3_pct_avg_last_10_all_x_minus_y  fg3_pct_avg_last_15_all_x_minus_y  \
0                           2.089063                           1.838542   
1                   

NameError: name 'get_model' is not defined