In [38]:
import os

import pandas as pd
import numpy as np

import hopsworks

from datetime import datetime, timedelta
from pytz import timezone

from src.webscraping import (
    activate_web_driver,
    scrape_to_dataframe,
    convert_columns,
    combine_home_visitor,  
    get_todays_matchups,
)

from src.data_processing import (
    process_games,
    add_TARGET,
)

from src.feature_engineering import (
    process_features,
)

from src.hopsworks_utils import (
    save_feature_names,
    convert_feature_names,
)

import json

from pathlib import Path  #for Windows/Linux compatibility
DATAPATH = Path(r'data')

**Load API keys**

In [39]:
from dotenv import load_dotenv

load_dotenv()

try:
    HOPSWORKS_API_KEY = os.environ['HOPSWORKS_API_KEY']
except:
    raise Exception('Set environment variable HOPSWORKS_API_KEY')

**Activate Webdriver**

In [40]:
# initiate a webdriver in selenium 
# since website data is dynamically generated

driver = activate_web_driver('firefox')

2023-01-08 09:38:41,331 INFO: Get LATEST geckodriver version for 108.0 firefox


[WDM] - Downloading: 19.0kB [00:00, 3.96MB/s]                   


2023-01-08 09:38:41,910 INFO: Driver [C:\Users\Chris\.wdm\drivers\geckodriver\win64\0.32\geckodriver.exe] found in cache




**Scrape New Completed Games and Format Them**

In [41]:

def get_new_games(driver)-> pd.DataFrame:

    # set search for yesterday's games
    DAYS = 1
    SEASON = "" #no season will cause website to default to current season, format is "2022-23"
    TODAY = datetime.now(timezone('EST')) #nba.com uses US Eastern Standard Time
    LASTWEEK = (TODAY - timedelta(days=DAYS))
    DATETO = TODAY.strftime("%m/%d/%y")
    DATEFROM = LASTWEEK.strftime("%m/%d/%y")


    df = scrape_to_dataframe(driver, Season=SEASON, DateFrom=DATEFROM, DateTo=DATETO)

    df = convert_columns(df)

    print(df.info())
    df = combine_home_visitor(df)

    return df

df_new = get_new_games(driver)

# get the SEASON of the last game in the database
# this will used when constructing rows for prediction
SEASON = df_new['SEASON'].max()

df_new




<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 0 to 9
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   HOME            10 non-null     int64         
 1   GAME_DATE_EST   10 non-null     datetime64[ns]
 2   HOME_TEAM_WINS  10 non-null     int64         
 3   PTS             10 non-null     int64         
 4   FG_PCT          10 non-null     float64       
 5   FG3_PCT         10 non-null     float64       
 6   FT_PCT          10 non-null     float64       
 7   REB             10 non-null     int64         
 8   AST             10 non-null     int64         
 9   TEAM_ID         10 non-null     object        
 10  GAME_ID         10 non-null     object        
dtypes: datetime64[ns](1), float64(3), int64(5), object(2)
memory usage: 960.0+ bytes
None


Unnamed: 0,GAME_DATE_EST,HOME_TEAM_WINS,PTS_home,FG_PCT_home,FG3_PCT_home,FT_PCT_home,REB_home,AST_home,HOME_TEAM_ID,GAME_ID,PTS_away,FG_PCT_away,FG3_PCT_away,FT_PCT_away,REB_away,AST_away,VISITOR_TEAM_ID,SEASON
0,2023-01-07,1,121,49.5,44.1,66.7,43,30,1610612738,22200591,116,49.0,28.0,78.6,51,25,1610612759,2022
1,2023-01-07,0,118,49.5,29.3,77.8,39,36,1610612762,22200592,126,53.0,46.4,78.1,44,22,1610612741,2022
2,2023-01-07,0,117,47.9,17.2,62.9,44,26,1610612740,22200593,127,56.8,44.8,78.9,39,23,1610612742,2022
3,2023-01-07,1,115,50.6,40.6,58.3,44,26,1610612753,22200594,101,37.5,31.0,81.0,49,26,1610612744,2022
4,2023-01-07,1,136,61.0,42.9,81.8,38,26,1610612747,22200595,134,52.3,43.8,81.1,34,25,1610612758,2022


**Retrieve todays games**

In [42]:
#retrieve list of teams playing today

# get today's games on NBA schedule
matchups, game_ids = get_todays_matchups(driver)


print(matchups)
print(game_ids)




[['1610612755', '1610612765'], ['1610612757', '1610612761'], ['1610612766', '1610612754'], ['1610612751', '1610612748'], ['1610612762', '1610612763'], ['1610612750', '1610612745'], ['1610612742', '1610612760'], ['1610612739', '1610612756'], ['1610612737', '1610612746']]
['22200596', '22200597', '22200598', '22200599', '22200600', '22200601', '22200602', '22200603', '22200604']


**Close Webdriver**

In [43]:
driver.close() 

**Create Rows for Today's Games with Empty Stats**

In [44]:
# append today's matchups to the new games dataframe


df_today = df_new.drop(df_new.index) #empty copy of df_new with same columns
for i, matchup in enumerate(matchups):
    game_details = {'HOME_TEAM_ID': matchup[1], 
                    'VISITOR_TEAM_ID': matchup[0], 
                    'GAME_DATE_EST': datetime.now(timezone('EST')).strftime("%Y-%m-%d"), 
                    'GAME_ID': int(game_ids[i]),                       
                    'SEASON': SEASON,
                    } 
    game_details_df = pd.DataFrame(game_details, index=[i])
    # append to new games dataframe
    df_today = pd.concat([df_today, game_details_df], ignore_index = True)

#blank rows will be filled with 0 to prevent issues with feature engineering
df_today = df_today.fillna(0) 

df_today


Unnamed: 0,GAME_DATE_EST,HOME_TEAM_WINS,PTS_home,FG_PCT_home,FG3_PCT_home,FT_PCT_home,REB_home,AST_home,HOME_TEAM_ID,GAME_ID,PTS_away,FG_PCT_away,FG3_PCT_away,FT_PCT_away,REB_away,AST_away,VISITOR_TEAM_ID,SEASON
0,2023-01-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1610612765,22200596,0.0,0.0,0.0,0.0,0.0,0.0,1610612755,2022
1,2023-01-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1610612761,22200597,0.0,0.0,0.0,0.0,0.0,0.0,1610612757,2022
2,2023-01-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1610612754,22200598,0.0,0.0,0.0,0.0,0.0,0.0,1610612766,2022
3,2023-01-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1610612748,22200599,0.0,0.0,0.0,0.0,0.0,0.0,1610612751,2022
4,2023-01-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1610612763,22200600,0.0,0.0,0.0,0.0,0.0,0.0,1610612762,2022
5,2023-01-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1610612745,22200601,0.0,0.0,0.0,0.0,0.0,0.0,1610612750,2022
6,2023-01-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1610612760,22200602,0.0,0.0,0.0,0.0,0.0,0.0,1610612742,2022
7,2023-01-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1610612756,22200603,0.0,0.0,0.0,0.0,0.0,0.0,1610612739,2022
8,2023-01-08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1610612746,22200604,0.0,0.0,0.0,0.0,0.0,0.0,1610612737,2022


**Access Feature Store**

In [45]:
project = hopsworks.login(api_key_value=HOPSWORKS_API_KEY)
fs = project.get_feature_store()

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/3350
Connected. Call `.close()` to terminate connection gracefully.


**Access Feature Group**

In [46]:
rolling_stats_fg = fs.get_feature_group(
    name="rolling_stats",
    version=1,
)

**Query Old Data Needed for Feature Engineering of New Data**

To generate features like rolling averages for the new games, older data from previous games is needed since some of the rolling averages might extend back 15 or 20 games or so.

In [47]:
BASE_FEATURES = ['game_date_est',
 'game_id',
 'home_team_id',
 'visitor_team_id',
 'season',
 'pts_home',
 'fg_pct_home',
 'ft_pct_home',
 'fg3_pct_home',
 'ast_home',
 'reb_home',
 'pts_away',
 'fg_pct_away',
 'ft_pct_away',
 'fg3_pct_away',
 'ast_away',
 'reb_away',
 'home_team_wins',
]

ds_query = rolling_stats_fg.select(BASE_FEATURES)
df_old = ds_query.read()
df_old


2023-01-08 09:40:44,984 INFO: USE `nba_predictor_featurestore`
2023-01-08 09:40:45,312 INFO: SELECT `fg0`.`game_date_est` `game_date_est`, `fg0`.`game_id` `game_id`, `fg0`.`home_team_id` `home_team_id`, `fg0`.`visitor_team_id` `visitor_team_id`, `fg0`.`season` `season`, `fg0`.`pts_home` `pts_home`, `fg0`.`fg_pct_home` `fg_pct_home`, `fg0`.`ft_pct_home` `ft_pct_home`, `fg0`.`fg3_pct_home` `fg3_pct_home`, `fg0`.`ast_home` `ast_home`, `fg0`.`reb_home` `reb_home`, `fg0`.`pts_away` `pts_away`, `fg0`.`fg_pct_away` `fg_pct_away`, `fg0`.`ft_pct_away` `ft_pct_away`, `fg0`.`fg3_pct_away` `fg3_pct_away`, `fg0`.`ast_away` `ast_away`, `fg0`.`reb_away` `reb_away`, `fg0`.`home_team_wins` `home_team_wins`
FROM `nba_predictor_featurestore`.`rolling_stats_1` `fg0`




Unnamed: 0,game_date_est,game_id,home_team_id,visitor_team_id,season,pts_home,fg_pct_home,ft_pct_home,fg3_pct_home,ast_home,reb_home,pts_away,fg_pct_away,ft_pct_away,fg3_pct_away,ast_away,reb_away,home_team_wins
0,2007-02-25,20600832,1610612765,1610612741,2006,95,0.377930,0.799805,0.500000,20,41,93,0.479980,0.666992,0.385010,21,45,1
1,2006-12-11,20600301,1610612753,1610612756,2006,89,0.451904,0.523926,0.166992,18,42,103,0.514160,0.852051,0.250000,25,36,0
2,2007-11-10,20700084,1610612758,1610612750,2007,100,0.405029,0.850098,0.399902,12,38,93,0.438965,0.750000,0.213989,15,43,1
3,2005-01-11,20400504,1610612760,1610612746,2004,104,0.437988,0.889160,0.500000,24,40,99,0.438965,0.758789,0.333008,23,47,1
4,2016-10-29,21600028,1610612752,1610612763,2016,111,0.525879,0.641113,0.285889,24,41,104,0.429932,0.808105,0.360107,19,40,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23357,2017-12-10,21700387,1610612754,1610612743,2017,126,0.526855,0.817871,0.399902,24,40,116,0.452881,0.812988,0.405029,24,47,1
23358,2015-04-10,21401175,1610612753,1610612761,2014,99,0.460938,1.000000,0.350098,21,46,101,0.415039,0.933105,0.300049,22,46,0
23359,2005-01-15,20400537,1610612745,1610612759,2004,73,0.333008,0.730957,0.399902,15,49,67,0.353027,0.629883,0.125000,10,39,1
23360,2012-03-07,21100575,1610612749,1610612741,2011,104,0.477051,0.789062,0.333008,29,35,106,0.518066,0.881836,0.312988,26,43,0


**Convert Feature Names back to original mixed case**

In [48]:
df_old = convert_feature_names(df_old)
df_old

Unnamed: 0,GAME_DATE_EST,GAME_ID,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS
0,2007-02-25,20600832,1610612765,1610612741,2006,95,0.377930,0.799805,0.500000,20,41,93,0.479980,0.666992,0.385010,21,45,1
1,2006-12-11,20600301,1610612753,1610612756,2006,89,0.451904,0.523926,0.166992,18,42,103,0.514160,0.852051,0.250000,25,36,0
2,2007-11-10,20700084,1610612758,1610612750,2007,100,0.405029,0.850098,0.399902,12,38,93,0.438965,0.750000,0.213989,15,43,1
3,2005-01-11,20400504,1610612760,1610612746,2004,104,0.437988,0.889160,0.500000,24,40,99,0.438965,0.758789,0.333008,23,47,1
4,2016-10-29,21600028,1610612752,1610612763,2016,111,0.525879,0.641113,0.285889,24,41,104,0.429932,0.808105,0.360107,19,40,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23357,2017-12-10,21700387,1610612754,1610612743,2017,126,0.526855,0.817871,0.399902,24,40,116,0.452881,0.812988,0.405029,24,47,1
23358,2015-04-10,21401175,1610612753,1610612761,2014,99,0.460938,1.000000,0.350098,21,46,101,0.415039,0.933105,0.300049,22,46,0
23359,2005-01-15,20400537,1610612745,1610612759,2004,73,0.333008,0.730957,0.399902,15,49,67,0.353027,0.629883,0.125000,10,39,1
23360,2012-03-07,21100575,1610612749,1610612741,2011,104,0.477051,0.789062,0.333008,29,35,106,0.518066,0.881836,0.312988,26,43,0


**Update Yesterday's Matchup Predictions with New Final Results**

In [49]:
# filter out games that are pending final results
# (these were the rows used for prediction yesterday)
# and then update these with the new results


df_old = df_old.set_index('GAME_ID')
df_old.update(df_new.set_index('GAME_ID'))
df_old = df_old.reset_index()  


df_old

Unnamed: 0,GAME_ID,GAME_DATE_EST,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS
0,20600832,2007-02-25,1.610613e+09,1.610613e+09,2006.0,95.0,0.377930,0.799805,0.500000,20.0,41.0,93.0,0.479980,0.666992,0.385010,21.0,45.0,1.0
1,20600301,2006-12-11,1.610613e+09,1.610613e+09,2006.0,89.0,0.451904,0.523926,0.166992,18.0,42.0,103.0,0.514160,0.852051,0.250000,25.0,36.0,0.0
2,20700084,2007-11-10,1.610613e+09,1.610613e+09,2007.0,100.0,0.405029,0.850098,0.399902,12.0,38.0,93.0,0.438965,0.750000,0.213989,15.0,43.0,1.0
3,20400504,2005-01-11,1.610613e+09,1.610613e+09,2004.0,104.0,0.437988,0.889160,0.500000,24.0,40.0,99.0,0.438965,0.758789,0.333008,23.0,47.0,1.0
4,21600028,2016-10-29,1.610613e+09,1.610613e+09,2016.0,111.0,0.525879,0.641113,0.285889,24.0,41.0,104.0,0.429932,0.808105,0.360107,19.0,40.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23357,21700387,2017-12-10,1.610613e+09,1.610613e+09,2017.0,126.0,0.526855,0.817871,0.399902,24.0,40.0,116.0,0.452881,0.812988,0.405029,24.0,47.0,1.0
23358,21401175,2015-04-10,1.610613e+09,1.610613e+09,2014.0,99.0,0.460938,1.000000,0.350098,21.0,46.0,101.0,0.415039,0.933105,0.300049,22.0,46.0,0.0
23359,20400537,2005-01-15,1.610613e+09,1.610613e+09,2004.0,73.0,0.333008,0.730957,0.399902,15.0,49.0,67.0,0.353027,0.629883,0.125000,10.0,39.0,1.0
23360,21100575,2012-03-07,1.610613e+09,1.610613e+09,2011.0,104.0,0.477051,0.789062,0.333008,29.0,35.0,106.0,0.518066,0.881836,0.312988,26.0,43.0,0.0


**Add Today's Matchups for Feature Engineering**

In [50]:
df_combined = pd.concat([df_old, df_today], ignore_index = True)
df_combined

Unnamed: 0,GAME_ID,GAME_DATE_EST,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS
0,20600832,2007-02-25 00:00:00,1610612765.0,1610612741.0,2006.0,95.0,0.377930,0.799805,0.500000,20.0,41.0,93.0,0.479980,0.666992,0.385010,21.0,45.0,1.0
1,20600301,2006-12-11 00:00:00,1610612753.0,1610612756.0,2006.0,89.0,0.451904,0.523926,0.166992,18.0,42.0,103.0,0.514160,0.852051,0.250000,25.0,36.0,0.0
2,20700084,2007-11-10 00:00:00,1610612758.0,1610612750.0,2007.0,100.0,0.405029,0.850098,0.399902,12.0,38.0,93.0,0.438965,0.750000,0.213989,15.0,43.0,1.0
3,20400504,2005-01-11 00:00:00,1610612760.0,1610612746.0,2004.0,104.0,0.437988,0.889160,0.500000,24.0,40.0,99.0,0.438965,0.758789,0.333008,23.0,47.0,1.0
4,21600028,2016-10-29 00:00:00,1610612752.0,1610612763.0,2016.0,111.0,0.525879,0.641113,0.285889,24.0,41.0,104.0,0.429932,0.808105,0.360107,19.0,40.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23366,22200600,2023-01-08,1610612763,1610612762,2022.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0
23367,22200601,2023-01-08,1610612745,1610612750,2022.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0
23368,22200602,2023-01-08,1610612760,1610612742,2022.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0
23369,22200603,2023-01-08,1610612756,1610612739,2022.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0


**Data Processing**

In [51]:
df_combined = process_games(df_combined) 
df_combined = add_TARGET(df_combined)
df_combined

Unnamed: 0,GAME_ID,GAME_DATE_EST,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS,PLAYOFF,TARGET
0,20600832,2007-02-25 00:00:00,1610612765.0,1610612741.0,2006.0,95.0,0.377930,0.799805,0.500000,20.0,41.0,93.0,0.479980,0.666992,0.385010,21.0,45.0,1.0,0,1.0
1,20600301,2006-12-11 00:00:00,1610612753.0,1610612756.0,2006.0,89.0,0.451904,0.523926,0.166992,18.0,42.0,103.0,0.514160,0.852051,0.250000,25.0,36.0,0.0,0,0.0
2,20700084,2007-11-10 00:00:00,1610612758.0,1610612750.0,2007.0,100.0,0.405029,0.850098,0.399902,12.0,38.0,93.0,0.438965,0.750000,0.213989,15.0,43.0,1.0,0,1.0
3,20400504,2005-01-11 00:00:00,1610612760.0,1610612746.0,2004.0,104.0,0.437988,0.889160,0.500000,24.0,40.0,99.0,0.438965,0.758789,0.333008,23.0,47.0,1.0,0,1.0
4,21600028,2016-10-29 00:00:00,1610612752.0,1610612763.0,2016.0,111.0,0.525879,0.641113,0.285889,24.0,41.0,104.0,0.429932,0.808105,0.360107,19.0,40.0,1.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23366,22200600,2023-01-08,1610612763,1610612762,2022.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0,0.0
23367,22200601,2023-01-08,1610612745,1610612750,2022.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0,0.0
23368,22200602,2023-01-08,1610612760,1610612742,2022.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0,0.0
23369,22200603,2023-01-08,1610612756,1610612739,2022.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0,0.0


**Feature Engineering**

In [52]:
# Feature engineering to add: 
    # rolling averages of key stats, 
    # win/lose streaks, 
    # home/away streaks, 
    # specific matchup (team X vs team Y) rolling averages and streaks

df_combined = process_features(df_combined)
df_combined


#fix type conversion issues with hopsworks
df_combined['TARGET'] = df_combined['TARGET'].astype('int16')
df_combined['HOME_TEAM_WINS'] = df_combined['HOME_TEAM_WINS'].astype('int16')


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#r

**Insert New Data into Feature Group**

In [53]:

def test():
    # retrieve only new games from the combined dataframe now that feature engineering is complete

    # set index to GAME_ID
    df_combined = df_combined.set_index('GAME_ID')
    df_new = df_new.set_index('GAME_ID')
    
    # retrieve only new games
    df_new = df_combined.loc[df_new.index]

    # reset GAME_ID index back to column
    df_new = df_new.reset_index()

    # convert certain features back to int32 for Hopsworks compatibility
    df_new['GAME_ID'] = df_new['GAME_ID'].astype('int32')
    df_new['HOME_TEAM_WINS'] = df_new['HOME_TEAM_WINS'].astype('int32')
    df_new['TARGET'] = df_new['TARGET'].astype('int32')

    # save new games to Hopsworks feature group
    rolling_stats_fg.insert(df_new, write_options={"wait_for_job" : False})

    df_new 

rolling_stats_fg.insert(df_combined, write_options={"wait_for_job" : False})



Uploading Dataframe: 0.00% |          | Rows 0/23371 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/3350/jobs/named/rolling_stats_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x18a8f6f39d0>, None)

In [54]:
df_old.tail(20)

Unnamed: 0,GAME_ID,GAME_DATE_EST,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,REB_home,PTS_away,FG_PCT_away,FT_PCT_away,FG3_PCT_away,AST_away,REB_away,HOME_TEAM_WINS
23342,21800022,2018-10-19,1610613000.0,1610613000.0,2018.0,149.0,0.588867,0.63623,0.516113,35.0,52.0,129.0,0.520996,0.730957,0.399902,24.0,39.0,1.0
23343,20401142,2005-04-10,1610613000.0,1610613000.0,2004.0,124.0,0.509766,0.850098,0.280029,28.0,62.0,105.0,0.419922,0.902832,0.187988,25.0,32.0,1.0
23344,20300492,2004-01-07,1610613000.0,1610613000.0,2003.0,101.0,0.5,0.759766,0.285889,24.0,50.0,93.0,0.39209,0.625,0.350098,20.0,37.0,1.0
23345,21200586,2013-01-18,1610613000.0,1610613000.0,2012.0,100.0,0.468018,0.713867,0.35498,20.0,36.0,106.0,0.468018,0.736816,0.333008,16.0,43.0,0.0
23346,21000852,2011-02-23,1610613000.0,1610613000.0,2010.0,114.0,0.5,0.929199,0.364014,20.0,43.0,108.0,0.438965,0.780762,0.478027,18.0,40.0,1.0
23347,20900583,2010-01-15,1610613000.0,1610613000.0,2009.0,102.0,0.470947,0.899902,0.523926,23.0,42.0,87.0,0.479004,0.5,0.233032,17.0,38.0,1.0
23348,21400496,2015-01-03,1610613000.0,1610613000.0,2014.0,109.0,0.370117,0.725098,0.213989,19.0,59.0,104.0,0.449951,0.817871,0.293945,26.0,45.0,1.0
23349,21200646,2013-01-26,1610613000.0,1610613000.0,2012.0,119.0,0.493896,0.777832,0.428955,31.0,50.0,106.0,0.449951,0.856934,0.444092,21.0,31.0,1.0
23350,21700850,2018-02-13,1610613000.0,1610613000.0,2017.0,112.0,0.444092,0.700195,0.313965,23.0,51.0,120.0,0.51123,0.63623,0.444092,24.0,41.0,0.0
23351,20600865,2007-03-01,1610613000.0,1610613000.0,2006.0,95.0,0.475098,0.789062,0.333008,22.0,44.0,92.0,0.488037,0.5,0.278076,19.0,38.0,1.0


In [57]:
df_combined[df_combined['GAME_ID'] > 22200580]

Unnamed: 0,GAME_ID,GAME_DATE_EST,HOME_TEAM_ID,VISITOR_TEAM_ID,SEASON,PTS_home,FG_PCT_home,FT_PCT_home,FG3_PCT_home,AST_home,...,FG3_PCT_AVG_LAST_10_ALL_x_minus_y,FG3_PCT_AVG_LAST_15_ALL_x_minus_y,AST_AVG_LAST_3_ALL_x_minus_y,AST_AVG_LAST_7_ALL_x_minus_y,AST_AVG_LAST_10_ALL_x_minus_y,AST_AVG_LAST_15_ALL_x_minus_y,REB_AVG_LAST_3_ALL_x_minus_y,REB_AVG_LAST_7_ALL_x_minus_y,REB_AVG_LAST_10_ALL_x_minus_y,REB_AVG_LAST_15_ALL_x_minus_y
23346,22200587,2023-01-06,1610612746,1610612750,2022,115,44.0,66.6875,36.0,22,...,2.623437,2.220833,-1.666667,0.857143,-0.5,-0.6,0.333333,1.428571,3.7,5.2
23347,22200585,2023-01-06,1610612765,1610612759,2022,109,47.3125,58.8125,35.5,23,...,-0.060938,1.584375,-4.333333,-1.571429,-3.6,-2.466667,3.0,1.857143,1.5,2.133333
23348,22200586,2023-01-06,1610612751,1610612740,2022,108,43.5,95.0,45.5,26,...,5.976562,6.458333,4.666667,4.857143,2.5,1.533333,-8.333333,-7.285714,-6.4,-4.733333
23349,22200583,2023-01-06,1610612766,1610612749,2022,138,52.0,63.1875,41.6875,32,...,-1.440625,-1.122917,0.666667,4.285714,3.1,1.133333,-10.0,-8.285714,-5.8,-5.333333
23351,22200589,2023-01-06,1610612748,1610612756,2022,104,42.3125,83.875,31.59375,19,...,-7.257812,-3.93125,2.0,-1.285714,-0.7,-1.333333,0.333333,-0.857143,-2.1,-2.933333
23352,22200584,2023-01-06,1610612764,1610612760,2022,110,48.09375,75.0,29.203125,21,...,-5.698438,-2.576042,6.333333,4.0,1.6,1.733333,8.666667,2.571429,1.2,-1.133333
23353,22200590,2023-01-06,1610612737,1610612747,2022,114,45.8125,77.3125,29.0,24,...,-0.782812,-1.315625,-2.333333,-1.285714,0.8,-0.733333,-4.0,0.714286,0.6,0.133333
23354,22200581,2023-01-06,1610612741,1610612755,2022,126,57.09375,58.8125,58.8125,33,...,1.529688,-1.044792,-7.333333,-4.428571,-2.8,-1.866667,-3.0,2.571429,-0.5,-0.2
23355,22200588,2023-01-06,1610612739,1610612743,2022,108,45.5,56.3125,38.5,30,...,-6.220312,-5.344792,-8.0,-7.571429,-7.5,-6.666667,2.0,-0.428571,-1.4,-0.4
23356,22200582,2023-01-06,1610612752,1610612761,2022,112,47.0,69.1875,43.1875,19,...,3.820312,6.213542,-2.0,1.0,1.0,0.933333,8.0,5.857143,5.0,7.066667
