In [1]:
#!pip install -U hopsworks

In [2]:
import os

import pandas as pd
import numpy as np

import hopsworks

from datetime import datetime, timedelta
from pytz import timezone

from src.webscraping import (
    activate_web_driver,
    scrape_to_dataframe,
    convert_columns,
    combine_home_visitor,  
)

from src.data_processing import (
    process_games,
    add_TARGET,
)

from src.feature_engineering import (
    fix_datatypes,
    add_date_features,
    remove_playoff_games,
    add_rolling_home_visitor,
    process_games_consecutively,
    add_matchups,
    add_past_performance_all,
    combine_new_features,
    process_x_minus_y,  
    remove_non_rolling,
    process_features,
)

import json

from pyvirtualdisplay import Display

from pathlib import Path  #for Windows/Linux compatibility
DATAPATH = Path(r'data')

In [3]:
try:
    HOPSWORKS_API_KEY = os.environ['HOPSWORKS_API_KEY']
except:
    raise Exception('Set environment variable HOPSWORKS_API_KEY')

In [4]:
display = Display(visible=0, size=(1920,1200))  
display.start()

FileNotFoundError: [WinError 2] The system cannot find the file specified

**Scrape Data and Format**

In [None]:

# set search strings for the last seven days 
DAYS = 7
SEASON = "" #no season will cause website to default to current season, format is "2022-23"
TODAY = datetime.now(timezone('EST')) #nba.com uses US Eastern Standard Time
LASTWEEK = (TODAY - timedelta(days=DAYS))
DATETO = TODAY.strftime("%m/%d/%y")
DATEFROM = LASTWEEK.strftime("%m/%d/%y")

# initate a webdriver in selenium 
# since website data is dynamically generated
driver = activate_web_driver()

df = scrape_to_dataframe(driver, Season=SEASON, DateFrom=DATEFROM, DateTo=DATETO)

driver.close() 

df = convert_columns(df)
df = combine_home_visitor(df)


### Data Processing

In [None]:
df = process_games(df) 
df = add_TARGET(df)

### Feature Engineering

In [None]:
# Feature engineering to add: 
    # rolling averages of key stats, 
    # win/lose streaks, 
    # home/away streaks, 
    # specific matchup (team X vs team Y) rolling averages and streaks

df = process_features(df)


### Add Data to Feature Store

In [None]:
project = hopsworks.login(api_key_value=HOPSWORKS_API_KEY)
fs = project.get_feature_store()

In [None]:
rolling_stats_fg = fs.get_feature_group(
    name="rolling_stats",
    version=1,
)
rolling_stats_fg.insert(df, write_options={"wait_for_job" : False})

In [None]:
def save_feature_names(df):
    # hopsworks "sanitizes" feature names by converting to all lowercase
    # this function saves the original so that they can be re-mapped later
    # for code re-usuability
    
    feature_names = df.columns.tolist()
    with open("feature_names.json", "w") as fp:
        json.dump(feature_names, fp)
        
    return "File Saved."

save_feature_names(df)
