In [1]:
import os
import sys
import pandas as pd
import requests
import logging
from datetime import datetime, timedelta
import random
import time
import psycopg2
from io import StringIO
from dotenv import load_dotenv

# The notebook runs from a sub-directory of the repository, so the parent of the
# working directory is registered as an import root for the `src.*` packages.
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, os.pardir))
if not any(entry == project_root for entry in sys.path):
    sys.path.append(project_root)

# Echo the resulting module search path for troubleshooting.
print("Current sys.path:", sys.path)

# Project-local imports; these resolve only once project_root is on sys.path (set up above).
from src.data_processing.nst_scraper import nst_on_ice_scraper, nst_team_on_ice_scraper
# NOTE(review): wildcard import — presumably the source of the scrape_goalie_stats_range /
# scrape_team_stats_range helpers referenced in the commented-out cells further down;
# confirm and replace with explicit names.
from src.db.nst_db_utils import *


# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)


Current sys.path: ['/Library/Frameworks/Python.framework/Versions/3.12/lib/python312.zip', '/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12', '/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/lib-dynload', '', '/Users/jweinga/Documents/python/accurateshothelper/.venv/lib/python3.12/site-packages', '/Users/jweinga/Documents/python/accurateshothelper']


In [2]:
# nst_team_on_ice_scraper(startdate='', enddate='2024-10-12',last_n=30)
# df = nst_on_ice_scraper(startdate='', enddate='2024-10-09', last_n=30, rate='y', lines='single') 

In [3]:
# goalie_stats_df = nst_on_ice_scraper(
#             startdate='2024-10-12',
#             enddate='2024-10-12',
#             pos='G',
#             rate='n',
#             stdoi='g',
#             lines='single'
#         )


In [4]:
# Logger for messages emitted by this notebook itself.
logger = logging.getLogger(__name__)
# Root configuration: show INFO and above.
logging.basicConfig(level=logging.INFO)

In [5]:
# Prefix passed as `db_prefix` to the database helpers in the cells below.
db_prefix = 'NST_DB_'

# Pull settings from the .env file into the process environment.
load_dotenv()

In [6]:
# Key dates per NHL season, keyed by the 8-digit season id (start year followed
# by end year). Every date is an ISO 'YYYY-MM-DD' string.
NHL_SEASONS = {
    season_id: dict(zip(('start', 'regular_end', 'playoff_end'), key_dates))
    for season_id, key_dates in (
        # season,  (start,        regular_end,  playoff_end)
        (20242025, ('2024-10-04', '2025-04-18', '2025-06-30')),  # both end dates estimated
        (20232024, ('2023-10-10', '2024-04-18', '2024-06-24')),
        (20222023, ('2022-10-07', '2023-04-14', '2023-06-13')),
        (20212022, ('2021-10-12', '2022-04-29', '2022-06-26')),
        (20202021, ('2021-01-13', '2021-05-19', '2021-07-07')),  # Covid-shortened season
    )
}

In [7]:
# scrape_goalie_stats_range(
#         start_date='2023-03-01',
#         end_date='2023-04-14',
#         db_prefix=db_prefix,  # Make sure this matches your environment variables
#         delay_min=90,
#         delay_max=120,
#         situation='pk'
#     )

In [8]:
# nst_team_on_ice_scraper(
#                 startdate='2025-02-24',
#                 enddate='2025-02-24',
#                 sit='5v5',
#                 stype=2
#             )


In [9]:
# Spot-check one lookup in the season table: the 2022-23 playoff end date.
NHL_SEASONS[20222023]['playoff_end']

'2023-06-13'

In [10]:
# scrape_team_stats_range(
#     start_date=NHL_SEASONS[20242025]['start'],
#     end_date='2025-02-25',
#     db_prefix=db_prefix,
#     delay_min=65,
#     delay_max=90,
#     situation='pp'
# )
# time.sleep(100)
# scrape_team_stats_range(
#     start_date=NHL_SEASONS[20232024]['start'],
#     end_date=NHL_SEASONS[20232024]['regular_end'],
#     db_prefix=db_prefix,
#     delay_min=65,
#     delay_max=90,
#     situation='pp'
# )
# time.sleep(100)
# scrape_team_stats_range(
#     start_date=NHL_SEASONS[20222023]['start'],
#     end_date=NHL_SEASONS[20222023]['regular_end'],
#     db_prefix=db_prefix,
#     delay_min=65,
#     delay_max=90,
#     situation='pp'
# )
# time.sleep(100)
# scrape_team_stats_range(
#     start_date=NHL_SEASONS[20212022]['start'],
#     end_date=NHL_SEASONS[20212022]['regular_end'],
#     db_prefix=db_prefix,
#     delay_min=65,
#     delay_max=90,
#     situation='pp'
# )
# time.sleep(100)
# scrape_team_stats_range(
#     start_date=NHL_SEASONS[20202021]['start'],
#     end_date=NHL_SEASONS[20202021]['regular_end'],
#     db_prefix=db_prefix,
#     delay_min=65,
#     delay_max=90,
#     situation='pp'
# )

In [11]:
# scrape_team_stats_range(
#     start_date="2022-01-04",
#     end_date=NHL_SEASONS[20212022]['regular_end'],
#     db_prefix=db_prefix,
#     delay_min=65,
#     delay_max=90,
#     situation='pk'
# )

In [12]:
from src.db.nst_db_utils import add_home_away_data_from_nhl_api

# Add home/away data to one table for the games inside a fixed date window.
home_away_window = {
    "start_date": "2021-10-12",
    "end_date": "2021-10-31",
    "table_name": "team_stats_5v5",  # Or any other table you want to update
}
add_home_away_data_from_nhl_api(**home_away_window)

INFO:src.db.base_utils:Database connection established.
INFO:src.db.base_utils:Database connection established.
INFO:src.db.base_utils:Database connection closed.
INFO:src.db.nst_db_utils:Found 262 team-date combinations in the database
INFO:src.db.nst_db_utils:Sample data (date, team): [('2021-10-31', 'Columbus Blue Jackets'), ('2021-10-18', 'Anaheim Ducks'), ('2021-10-21', 'Anaheim Ducks'), ('2021-10-26', 'Montreal Canadiens'), ('2021-10-16', 'Vancouver Canucks')]
INFO:src.db.nst_db_utils:Processing games for date: 2021-10-12
INFO:src.db.nst_db_utils:Found 2 games using direct schedule endpoint
INFO:src.db.nst_db_utils:Mapped home team: TBL -> Tampa Bay Lightning
INFO:src.db.nst_db_utils:Mapped away team: PIT -> Pittsburgh Penguins
INFO:src.db.nst_db_utils:Game: Pittsburgh Penguins @ Tampa Bay Lightning on 2021-10-12 - Updated 2 records
INFO:src.db.nst_db_utils:Mapped home team: VGK -> Vegas Golden Knights
INFO:src.db.nst_db_utils:Mapped away team: SEA -> Seattle Kraken
INFO:src.db.n

KeyboardInterrupt: 