In [24]:
##### importing custom modules from the projects folder
import sys
from pathlib import Path
# Add project root to sys.path so the local `modules` package can be imported.
# If the kernel's working directory is named "notebooks", the root is its parent;
# otherwise the working directory itself is used as the root.
PROJECT_ROOT = Path.cwd().parent if Path.cwd().name == "notebooks" else Path.cwd()
# Guarded so that re-running this cell does not keep appending duplicate entries.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))
import modules.scrapers as scrapers
import modules.helperModule as hf
##### --------------
from datetime import date

# ====================================
#                    PARAMS
# ====================================
# These names are read by the scrape cells further down the notebook.
league = 'nfl'
export = True # export to csv files
today = date.today()  # run date, echoed below and handed to the scraper
season = 2025
week = 0
inseason = False  # False for running proj, and rank scrapes in the offseason
#week = int(input("What week is it >>>>>> ? "))
# Echo the run configuration so the cell output records what this run used.
print('Run date:',today, "\nWeek:", week, '\ninseason run:', inseason, '\nexport files:', export)
# ====================================

# Single scraper instance shared by every scrape / process cell below.
scraper = scrapers.scrapers(
    season = season,
    week = week,
    today = today
)

Run date: 2025-08-18 
Week: 0 
inseason run: False 
export files: True


# SCRAPE DATA
# =============

### INSEASON - Actual game scores and Game betting lines
##### run after the last game of the week and before the next TNF game

In [None]:
# In-season scrapes (see the section header): game scores first, then betting lines.
# `inseason` and `export` come from the PARAMS cell.
scraper.cbs_game_scores(inseason=inseason, export=export)
scraper.bp_lines(export=export)

### INSEASON or OFFSEASON - Projections and Rankings
##### offseason - run at any time in the offseason once sites put them up
##### inseason - run anytime Tues-Thurs before TNF to get the proj or ranks before games start

In [None]:
# in- and off-season
# The three projection scrapes take the same run flags, so bundle them once.
scrape_kwargs = dict(inseason=inseason, export=export)
scraper.cbs_projections(**scrape_kwargs)
scraper.espn_projections(**scrape_kwargs)
# Kept as the final bare expression so any value it returns is still displayed.
scraper.nfl_projections(**scrape_kwargs)

(441, 60)

In [None]:
# in- and off-season
# The three ranking scrapes take the same run flags, so bundle them once.
scrape_kwargs = dict(inseason=inseason, export=export)
scraper.ffp_ecr_rankings(**scrape_kwargs)
scraper.espn_rankings(**scrape_kwargs)
# Kept as the final bare expression so any value it returns is still displayed.
scraper.nfl_rankings(**scrape_kwargs)

###  OFFSEASON - ADPs

In [None]:
# Offseason ADP scrapes, one call per outlet; only the `export` flag is passed.
scraper.cbs_adp(export=export)
scraper.ffp_adp(export=export)
scraper.espn_adp(export=export)

# PROCESS SCRAPED DATA

In [None]:
# Hit the db to standardize player, team, outlet, etc. ids.
# NOTE(review): presumably the process_* cells below rely on these maps, so run this first — confirm.
scraper.generate_id_maps()

In [None]:
# Process the scraped projections.
# NOTE(review): presumably fills scraper.processed_dfs['projections'], which the load section reads — confirm.
scraper.process_projections()

In [None]:
# Process the scraped rankings.
# NOTE(review): presumably fills scraper.processed_dfs['rankings'], which the load section reads — confirm.
scraper.process_rankings()

In [None]:
# Process the scraped ADPs.
# NOTE(review): presumably fills scraper.processed_dfs['adps'], which the load section reads — confirm.
scraper.process_adps()

In [None]:
# Process the scraped game lines; 'betting' is also the table name used by the load cell for lines.
# NOTE(review): presumably fills scraper.processed_dfs['lines'] — confirm.
scraper.process_game_lines(bettingTableName='betting')

# load

In [49]:
# Load the processed projections into the `projection` table.
# NOTE(review): if_exists='append' presumably inserts the same rows again if this
# cell is re-run — confirm the table or helper guards against duplicates.
hf.export_database(
    database_table='projection',
    dataframe=scraper.processed_dfs['projections'],
    if_exists='append',
    connection_string=None,  # presumably falls back to the helper's default connection — confirm
)

successfully added data to projection


In [31]:
# Load the processed rankings into the `ranking` table.
# NOTE(review): if_exists='append' presumably inserts the same rows again if this
# cell is re-run — confirm the table or helper guards against duplicates.
hf.export_database(
    database_table='ranking',
    dataframe=scraper.processed_dfs['rankings'],
    if_exists='append',
    connection_string=None,  # presumably falls back to the helper's default connection — confirm
)

successfully added data to ranking


In [21]:
# Load the processed ADPs into the `adp` table.
# NOTE(review): if_exists='append' presumably inserts the same rows again if this
# cell is re-run — confirm the table or helper guards against duplicates.
hf.export_database(
    database_table='adp',
    dataframe=scraper.processed_dfs['adps'],
    if_exists='append',
    connection_string=None,  # presumably falls back to the helper's default connection — confirm
)

successfully added data to adp


In [None]:

# Load the processed game lines into the `betting` table.
# NOTE(review): if_exists='append' presumably inserts the same rows again if this
# cell is re-run — confirm the table or helper guards against duplicates.
hf.export_database(
    database_table='betting',
    dataframe=scraper.processed_dfs['lines'],
    if_exists='append',
    connection_string=None,  # presumably falls back to the helper's default connection — confirm
)

# scratch

In [39]:
import modules.helperModule as hf
import pandas as pd

In [67]:
# Load the missing-player export and the current `player` table, then build the
# normalised-name helpers used to decide which players need inserting.
# NOTE(review): absolute, machine-specific path — consider building it from
# PROJECT_ROOT so the notebook runs on other machines.
df_missing_players = pd.read_csv(r"C:\Users\jrbrz\Desktop\projects\projects\fantasyfootball\data\missingPlayersProj.csv")
dfplayer = hf.query_database(
    query="SELECT * FROM player"
)
# formatting imported data and prepping maps and list
dfplayer['joinName'] = dfplayer['name'].str.lower().apply(hf.apply_regex_replacements)
map_joinName_to_dbPid = pd.Series(dfplayer.playerId.values, index=dfplayer.joinName).to_dict()
# Reuse the column normalised above instead of normalising it a second time, so the
# names compared against df_missing_players['joinName'] went through the exact same
# single pass of lower-casing + regex replacements.
players_in_db = dfplayer['joinName'].tolist()

# outlet name -> name of the id column for that outlet
map_source = {
    'espn':'espnId',
    'fantasyPros':'fpId',
    'nfl':'nflId',
    'cbs':'cbsId'
}

# prepping the missing player data for updates or inserts into my db
cols_keep = [
    'outlet', 'sourceId', 'name'
]
# .copy() gives an independent frame, so the column assignments below do not
# raise SettingWithCopyWarning or write into a slice of the raw read.
df_missing_players = df_missing_players[cols_keep].copy()
df_missing_players['joinName'] = df_missing_players['name'].str.lower().apply(hf.apply_regex_replacements)
# Drop rows whose sourceId contains alphabetic characters. Because the column is
# cast to str first, missing values become the text 'nan' and are dropped as well.
mask = df_missing_players['sourceId'].astype(str).str.contains(r'[A-Za-z]', na=False)
df_missing_players = df_missing_players[~mask].copy()
# Go through float first so values such as "123.0" still end up as plain ints.
df_missing_players['sourceId'] = df_missing_players['sourceId'].astype(float).astype(int)
missing_outlets = df_missing_players['outlet'].unique()
df_missing_players.shape

query successful


(1830, 4)

In [68]:

# ====================
#       INSERTS
# ====================
# Get players that are not present in my database and prep them for an insert.
# .copy() makes `inserts` an independent frame, so the column assignment below is
# not a chained assignment on a slice of df_missing_players.
inserts = df_missing_players[
    ~df_missing_players['joinName'].isin(players_in_db)
].copy()

# Shorten the outlet label so the pivoted column becomes 'fpId' after the suffix is added.
inserts['outlet'] = inserts['outlet'].replace('fantasyPros', 'fp')
# pivot() requires unique (index, columns) pairs, so keep one row per name/outlet.
inserts = inserts.drop_duplicates(subset=['name', 'outlet'])

# making a single record for each player with all the ids
inserts = (
    inserts.pivot(index='name', columns='outlet', values='sourceId')
    .add_suffix('Id')        # add "Id" to each outlet column
    .reset_index()           # bring 'name' back as a column
)
# Adding joinName back (the pivot kept only 'name') so that it can be used to add
# posId and teamId.
inserts['joinName'] = inserts['name'].str.lower().apply(hf.apply_regex_replacements)


In [69]:

# Look up the position id and team for the players being added.
id_lookups = hf.query_database(
    query = '''
        SELECT 
            pid.name, pos.posId as posId, team as teamId
        FROM
            playeridlookupimport pid
        LEFT JOIN pos ON pos.pos = pid.position
    '''
)
# Translate the team value through the helper's mapping, build the same normalised
# join key used on `inserts`, then discard the raw name column.
id_lookups = (
    id_lookups
    .assign(
        teamId=lambda frame: frame['teamId'].map(hf.team_map_nfldatapy_to_dbTid),
        joinName=lambda frame: frame['name'].str.lower().apply(hf.apply_regex_replacements),
    )
    .drop(columns=['name'])
)

# Left-merge so every insert row is kept even when no lookup row matches.
# NOTE(review): re-running this cell merges posId/teamId onto `inserts` a second
# time, and duplicate joinName values in the lookup would multiply rows — confirm.
inserts = inserts.merge(id_lookups, how='left', on='joinName')

# pandas turns id columns holding missing values into floats, so force them back to
# integers. Every column whose name contains "id" (case-insensitive) is converted.
cols_to_int = [column for column in inserts.columns if 'id' in column.lower()]
# Non-numeric values are coerced to missing rather than dropped; the nullable
# Int64 dtype keeps those gaps while storing the rest as integers.
inserts[cols_to_int] = (
    inserts[cols_to_int]
    .apply(pd.to_numeric, errors='coerce')
    .astype('Int64')
)


query successful


In [70]:
# Spot-check: show the lookup row(s) for one normalised player name.
id_lookups.loc[id_lookups['joinName'].eq('quinn ewers')]

Unnamed: 0,posId,teamId,joinName
4,1.0,15,quinn ewers


In [73]:
# Spot-check: show the same player in `inserts` to confirm posId/teamId were merged in.
inserts.loc[inserts['joinName'].eq('quinn ewers')]

Unnamed: 0,name,cbsId,espnId,nflId,joinName,posId,teamId
309,Quinn Ewers,26746721,,2574321,quinn ewers,1,15
