In [2]:
import pandas as pd
import numpy as np
import os
import time
import glob
import nbimporter


from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.chrome.options import Options

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingRegressor

from p4_AdvancedStats import predictMissing

import warnings
from sklearn.exceptions import ConvergenceWarning

warnings.filterwarnings("ignore", category = ConvergenceWarning)

pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 1000)

Importing our team data and creating models for each of them

In [88]:
directory = '/users/blaizelahman/Desktop/CFB Model/Updated Data'
pattern = os.path.join(directory, '*updated_model*.csv')

teamFiles = glob.glob(pattern)

teamDict = {}

for file in teamFiles:

    teamDF = pd.read_csv(file)

    if teamDF.shape[0] >= 56:
    
        key = teamDF['School'][0]
        teamDict[key] = teamDF
    
        print(f'Added: {key}')

Added: Louisiana Tech
Added: Southern Mississippi
Added: Arizona State
Added: Auburn
Added: Texas Tech
Added: Minnesota
Added: NC State
Added: Georgia
Added: USC
Added: South Florida
Added: Wisconsin
Added: Miami
Added: Mississippi State
Added: Houston
Added: San José State
Added: Oklahoma State
Added: UCLA
Added: Rice
Added: Texas State
Added: Iowa
Added: Western Michigan
Added: Charlotte
Added: Florida International
Added: Ole Miss
Added: UTEP
Added: Boston College
Added: Troy
Added: Wake Forest
Added: Baylor
Added: Bowling Green
Added: TCU
Added: Virginia
Added: Utah State
Added: Appalachian State
Added: Michigan
Added: New Mexico State
Added: SMU
Added: South Carolina
Added: Georgia Southern
Added: Vanderbilt
Added: California
Added: Ohio State
Added: Louisiana Monroe
Added: Louisiana
Added: Eastern Michigan
Added: UMass
Added: Alabama
Added: Miami (OH)
Added: Iowa State
Added: Arizona
Added: UAB
Added: Akron
Added: Cincinnati
Added: Virginia Tech
Added: Tulane
Added: Texas
Added: 

In [12]:
# buildModel function that does not predict games yet so that we can predict 
# upcoming games during the season
def buildModel(team, features, year, randomState):

    # calling function to predict the missing values
    predictMissing(team, fixCols, randomState)
    
    # setting up training and test sets
    # only using data from before the given year because we'll be predicting data from given year
    modelTeam = team[team['Year'] < year]

    # to be used for finding the best alpha level
    X = modelTeam[features]
    y = modelTeam[['scoreDiff']].values.ravel()

    # to be used for predicting predicting the given year's score differentials
    xTrain = modelTeam[features]
    xTest = team[features][team['Year'] == year]
    yTrain = modelTeam[['scoreDiff']].values.ravel()

    # checks if the season we're testing has less than 8 games (meaning they had 
    # half their season canceled for some reason) and returns null if so
    if len(yTrain) < 8:
        return None

    # setting up pipeline
    pipe = Pipeline([
        ('imputer', SimpleImputer(strategy = 'constant', fill_value = 0)),
        ('scaler', StandardScaler()),
        ('regressor', Lasso())
    ])

    # setting up and performing grid search to find best alpha level
    paramGrid = {
        'regressor__alpha': np.logspace(-4, 4, 50)
    }
    
    gridSearch = GridSearchCV(pipe, paramGrid, cv = 5, scoring = 'neg_mean_absolute_error')

    gridSearch.fit(X, y)
    
    bestAlpha = gridSearch.best_params_['regressor__alpha']

    # setting up new pipeline with the ideal alpha level and a bagging regressor
    betterPipe = Pipeline([
        ('imputer', SimpleImputer(strategy = 'constant', fill_value = 0)),
        ('scaler', StandardScaler()),
        ('regressor', BaggingRegressor(estimator = Lasso(alpha = bestAlpha, max_iter = 10000), n_estimators = 50, n_jobs = -1))
    ])

    betterPipe.fit(xTrain, yTrain)

    return betterPipe

Making a dictionary to hold trained models for each team and then saving them to our project folder using joblib

In [14]:
features = [col for col in teamDict['Florida State'].columns if any(word in col for word in ['rolling_sum','talent','SP'])]
fixCols = [col for col in features if any(word in col for word in ['tackle', 'sacks', 'Deflected', 'defensive', 'qbHurries', 'fumbles', 'kickReturn', 'penalty', 'Fumble', 'talent', 'SP'])]

modelDict = {}

for team, teamDF in teamDict.items():

    model = buildModel(teamDF, features, 2024, None)
    modelDict[team] = model
    print(f'{team} model done')  

Louisiana Tech model done
Southern Mississippi model done
Arizona State model done
Auburn model done
Texas Tech model done
Minnesota model done
NC State model done
Georgia model done
USC model done
South Florida model done
Wisconsin model done
Miami model done
Mississippi State model done
Houston model done
San José State model done
Oklahoma State model done
UCLA model done
Rice model done
Texas State model done
Iowa model done
Western Michigan model done
Charlotte model done
Florida International model done
Ole Miss model done
UTEP model done
Boston College model done
Troy model done
Wake Forest model done
Baylor model done
Bowling Green model done
TCU model done
Virginia model done
Utah State model done
Appalachian State model done
Michigan model done
New Mexico State model done
SMU model done
South Carolina model done
Georgia Southern model done
Vanderbilt model done
California model done
Ohio State model done
Louisiana Monroe model done
Louisiana model done
Eastern Michigan model d

Importing joblib so that we can save models to use over the course of the season

In [139]:
import joblib

In [20]:
directory = '/users/blaizelahman/Desktop/CFB Model/Team Models'

for team, model in modelDict.items():
    
    name = team.replace(' ', '_') + '_model_2024.pkl'
    path = f'{directory}/{name}'
    joblib.dump(model, path)


To be able to update opponent data for FCS teams, we'll need to download recent data for FCS teams and format it the same way we did our teamDict teams. Let's do that now

Importing FCS data from collegefootballdata.com

In [29]:
# setting the download directory and Chrome settings
directory = '/Users/blaizelahman/Desktop/CFB Model'
chromeOptions = Options()
prefs = {'download.default_directory': directory}
chromeOptions.add_experimental_option('prefs', prefs)

# these extra steps are due to the current version of chromedriver not being compatible with the 
# current version of chrome at the time of development, feel free to delete or change as it pertains
# to user situation
path = '/Users/blaizelahman/Downloads/chromedriver_104'
olderChromePath = '/Applications/Older Chrome.app/Contents/MacOS/Google Chrome'

chromeOptions.binary_location = olderChromePath

# creating Chrome driver
driver = webdriver.Chrome(service = Service(path), options = chromeOptions)

# dictionary to store game data
allWeeksDict = {}

# downloading weekly data files for FCS teams from years 2021-2023 from collegefootballdata.com
for year in range(2021, 2024):
        
    for week in range(1, 15):
        
        try:
            url = f'https://collegefootballdata.com/exporter/games/teams?year={year}&week={week}&seasonType=regular&classification=FCS'
            driver.get(url)
            time.sleep(3) 
            # clicking the query button
            query = driver.find_element(By.XPATH, "//button[contains(span/text(), 'Query')]")
            query.click()
            time.sleep(3) 

            # clicking the export button
            export = driver.find_element(By.XPATH, "//button[contains(span/text(), 'Export')]")
            export.click()
            time.sleep(3)

            key = f"cfb{year}w0{week}"
            
            # grabs files from CFBData folder
            files = os.listdir(directory)

            # grab the file paths for all files ending in .csv
            filePaths = [os.path.join(directory, name) for name in files if name.endswith('.csv')]

            # grabbing the most recently made file out of those in paths
            file = max(filePaths, key = os.path.getctime)
            
            # loading csv file
            allWeeksDict[key] = pd.read_csv(file)

            # deleting the file after it has been added
            os.remove(file)

        except Exception as e:
            print(f'Data for {year} week {week} not available.')

    print(f'Successfully grabbed data for {year}')

driver.quit()

Data for 2021 week 5 not available.
Data for 2021 week 6 not available.
Data for 2021 week 9 not available.
Data for 2021 week 13 not available.
Data for 2021 week 14 not available.
Successfully grabbed data for 2021
Successfully grabbed data for 2022
Data for 2023 week 14 not available.
Successfully grabbed data for 2023


Reformatting FCS data

In [30]:
from p1_DataImportAndWrangle import customRollingSum

Initializing functions from p1 because they did not import properly

In [31]:
def createTeam(team):

    # getting rid of redundant error warning
    pd.options.mode.chained_assignment = None

    # getting all games with the given team
    teamGames = totalWeeklyData[totalWeeklyData['School'] == team]
    gameIDs = teamGames['Game Id'].unique()
    
    # adding in all opponents of the given team and their stats
    teamDF = totalWeeklyData[totalWeeklyData['Game Id'].isin(gameIDs)]

    # converting the week column to an int so the dataframe can then be sorted by year and week
    teamDF['Week'] = teamDF['Week'].str[-2:].astype(int)
    teamDF = teamDF.sort_values(by = ["Year", "Week"])

    teamDF = teamDF.reset_index(drop=True)

    # adding a total touchdown column
    teamDF['totalTDs'] = teamDF[['passingTDs', 'rushingTDs', 'interceptionTDs', 'kickReturnTDs', 'puntReturnTDs']].sum(axis = 1, skipna = True)

    # merging dataframe to pair teams who played each other by Game Id and differentiating the opponent's stats
    mergeTeamDF = teamDF.merge(teamDF, on='Game Id', suffixes=('', '_opp'))

    # making sure there's no duplicates
    mergeTeamDF = mergeTeamDF[mergeTeamDF['School'] != mergeTeamDF['School_opp']]

    # getting score differentials and point totals
    mergeTeamDF['scoreDiff'] = mergeTeamDF['Points'] - mergeTeamDF['Points_opp']
    mergeTeamDF['pointTotal'] = mergeTeamDF['Points'] + mergeTeamDF['Points_opp']

    # adding a win column to show if the given team won a game or not
    mergeTeamDF['Win'] = mergeTeamDF['scoreDiff'] > 0

    # setting the dataframe to the merged version
    teamDF = mergeTeamDF

    teamDF = teamDF[teamDF['School'] == team]
    
    # making rolling sum columns for selected stats for the past 20 and 8 games  
    for games in [20, 8]:
        for column in ['Points','firstDowns','fumblesLost','fumblesRecovered','interceptions','kickReturnYards','kickingPoints','netPassingYards',
                      'passesDeflected', 'passesIntercepted','passingTDs','puntReturns','qbHurries','rushingAttempts','rushingTDs','rushingYards',
                       'sacks','tacklesForLoss','totalFumbles','totalPenaltiesYards','totalYards','turnovers','yardsPerPass','yardsPerRushAttempt', 'totalTDs']:
            newColumn = 'rolling_sum_' + column + str(games) 
            teamDF[newColumn] = customRollingSum(teamDF[column], games)
            
    # altering yardsPerPass and yardsPerRushAttempt columns to reflect their values over the past 20 and 8 games
    teamDF['rolling_sum_yardsPerPass20'] = teamDF['rolling_sum_yardsPerPass20'] / 20
    teamDF['rolling_sum_yardsPerPass8'] = teamDF['rolling_sum_yardsPerPass8'] / 8

    teamDF['rolling_sum_yardsPerRushAttempt20'] = teamDF['rolling_sum_yardsPerRushAttempt20'] / 20
    teamDF['rolling_sum_yardsPerRushAttempt8'] = teamDF['rolling_sum_yardsPerRushAttempt8'] / 8
    
    return teamDF


def mergeRollingSum(team):

    # making a copy of the teams dataframe from tempDict
    teamDF = tempDict[team].copy() 

    # going through the dataframe and merging the opponent's rolling_sum columns row by row
    for index, row in teamDF.iterrows():

        # grabbing gameIds and each opponent's name to access their dataframes in tempDict
        gameID = row['Game Id']
        oppName = 'temp_' + row['School_opp']
        
        oppDF = tempDict[oppName]
        
        # getting the opponent's rolling_sum columns from the game they played the given team
        oppRow = oppDF[oppDF['Game Id'] == gameID]
        
        rollingCols = [col for col in oppRow.columns if col.startswith('rolling_sum')]

        # merging the opponent's rolling_sum columns on the row the team plays them
        for col in rollingCols:
            teamDF.loc[index, col + '_opp'] = oppRow.iloc[0][col]
    
    return teamDF

In [194]:
# creating a week column to display what week it is, also removes the redundant 
# first row of column names
for name, df in allWeeksDict.items():
    
    # extracts what week it is from the dataframe by getting the last two characters in the name
    week_number = name[-2:]
    
    # converts last two characters into an int
    week_number = int(week_number)

    # creates new week column that displays the week the game took place
    df['Week'] = f"Week {week_number}"

    # checks what year is being shown in the dataframe and displays it in a year column
    year = name[3:7]

    df['Year'] = year

    # updates dataframe
    allWeeksDict[name] = df 
    
# Define totalWeeklyData
totalWeeklyData = pd.concat(allWeeksDict)

# dropping the extra index level that has the name of the original dataframes
totalWeeklyData.reset_index(level = 0, drop = True, inplace = True)

totalWeeklyData.rename(columns = {0: 'Game Id', 1: 'School', 2: 'Conference', 
                                  3: 'HomeAway', 4: 'Points', 5: 'Stat Category', 6: 'Stat'}, inplace = True)

# making a copy of the dataframe to look for non-numeric values
totalWeeklyDataCopy = totalWeeklyData.copy(deep = True) # converting string type data points to numerics
totalWeeklyDataCopy['Stat'] = pd.to_numeric(totalWeeklyDataCopy['Stat'], errors='coerce')

# filter out unique non-numeric rows
nan_stats = totalWeeklyDataCopy[totalWeeklyDataCopy['Stat'].isna()]['Stat Category'].unique()

# replacing the "-" in totalPenaltiesYards, completionAttempts, fourthDownEff, and thirdDownEff
# with ".", and the same with ":" in possessionTime so they can be converted to numeric values
totalWeeklyData['Stat'] = totalWeeklyData['Stat'].str.replace('-', '.')
totalWeeklyData['Stat'] = totalWeeklyData['Stat'].str.replace(':', '.')

totalWeeklyData['Stat'] = pd.to_numeric(totalWeeklyData['Stat'], errors = 'coerce')
# converting points to numerics
totalWeeklyData['Points'] = pd.to_numeric(totalWeeklyData['Points'], errors = 'coerce')

# getting the amount of games in the dataframe before pivoting to compare to after
prePivotedIds = totalWeeklyData['Game Id'].unique()

# pivoting the Stat Category column into multiple columns each with their respective stat
totalWeeklyData = totalWeeklyData.pivot(index=['Game Id', 'School', 'Conference', 'HomeAway', 'Points', 'Week', 'Year'], 
                      columns='Stat Category', 
                      values='Stat').reset_index()

# flattening the columns
totalWeeklyData.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in totalWeeklyData.columns]

# getting the amount of games in the newly pivoted dataframe and ensuring that no game data was lost
pivotedIDs = totalWeeklyData['Game Id'].unique()

# using createTeam to format all FCS teams
tempDict = {}
for team in totalWeeklyData.School.unique():
    dfName = f'temp_{team}'
    tempDict[dfName] = createTeam(team)

# creating a dictionary for formatted FCS teams
fcsDict = {}
for team, teamDF in tempDict.items():
        
    updatedDF = mergeRollingSum(team)  
    keyName = team[5:]

    # not including any current FBS teams
    if keyName in teamDict and teamDict[keyName].shape[0] >= 24:
        continue
        
    fcsDict[keyName] = updatedDF


# setting the download directory and reading in the talent data
path = '/Users/blaizelahman/Desktop/CFB Model/Talent Data/talent_2023.csv'
talentRatings = pd.read_csv(path)

# adding in talent ratings for all FCS teams
for team, teamDF in fcsDict.items():

    teamDF = teamDF.copy()

    talent = talentRatings.loc[(talentRatings['School'] == team) & (talentRatings['Year'] == 2023), 'Talent']

    if not talent.empty:
        # assigning talent column with the corresponding talent ratings for games in the given year
        teamDF.loc[teamDF['Year'] == '2023', 'talent'] = talent.values[0]
        
    else:
        print(f'No talent rating found for team: {team}')

    fcsDict[team] = teamDF
    print(f'Added {team}')

# setting the download directory and reading in the SP+ data
path = '/Users/blaizelahman/Desktop/CFB Model/SP+ Data/SP_2023.csv'
spRatings = pd.read_csv(path)

# adding in SP+ ratings for all FCS teams
for team, teamDF in fcsDict.items():

    teamDF = teamDF.copy()

    SP = spRatings.loc[(spRatings['Team'] == team) & (spRatings['Year'] == 2023), 'Rating']

    if not SP.empty:
        # assigning talent column with the corresponding talent ratings for games in the given year
        teamDF.loc[teamDF['Year'] == '2023', 'SP'] = SP.values[0]
        print(f'SP+ Rating added for {team}')
        
    else:
        teamDF.loc[teamDF['Year'] == '2023', 'SP'] = pd.NA
    
    fcsDict[team] = teamDF

    

# going through all teams in the dictionary and correcting any week 0 games
for team, teamDF in fcsDict.items():

    teamDF = teamDF.copy()
    
    for year in np.unique(teamDF['Year'].values):

        yearDF = teamDF[teamDF['Year'] == year]

        # checks if there's two week 1's
        if np.sum(yearDF['Week'].values == 1) > 1:
            # swapping week 0 and week 1 because the actual week 1 game is before the week 0 game
            teamWeek0 = yearDF.index[0]
            teamWeek1 = yearDF.index[1] 
            teamDF.loc[teamWeek0], teamDF.loc[teamWeek1] = teamDF.loc[teamWeek1].copy(), teamDF.loc[teamWeek0].copy()

            # setting the week 0 game to say week 0
            teamDF.at[teamWeek0, 'Week'] = 0
            
            print(f'Added week 0 for {team} in year {year}')

    fcsDict[team] = teamDF



Added Jacksonville State
No talent rating found for team: Long Island University
Added Long Island University
Added East Tennessee State
Added Mercer
Added Alabama State
Added McNeese
Added Austin Peay
Added Southeast Missouri State
Added Tennessee Tech
Added Arkansas-Pine Bluff
Added Samford
Added Charleston Southern
Added Tennessee State
Added Prairie View
Added Monmouth
Added Northwestern State
Added Bethune-Cookman
Added UT Martin
Added Gardner-Webb
Added Southeastern Louisiana
Added North Carolina Central
Added Hampton
Added Fordham
Added Texas Southern
No talent rating found for team: Colgate
Added Colgate
No talent rating found for team: William & Mary
Added William & Mary
Added South Carolina State
Added North Carolina A&T
Added Kennesaw State
Added Norfolk State
Added Furman
Added Albany
Added Central Connecticut
Added New Hampshire
Added Richmond
Added Wofford
Added Youngstown State
Added Indiana State
Added Villanova
Added Delaware
Added Holy Cross
Added Yale
Added Howard
Ad

Checking that FCS data looks right

In [195]:
fcsDict['Samford'][fcsDict['Samford']['Year'] == '2023']

Unnamed: 0,Game Id,School,Conference,HomeAway,Points,Week,Year,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,interceptions,kickReturnTDs,kickReturnYards,kickReturns,kickingPoints,netPassingYards,passesDeflected,passesIntercepted,passingTDs,possessionTime,puntReturnTDs,puntReturnYards,puntReturns,qbHurries,rushingAttempts,rushingTDs,rushingYards,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt,totalTDs,School_opp,Conference_opp,HomeAway_opp,Points_opp,Week_opp,Year_opp,completionAttempts_opp,defensiveTDs_opp,firstDowns_opp,fourthDownEff_opp,fumblesLost_opp,fumblesRecovered_opp,interceptionTDs_opp,interceptionYards_opp,interceptions_opp,kickReturnTDs_opp,kickReturnYards_opp,kickReturns_opp,kickingPoints_opp,netPassingYards_opp,passesDeflected_opp,passesIntercepted_opp,passingTDs_opp,possessionTime_opp,puntReturnTDs_opp,puntReturnYards_opp,puntReturns_opp,qbHurries_opp,rushingAttempts_opp,rushingTDs_opp,rushingYards_opp,sacks_opp,tackles_opp,tacklesForLoss_opp,thirdDownEff_opp,totalFumbles_opp,totalPenaltiesYards_opp,totalYards_opp,turnovers_opp,yardsPerPass_opp,yardsPerRushAttempt_opp,totalTDs_opp,scoreDiff,pointTotal,Win,rolling_sum_Points20,rolling_sum_firstDowns20,rolling_sum_fumblesLost20,rolling_sum_fumblesRecovered20,rolling_sum_interceptions20,rolling_sum_kickReturnYards20,rolling_sum_kickingPoints20,rolling_sum_netPassingYards20,rolling_sum_passesDeflected20,rolling_sum_passesIntercepted20,rolling_sum_passingTDs20,rolling_sum_puntReturns20,rolling_sum_qbHurries20,rolling_sum_rushingAttempts20,rolling_sum_rushingTDs20,rolling_sum_rushingYards20,rolling_sum_sacks20,rolling_sum_tacklesForLoss20,rolling_sum_totalFumbles20,rolling_sum_totalPenaltiesYards20,rolling_sum_totalYards20,rolling_sum_turnovers20,rolling_sum_yardsPerPass20,rolling_sum_yardsPerRushAttempt20,rolling_sum_totalTDs20,rolling_sum_Points8,rolling_sum_firstDowns8,rolling_sum_fumblesLost8,rolling_sum_fumblesRecovered8,rolling_sum_interceptions8,rolling_sum_kickReturnYards8,rolling_sum_kickingPoints8,rolling_sum_netPassingYards8,rolling_sum_passesDeflected8,rolling_sum_passesIntercepted8,rolling_sum_passingTDs8,rolling_sum_puntReturns8,rolling_sum_qbHurries8,rolling_sum_rushingAttempts8,rolling_sum_rushingTDs8,rolling_sum_rushingYards8,rolling_sum_sacks8,rolling_sum_tacklesForLoss8,rolling_sum_totalFumbles8,rolling_sum_totalPenaltiesYards8,rolling_sum_totalYards8,rolling_sum_turnovers8,rolling_sum_yardsPerPass8,rolling_sum_yardsPerRushAttempt8,rolling_sum_totalTDs8,rolling_sum_Points20_opp,rolling_sum_firstDowns20_opp,rolling_sum_fumblesLost20_opp,rolling_sum_fumblesRecovered20_opp,rolling_sum_interceptions20_opp,rolling_sum_kickReturnYards20_opp,rolling_sum_kickingPoints20_opp,rolling_sum_netPassingYards20_opp,rolling_sum_passesDeflected20_opp,rolling_sum_passesIntercepted20_opp,rolling_sum_passingTDs20_opp,rolling_sum_puntReturns20_opp,rolling_sum_qbHurries20_opp,rolling_sum_rushingAttempts20_opp,rolling_sum_rushingTDs20_opp,rolling_sum_rushingYards20_opp,rolling_sum_sacks20_opp,rolling_sum_tacklesForLoss20_opp,rolling_sum_totalFumbles20_opp,rolling_sum_totalPenaltiesYards20_opp,rolling_sum_totalYards20_opp,rolling_sum_turnovers20_opp,rolling_sum_yardsPerPass20_opp,rolling_sum_yardsPerRushAttempt20_opp,rolling_sum_totalTDs20_opp,rolling_sum_Points8_opp,rolling_sum_firstDowns8_opp,rolling_sum_fumblesLost8_opp,rolling_sum_fumblesRecovered8_opp,rolling_sum_interceptions8_opp,rolling_sum_kickReturnYards8_opp,rolling_sum_kickingPoints8_opp,rolling_sum_netPassingYards8_opp,rolling_sum_passesDeflected8_opp,rolling_sum_passesIntercepted8_opp,rolling_sum_passingTDs8_opp,rolling_sum_puntReturns8_opp,rolling_sum_qbHurries8_opp,rolling_sum_rushingAttempts8_opp,rolling_sum_rushingTDs8_opp,rolling_sum_rushingYards8_opp,rolling_sum_sacks8_opp,rolling_sum_tacklesForLoss8_opp,rolling_sum_totalFumbles8_opp,rolling_sum_totalPenaltiesYards8_opp,rolling_sum_totalYards8_opp,rolling_sum_turnovers8_opp,rolling_sum_yardsPerPass8_opp,rolling_sum_yardsPerRushAttempt8_opp,rolling_sum_totalTDs8_opp,talent,SP
49,401540625,Samford,Southern,away,7,2,2023,21.34,,15.0,0.3,1.0,0.0,,,0.0,0.0,79.0,6.0,1.0,315.0,,,1.0,18.51,,,,,23.0,0.0,46.0,,,,6.13,2.0,4.55,361.0,1.0,9.3,2.0,1.0,Western Carolina,Southern,home,30,2,2023,29.35,,34.0,1.2,0.0,1.0,,,0.0,0.0,21.0,1.0,12.0,262.0,,,2.0,41.09,0.0,0.0,1.0,,49.0,1.0,284.0,,,,7.13,0.0,4.35,546.0,0.0,7.5,5.8,3.0,-23,37,False,,,,,,,,,,,,,,,,,,,,,,,,,,322.0,216.0,4.0,11.0,2.0,548.0,58.0,2651.0,0.0,7.0,27.0,6.0,0.0,312.0,16.0,1219.0,0.0,0.0,6.0,53.45,3870.0,6.0,7.5125,3.9625,43.0,,,,,,,,,,,,,,,,,,,,,,,,,,206.0,170.0,3.0,5.0,17.0,579.0,60.0,2143.0,1.0,3.0,16.0,10.0,1.0,274.0,8.0,1259.0,1.0,4.0,8.0,59.07,3402.0,20.0,7.475,4.5125,24.0,97.62,
54,401520221,Samford,Southern,away,13,3,2023,19.33,0.0,12.0,1.2,0.0,1.0,0.0,0.0,2.0,0.0,26.0,1.0,1.0,144.0,1.0,2.0,1.0,26.32,,,,3.0,28.0,1.0,74.0,1.0,61.0,5.0,5.14,0.0,1.5,218.0,2.0,4.4,2.6,2.0,Auburn,SEC,home,45,3,2023,27.36,0.0,26.0,2.3,1.0,0.0,0.0,12.0,2.0,0.0,89.0,3.0,9.0,340.0,6.0,2.0,2.0,33.28,0.0,0.8,3.0,2.0,43.0,4.0,222.0,2.0,34.0,7.0,7.14,2.0,5.45,562.0,3.0,9.4,5.2,6.0,-32,58,False,,,,,,,,,,,,,,,,,,,,,,,,,,295.0,215.0,5.0,5.0,1.0,575.0,55.0,2738.0,0.0,7.0,26.0,5.0,0.0,307.0,13.0,1133.0,0.0,0.0,8.0,50.4,3871.0,6.0,7.925,3.625,39.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,97.62,
58,401540583,Samford,Southern,home,24,4,2023,33.48,,27.0,2.4,1.0,0.0,,,2.0,0.0,47.0,1.0,6.0,299.0,,,1.0,27.17,0.0,0.1,1.0,,33.0,2.0,174.0,,,,8.16,1.0,10.104,473.0,3.0,6.2,5.3,3.0,Chattanooga,Southern,away,47,4,2023,21.26,,21.0,1.1,0.0,1.0,1.0,38.0,0.0,,,,11.0,323.0,,1.0,1.0,32.43,,,,,36.0,3.0,106.0,,,,7.12,0.0,4.29,429.0,0.0,12.4,2.9,5.0,-23,71,False,,,,,,,,,,,,,,,,,,,,,,,,,,280.0,208.0,4.0,6.0,3.0,548.0,52.0,2631.0,1.0,9.0,25.0,4.0,3.0,298.0,12.0,1045.0,1.0,5.0,6.0,43.16,3676.0,7.0,7.725,3.4,37.0,,,,,,,,,,,,,,,,,,,,,,,,,,247.0,186.0,1.0,9.0,8.0,305.0,65.0,2250.0,0.0,8.0,18.0,10.0,0.0,302.0,11.0,1181.0,0.0,0.0,7.0,31.89,3431.0,9.0,9.2,3.9375,30.0,97.62,
62,401540591,Samford,Southern,home,42,5,2023,42.5,,34.0,1.2,2.0,0.0,0.0,16.0,2.0,0.0,98.0,4.0,6.0,399.0,,1.0,1.0,33.54,0.0,43.0,4.0,,42.0,5.0,148.0,,,,7.15,2.0,5.38,547.0,4.0,8.0,3.5,6.0,East Tennessee State,Southern,away,28,5,2023,9.23,,15.0,0.2,0.0,2.0,0.0,32.0,1.0,0.0,99.0,4.0,4.0,118.0,,2.0,1.0,26.06,,,,,33.0,3.0,224.0,,,,3.1,0.0,7.81,342.0,1.0,5.1,6.8,4.0,14,70,True,,,,,,,,,,,,,,,,,,,,,,,,,,249.0,201.0,5.0,6.0,5.0,489.0,45.0,2488.0,1.0,8.0,22.0,4.0,3.0,288.0,11.0,1052.0,1.0,5.0,7.0,43.414,3540.0,10.0,7.4125,3.575,33.0,,,,,,,,,,,,,,,,,,,,,,,,,,165.0,137.0,4.0,4.0,11.0,639.0,57.0,1330.0,5.0,5.0,7.0,6.0,5.0,295.0,10.0,1241.0,3.0,7.0,12.0,42.68,2571.0,15.0,5.6625,4.275,18.0,97.62,
65,401540627,Samford,Southern,away,31,6,2023,31.43,,28.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,30.0,2.0,7.0,252.0,,1.0,1.0,32.45,0.0,10.0,1.0,,44.0,3.0,205.0,,,,9.17,,5.38,457.0,1.0,5.9,4.7,4.0,Wofford,Southern,home,10,6,2023,13.32,,14.0,1.1,0.0,0.0,0.0,0.0,1.0,0.0,69.0,3.0,4.0,114.0,,1.0,0.0,27.15,0.0,7.0,2.0,,28.0,1.0,137.0,,,,1.14,,4.35,251.0,1.0,3.6,4.9,1.0,21,41,True,,,,,,,,,,,,,,,,,,,,,,,,,,253.0,205.0,6.0,3.0,7.0,562.0,43.0,2627.0,1.0,9.0,20.0,8.0,3.0,294.0,15.0,1057.0,1.0,5.0,8.0,44.414,3684.0,13.0,7.4875,3.5125,35.0,,,,,,,,,,,,,,,,,,,,,,,,,,151.0,114.0,6.0,4.0,3.0,397.0,41.0,1500.0,1.0,8.0,5.0,9.0,3.0,251.0,12.0,955.1,1.0,3.0,9.0,56.29,2454.0,9.0,6.625,3.45,17.0,97.62,
70,401540601,Samford,Southern,home,21,7,2023,36.48,,22.0,2.4,0.0,0.0,0.0,0.0,0.0,,,,3.0,291.0,,1.0,2.0,27.49,0.0,9.0,1.0,,31.0,1.0,46.0,,,,7.18,1.0,9.84,337.0,0.0,6.1,1.5,3.0,Furman,Southern,away,27,7,2023,19.29,,25.0,1.1,0.0,0.0,,,1.0,0.0,53.0,3.0,9.0,205.0,,,2.0,32.11,0.0,9.0,1.0,,42.0,1.0,211.0,,,,6.14,1.0,4.45,416.0,1.0,7.1,5.0,3.0,-6,48,False,,,,,,,,,,,,,,,,,,,,,,,,,,250.0,210.0,5.0,2.0,7.0,502.0,46.0,2557.0,1.0,8.0,17.0,8.0,3.0,304.0,17.0,1148.0,1.0,5.0,7.0,43.154,3705.0,12.0,7.1625,3.675,34.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,295.0,167.0,1.0,9.0,6.0,403.0,55.0,1584.0,0.0,11.0,10.0,10.0,3.0,318.0,26.0,1446.0,1.0,7.0,4.0,53.06,3030.0,7.0,7.15,4.4875,39.0,97.62,
73,401540623,Samford,Southern,away,27,8,2023,34.45,,20.0,2.2,0.0,1.0,,,0.0,0.0,21.0,1.0,9.0,229.0,,,0.0,29.44,0.0,27.0,3.0,,37.0,3.0,213.0,,,,9.2,0.0,6.46,442.0,0.0,5.1,5.8,3.0,VMI,Southern,home,14,8,2023,19.34,,17.0,2.4,1.0,0.0,,,0.0,0.0,114.0,6.0,2.0,182.0,,,1.0,30.16,0.0,1.0,2.0,,34.0,1.0,154.0,,,,4.16,1.0,2.1,336.0,1.0,5.4,4.5,2.0,13,41,True,,,,,,,,,,,,,,,,,,,,,,,,,,236.0,203.0,5.0,2.0,7.0,391.0,44.0,2495.0,1.0,7.0,15.0,9.0,3.0,289.0,17.0,1073.0,1.0,5.0,8.0,46.504,3568.0,12.0,6.9875,3.5375,32.0,,,,,,,,,,,,,,,,,,,,,,,,,,107.0,121.0,5.0,5.0,7.0,412.0,23.0,1531.0,2.0,2.0,7.0,11.0,0.0,264.0,6.0,740.0,1.0,8.0,11.0,38.96,2271.0,12.0,7.05,2.6375,14.0,97.62,
77,401540620,Samford,Southern,home,37,9,2023,26.36,,25.0,0.0,0.0,2.0,,,0.0,0.0,23.0,1.0,7.0,353.0,,,3.0,25.14,0.0,23.0,2.0,,33.0,2.0,151.0,,,,5.11,0.0,5.5,504.0,0.0,9.8,4.6,5.0,The Citadel,Southern,away,7,9,2023,11.22,,16.0,2.6,2.0,0.0,,,0.0,0.0,63.0,3.0,1.0,122.0,,,1.0,34.46,0.0,0.0,3.0,,43.0,0.0,186.0,,,,4.15,2.0,4.3,308.0,2.0,5.5,4.3,1.0,30,44,True,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,213.0,189.0,5.0,2.0,7.0,338.0,39.0,2270.0,1.0,6.0,11.0,12.0,3.0,281.0,18.0,1107.0,1.0,5.0,7.0,48.554,3377.0,12.0,6.6125,3.7625,29.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,87.0,126.0,6.0,1.0,5.0,320.0,19.0,1050.0,1.0,6.0,4.0,4.0,3.0,296.0,7.0,1111.0,2.0,5.0,9.0,55.37,2161.0,11.0,6.575,3.4875,11.0,97.62,
82,401540615,Samford,Southern,away,21,11,2023,25.36,,18.0,2.2,0.0,1.0,,,1.0,0.0,17.0,2.0,3.0,210.0,,,2.0,23.05,0.0,0.1,1.0,,27.0,1.0,112.0,,,,3.12,0.0,6.75,322.0,1.0,5.8,4.1,3.0,Mercer,Southern,home,28,11,2023,18.25,,28.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,62.0,3.0,2.0,137.0,,1.0,0.0,36.55,0.0,0.3,2.0,,49.0,4.0,243.0,,,,7.13,3.0,8.58,380.0,1.0,5.5,5.0,4.0,-7,49,False,638.0,476.0,11.0,16.0,11.0,1174.0,116.0,6039.0,7.0,15.0,48.0,20.0,7.0,698.0,37.0,2623.0,2.0,9.0,17.0,125.014,8662.0,22.0,7.43,3.645,85.0,202.0,183.0,4.0,4.0,7.0,324.0,40.0,2282.0,1.0,5.0,10.0,12.0,3.0,271.0,17.0,1057.0,1.0,5.0,6.0,48.714,3339.0,11.0,6.85,3.75,27.0,676.0,416.0,25.0,13.0,12.0,1187.0,139.0,4857.0,1.0,23.0,42.0,37.0,2.0,768.0,36.0,3422.0,0.0,6.0,35.0,118.603,8279.0,37.0,9.08,4.335,83.0,248.0,142.0,8.0,8.0,2.0,306.0,54.0,1688.0,0.0,8.0,10.0,19.0,0.0,301.0,15.0,1164.0,0.0,0.0,10.0,51.533,2852.0,10.0,9.025,3.775,28.0,97.62,
85,401540543,Samford,Southern,home,27,12,2023,22.37,,25.0,0.1,0.0,2.0,0.0,28.0,0.0,,,,9.0,205.0,,2.0,1.0,29.2,0.0,21.0,3.0,,36.0,1.0,214.0,,,,4.15,2.0,7.77,419.0,0.0,5.5,5.9,2.0,UT Martin,Big South-OVC,away,17,12,2023,23.38,,21.0,3.4,2.0,0.0,,,2.0,0.0,36.0,3.0,5.0,261.0,,,1.0,30.4,0.0,0.1,2.0,,31.0,1.0,184.0,,,,2.13,2.0,11.104,445.0,4.0,6.9,5.9,2.0,10,44,True,607.0,466.0,11.0,17.0,11.0,993.0,109.0,5833.0,6.0,15.0,47.0,21.0,3.0,693.0,35.0,2621.0,2.0,7.0,16.0,128.464,8454.0,22.0,7.33,3.67,82.0,216.0,186.0,3.0,5.0,8.0,262.0,42.0,2177.0,1.0,5.0,11.0,13.0,3.0,275.0,18.0,1123.0,1.0,5.0,4.0,50.914,3300.0,11.0,6.4125,4.0125,29.0,672.0,415.0,14.0,13.0,16.0,969.8,137.0,4746.0,8.0,23.0,39.0,29.0,3.0,701.0,44.0,3915.0,5.0,20.0,29.0,136.81,8661.0,30.0,6.97,5.375,87.0,287.0,174.0,9.0,3.0,5.0,475.0,58.0,1887.0,0.0,7.0,21.0,12.0,0.0,325.0,15.0,1710.0,0.0,0.0,14.0,49.07,3597.0,14.0,7.275,5.1,37.0,97.62,


Saving FCS data so we can easily refer back to it mid season

In [197]:
directory = '/users/blaizelahman/Desktop/CFB Model/FCS Data'

for key, team in fcsDict.items():

    name = key.replace(' ', '_') + '_data.csv'
    path = f'{directory}/{name}'
    team.to_csv(path)
    print('CSV: ' + name)

CSV: Jacksonville_State_data.csv
CSV: Long_Island_University_data.csv
CSV: East_Tennessee_State_data.csv
CSV: Mercer_data.csv
CSV: Alabama_State_data.csv
CSV: McNeese_data.csv
CSV: Austin_Peay_data.csv
CSV: Southeast_Missouri_State_data.csv
CSV: Tennessee_Tech_data.csv
CSV: Arkansas-Pine_Bluff_data.csv
CSV: Samford_data.csv
CSV: Charleston_Southern_data.csv
CSV: Tennessee_State_data.csv
CSV: Prairie_View_data.csv
CSV: Monmouth_data.csv
CSV: Northwestern_State_data.csv
CSV: Bethune-Cookman_data.csv
CSV: UT_Martin_data.csv
CSV: Gardner-Webb_data.csv
CSV: Southeastern_Louisiana_data.csv
CSV: North_Carolina_Central_data.csv
CSV: Hampton_data.csv
CSV: Fordham_data.csv
CSV: Texas_Southern_data.csv
CSV: Colgate_data.csv
CSV: William_&_Mary_data.csv
CSV: South_Carolina_State_data.csv
CSV: North_Carolina_A&T_data.csv
CSV: Kennesaw_State_data.csv
CSV: Norfolk_State_data.csv
CSV: Furman_data.csv
CSV: Albany_data.csv
CSV: Central_Connecticut_data.csv
CSV: New_Hampshire_data.csv
CSV: Richmond_data.

Creating a function that will grab the betting and rolling sum data for all teams for the upcoming week and add them to each team's dataframe.

In [100]:
from p1_DataImportAndWrangle import customRollingSum

Manually importing getPreferredLine and correctSpread functions from p4 because they did not import correctly

In [101]:
# function to get line from most preferred available line
def getPreferredLine(group, lineProviders):

    # go through preferred lines and if our preferred providers are there, return the line
    for provider in lineProviders:
        preferredLine = group[group['LineProvider'] == provider]

        # if preferred line provider has line, return
        if not preferredLine.empty:
            return preferredLine.iloc[0]

    # returning first available line if none of our preferred ones are available
    return group.iloc[0] 

# modified version of the correctSpread function from p4 that only corrects the spread on the last row
def correctSpread(row, team):
    if row.index == -1 and 'HomeTeam' in row.index and row['HomeTeam'] == team:
        row['Spread'] = float(row['Spread']) * -1
    return row

In [333]:
def grabUpcomingWeekData(year, week, teamDict, fcsDict):

    # setting the download directory and Chrome settings
    directory = '/Users/blaizelahman/Desktop/CFB Model'
    chromeOptions = Options()
    prefs = {'download.default_directory': directory}
    chromeOptions.add_experimental_option('prefs', prefs)
    
    # these extra steps are due to the current version of chromedriver not being compatible with the 
    # current version of chrome at the time of development, feel free to delete or change as it pertains
    # to user situation
    path = '/Users/blaizelahman/Downloads/chromedriver_104'
    olderChromePath = '/Applications/Older Chrome.app/Contents/MacOS/Google Chrome'

    chromeOptions.binary_location = olderChromePath
    
    # creating Chrome driver
    driver = webdriver.Chrome(service = Service(path), options = chromeOptions)

    # setting the link that directs to the betting data
    bettingDataLink = f'https://collegefootballdata.com/exporter/lines?year={year}&week={week}&seasonType=regular'
    
    driver.get(bettingDataLink)
    time.sleep(4) 
            
    # clicking the query button
    query = driver.find_element(By.XPATH, "//button[contains(span/text(), 'Query')]")
    query.click()
    time.sleep(3) 
            
    # clicking the export button
    export = driver.find_element(By.XPATH, "//button[contains(span/text(), 'Export')]")
    export.click()
    time.sleep(3)

    key = str(year)
            
    # grabs files from CFBData folder
    files = os.listdir(directory)
        
    # grab the file paths for all files ending in .csv
    filePaths = [os.path.join(directory, name) for name in files if name.endswith('.csv')]

    # grabbing the most recently made file out of those in paths
    file = max(filePaths, key=os.path.getctime)
            
    # loading csv file
    bettingData = pd.read_csv(file)

    # deleting the file after it has been added
    os.remove(file)

    driver.quit()

    # setting our preferred line providers
    lineProviders = ['DraftKings', 'consensus', 'Bovada']

    # getting the preferred line for each individual game
    preferredLines = bettingData.groupby('Id').apply(lambda x: getPreferredLine(x, lineProviders)).reset_index(drop = True)

    # adding a new row to each home team's dataframe with betting data and both teams' rolling sum data
    for index, row in preferredLines.iterrows():
    
        # grabbing the home team's name if it's in teamDict
        if row['HomeTeam'] in teamDict:
            homeTeam = row['HomeTeam']

        # grabbing if home team's name if it's in fcsDict
        elif row['HomeTeam'] in fcsDict:
            homeTeam = row['HomeTeam']
            
        else: 
            homeTeam = None

        # grabbing the away team's name if it's in teamDict
        if row['AwayTeam'] in teamDict: 
            awayTeam = row['AwayTeam']

        # grabbing if home team's name if it's in fcsDict
        elif row['AwayTeam'] in fcsDict:
            awayTeam = row['AwayTeam']
            
        else:
            awayTeam = None

        rowDF = pd.DataFrame([row])

        # adding new row with betting data for the home team if they're in teamDict
        if homeTeam != None:

            # checking if team is in teamDict and adding new row if so
            if homeTeam in teamDict:

                # correcting the spread format
                if row['HomeTeam'] == homeTeam:
                    rowDF.iloc[0, rowDF.columns.get_loc('Spread')] = float(rowDF.iloc[0, rowDF.columns.get_loc('Spread')]) * -1
                
                # grabbing the teams' dataframes and adding a new row with the betting data
                homeDF = teamDict[homeTeam] 
                homeDF = pd.concat([homeDF, rowDF], ignore_index = True)
                homeDF.reset_index(drop = True, inplace = True)
                teamDict[homeTeam] = homeDF
    

            # adding new row to FCS team if team isn't in teamDict
            else:

                # correcting the spread format
                if row['HomeTeam'] == homeTeam:
                    rowDF.iloc[0, rowDF.columns.get_loc('Spread')] = float(rowDF.iloc[0, rowDF.columns.get_loc('Spread')]) * -1
                    
                # grabbing the teams' dataframes and adding a new row with the betting data
                homeDF = fcsDict[homeTeam] 
                homeDF = pd.concat([homeDF, rowDF], ignore_index = True)
                homeDF.reset_index(drop = True, inplace = True)
                fcsDict[homeTeam] = homeDF
                

        # adding new row with betting data for the away team if they're in teamDict
        if awayTeam != None:

            # checking if team is in teamDict and adding new row if so
            if awayTeam in teamDict:

                # correcting the spread format
                if row['HomeTeam'] == homeTeam:
                    rowDF.iloc[0, rowDF.columns.get_loc('Spread')] = float(rowDF.iloc[0, rowDF.columns.get_loc('Spread')]) * -1
                
                # grabbing the teams' dataframes and adding a new row with the betting data
                awayDF = teamDict[awayTeam] 
                awayDF = pd.concat([awayDF, rowDF], ignore_index = True)
                awayDF.reset_index(drop = True, inplace = True)
                teamDict[awayTeam] = awayDF

            else:

                # correcting the spread format
                if row['HomeTeam'] == homeTeam:
                    rowDF.iloc[0, rowDF.columns.get_loc('Spread')] = float(rowDF.iloc[0, rowDF.columns.get_loc('Spread')]) * -1
                
                # grabbing the teams' dataframes and adding a new row with the betting data
                awayDF = fcsDict[awayTeam] 
                awayDF = pd.concat([awayDF, rowDF], ignore_index = True)
                awayDF.reset_index(drop = True, inplace = True)
                teamDict[awayTeam] = awayDF
                
    # grabbing all of the rolling columns that we will have to update (not including opposing columns)
    rollingCols = [col for col in teamDict['Florida State'].columns if 'rolling_sum' in col and '_opp' not in col]

    # grabbing rolling sum data for teams in teamDict
    for team, teamDF in teamDict.items():

        # checking if a new row has been initialized for the upcoming week and, if it hasn't,
        # say that the team isn't playing this week and skip it
        if pd.isna(teamDF.iloc[-1]['School']) == False:
            print(f'{team} not playing this week')
            continue
    
        for col in rollingCols:
            
            # grabbing the column and window of games to pull from
            parts = col.split('_')
            columnName = parts[2]

            if columnName[-1] == '8':
                columnName = columnName[:-1]
                window = 8
            else:
                columnName = columnName[:-2]
                window = 20
            
            # grabbing the rolling sum and setting the given column in the most recent row with it
            rollingSum = customRollingSum(teamDF[columnName], window)

            # correcting yardsPerPass and yardPerRushAttempt columns as we did in p1
            if col == 'rolling_sum_yardsPerPass20' or col == 'rolling_sum_yardsPerRushAttempt20':
                rollingSum = rollingSum / 20

            if col == 'rolling_sum_yardsPerPass8' or col == 'rolling_sum_yardsPerRushAttempt8':
                rollingSum = rollingSum / 8
                
            teamDF.at[teamDF.index[-1], col] = rollingSum.iloc[-1]

    # grabbing rolling sum data for teams in fcsDict
    for team, teamDF in fcsDict.items():

        # checking if a new row has been initialized for the upcoming week and, if it hasn't,
        # say that the team isn't playing this week and skip it
        if pd.isna(teamDF.iloc[-1]['School']) == False and team not in teamDict:
            continue

        
        for col in rollingCols:
            
            # grabbing the column and window of games to pull from
            parts = col.split('_')
            columnName = parts[2]

            if columnName[-1] == '8':
                columnName = columnName[:-1]
                window = 8
            else:
                columnName = columnName[:-2]
                window = 20
            
            # grabbing the rolling sum and setting the given column in the most recent row with it
            rollingSum = customRollingSum(teamDF[columnName], window)

            # correcting yardsPerPass and yardPerRushAttempt columns as we did in p1
            if col == 'rolling_sum_yardsPerPass20' or col == 'rolling_sum_yardsPerRushAttempt20':
                rollingSum = rollingSum / 20

            if col == 'rolling_sum_yardsPerPass8' or col == 'rolling_sum_yardsPerRushAttempt8':
                rollingSum = rollingSum / 8

            
            teamDF.at[teamDF.index[-1], col] = rollingSum.iloc[-1]

    # modified version of the 'mergeRollingSum' function from p1 that will grab opponent rolling_sum
    # data for each game, will also grab SP+ and talent data from past week (unless it's week 1,
    # which will be skipped as a different function will be used to grab new data for the upcoming year)
    for team, teamDF in teamDict.items():

        # checking if a new row has been initialized for the upcoming week and, if it hasn't,
        # skip it
        if pd.isna(teamDF.iloc[-1]['School']) == False:
            continue
        
        teamDF = teamDict[team].copy()

        # grabbing most recent game and index
        lastRow = teamDF.iloc[-1]
        lastIndex = teamDF.index[-1]
    
        # grabbing the game id and opponent's name to access the game in their dataframe
        gameID = lastRow['Id']

        if team == lastRow['HomeTeam']:
            oppName = lastRow['AwayTeam']
        else:
            oppName = lastRow['HomeTeam']

        
        # grabbing the opponent's dataframe based on what dictionary they're in
        if oppName in teamDict:
            
            oppDF = teamDict[oppName]
            
        elif oppName in fcsDict: 
            oppDF = fcsDict[oppName]

        else: 
            continue

        # grabbing the opponent's rolling_sum columns from the game they played against the given team
        oppRow = oppDF[oppDF['Id'] == gameID]

        if not oppRow.empty:
            
            # updating rolling sum columns for opposing team
            for col in rollingCols:
                    
                teamDF.loc[lastIndex, col + '_opp'] = oppRow.iloc[0][col]

        # skipping the talent and SP+ rating step if it's week 1 because this will be done using
        # grabUpcomingYearTalent and grabUpcomingYearSP
        if week == 1:
            teamDict[team] = teamDF
            continue
            
        # ensure columns exist and set them as NaN values if not
        if 'talent' not in teamDF.columns:
            teamDF['talent'] = pd.NA
        if 'SP' not in teamDF.columns:
            teamDF['SP'] = pd.NA
        
        # assigning talent and SP+ ratings from the previous row
        for col in ['talent', 'SP']:
            teamDF.loc[lastIndex, col] = teamDF.loc[lastIndex - 1, col]

        teamDict[team] = teamDF

    # adding in the opposing team's talent and SP+ rating
    for team, teamDF in teamDict.items():

        # checking if a new row has been initialized for the upcoming week and, if it hasn't,
        # skip it
        if pd.isna(teamDF.iloc[-1]['School']) == False:
            continue
        
        teamDF = teamDict[team].copy()

        # grabbing most recent game and index
        lastRow = teamDF.iloc[-1]
        lastIndex = teamDF.index[-1]

        if team == lastRow['HomeTeam']:
            oppName = lastRow['AwayTeam']
        else:
            oppName = lastRow['HomeTeam']

        # grabbing the opponent's dataframe based on what dictionary they're in
        if oppName in teamDict:
            
            oppDF = teamDict[oppName]

            # setting if a team is in the FCS or not because the naming conventions for columns differs
            fcs = False
            
        elif oppName in fcsDict and oppName not in teamDict: 
            
            oppDF = fcsDict[oppName]

            # setting if a team is in the FCS or not because the naming conventions for columns differs
            fcs = True

        else: 
            continue

        # skipping the talent and SP+ adding process if it's week 1
        if week == 1:
            teamDict[team] = teamDF
            continue

        # seeing if a team is in the FCS or not because the naming conventions for columns differs
        if fcs == False:
            
            # grabbing the opposing team's most recent played game to grab their talent and SP+ data
            filteredOppDF = oppDF[oppDF['Year'] == year]

        if fcs == True:
            filteredOppDF = oppDF[oppDF['Year'] == str(year)]

        # checking if dataframe is empty
        if filteredOppDF.empty:
            print(f"No data for opponent in year {year}: {oppName}")
            continue 

        oppRow = filteredOppDF.iloc[0]

        # ensure columns exist and set them as NaN values if not
        if 'talent' not in oppRow.index:
            oppRow['talent'] = pd.NA
        if 'SP' not in oppRow.index:
            oppRow['SP'] = pd.NA
            
        # adding oppossing team talent and SP+ ratings
        for col in ['talent', 'SP']:
            teamDF.loc[lastIndex, col + '_opp'] = oppRow[col]

        teamDict[team] = teamDF
        
        

In [462]:
grabUpcomingWeekData(2023, 5, teamDict, fcsDict)

Wisconsin not playing this week
Miami not playing this week
San José State not playing this week
Oklahoma State not playing this week
UCLA not playing this week
Florida International not playing this week
Wake Forest not playing this week
New Mexico State not playing this week
Ohio State not playing this week
Washington State not playing this week
Florida State not playing this week
North Carolina not playing this week
Kansas State not playing this week
Army not playing this week
Florida Atlantic not playing this week
Colorado State not playing this week
UT San Antonio not playing this week
Ohio not playing this week
No data for opponent in year 2023: Coastal Carolina


Checking that this works as intended by looking at a team that is playing this week (Georgia) and a team with a bye week (Florida State)

In [339]:
teamDict['Georgia'][-1:]

Unnamed: 0.1,Unnamed: 0,Game Id,School,Conference,HomeAway,Points,Week,Year,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,interceptions,kickReturnTDs,kickReturnYards,kickReturns,kickingPoints,netPassingYards,passesDeflected,passesIntercepted,passingTDs,possessionTime,puntReturnTDs,puntReturnYards,puntReturns,qbHurries,rushingAttempts,rushingTDs,rushingYards,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt,totalTDs,School_opp,Conference_opp,HomeAway_opp,Points_opp,Week_opp,Year_opp,completionAttempts_opp,defensiveTDs_opp,firstDowns_opp,fourthDownEff_opp,fumblesLost_opp,fumblesRecovered_opp,interceptionTDs_opp,interceptionYards_opp,interceptions_opp,kickReturnTDs_opp,kickReturnYards_opp,kickReturns_opp,kickingPoints_opp,netPassingYards_opp,passesDeflected_opp,passesIntercepted_opp,passingTDs_opp,possessionTime_opp,puntReturnTDs_opp,puntReturnYards_opp,puntReturns_opp,qbHurries_opp,rushingAttempts_opp,rushingTDs_opp,rushingYards_opp,sacks_opp,tackles_opp,tacklesForLoss_opp,thirdDownEff_opp,totalFumbles_opp,totalPenaltiesYards_opp,totalYards_opp,turnovers_opp,yardsPerPass_opp,yardsPerRushAttempt_opp,totalTDs_opp,scoreDiff,pointTotal,Win,rolling_sum_Points20,rolling_sum_firstDowns20,rolling_sum_fumblesLost20,rolling_sum_fumblesRecovered20,rolling_sum_interceptions20,rolling_sum_kickReturnYards20,rolling_sum_kickingPoints20,rolling_sum_netPassingYards20,rolling_sum_passesDeflected20,rolling_sum_passesIntercepted20,rolling_sum_passingTDs20,rolling_sum_puntReturns20,rolling_sum_qbHurries20,rolling_sum_rushingAttempts20,rolling_sum_rushingTDs20,rolling_sum_rushingYards20,rolling_sum_sacks20,rolling_sum_tacklesForLoss20,rolling_sum_totalFumbles20,rolling_sum_totalPenaltiesYards20,rolling_sum_totalYards20,rolling_sum_turnovers20,rolling_sum_yardsPerPass20,rolling_sum_yardsPerRushAttempt20,rolling_sum_totalTDs20,rolling_sum_Points8,rolling_sum_firstDowns8,rolling_sum_fumblesLost8,rolling_sum_fumblesRecovered8,rolling_sum_interceptions8,rolling_sum_kickReturnYards8,rolling_sum_kickingPoints8,rolling_sum_netPassingYards8,rolling_sum_passesDeflected8,rolling_sum_passesIntercepted8,rolling_sum_passingTDs8,rolling_sum_puntReturns8,rolling_sum_qbHurries8,rolling_sum_rushingAttempts8,rolling_sum_rushingTDs8,rolling_sum_rushingYards8,rolling_sum_sacks8,rolling_sum_tacklesForLoss8,rolling_sum_totalFumbles8,rolling_sum_totalPenaltiesYards8,rolling_sum_totalYards8,rolling_sum_turnovers8,rolling_sum_yardsPerPass8,rolling_sum_yardsPerRushAttempt8,rolling_sum_totalTDs8,rolling_sum_Points20_opp,rolling_sum_firstDowns20_opp,rolling_sum_fumblesLost20_opp,rolling_sum_fumblesRecovered20_opp,rolling_sum_interceptions20_opp,rolling_sum_kickReturnYards20_opp,rolling_sum_kickingPoints20_opp,rolling_sum_netPassingYards20_opp,rolling_sum_passesDeflected20_opp,rolling_sum_passesIntercepted20_opp,rolling_sum_passingTDs20_opp,rolling_sum_puntReturns20_opp,rolling_sum_qbHurries20_opp,rolling_sum_rushingAttempts20_opp,rolling_sum_rushingTDs20_opp,rolling_sum_rushingYards20_opp,rolling_sum_sacks20_opp,rolling_sum_tacklesForLoss20_opp,rolling_sum_totalFumbles20_opp,rolling_sum_totalPenaltiesYards20_opp,rolling_sum_totalYards20_opp,rolling_sum_turnovers20_opp,rolling_sum_yardsPerPass20_opp,rolling_sum_yardsPerRushAttempt20_opp,rolling_sum_totalTDs20_opp,rolling_sum_Points8_opp,rolling_sum_firstDowns8_opp,rolling_sum_fumblesLost8_opp,rolling_sum_fumblesRecovered8_opp,rolling_sum_interceptions8_opp,rolling_sum_kickReturnYards8_opp,rolling_sum_kickingPoints8_opp,rolling_sum_netPassingYards8_opp,rolling_sum_passesDeflected8_opp,rolling_sum_passesIntercepted8_opp,rolling_sum_passingTDs8_opp,rolling_sum_puntReturns8_opp,rolling_sum_qbHurries8_opp,rolling_sum_rushingAttempts8_opp,rolling_sum_rushingTDs8_opp,rolling_sum_rushingYards8_opp,rolling_sum_sacks8_opp,rolling_sum_tacklesForLoss8_opp,rolling_sum_totalFumbles8_opp,rolling_sum_totalPenaltiesYards8_opp,rolling_sum_totalYards8_opp,rolling_sum_turnovers8_opp,rolling_sum_yardsPerPass8_opp,rolling_sum_yardsPerRushAttempt8_opp,rolling_sum_totalTDs8_opp,talent,talent_opp,SP,SP_opp,Id,HomeTeam,HomeScore,AwayTeam,AwayScore,LineProvider,OverUnder,Spread,FormattedSpread,OpeningSpread,OpeningOverUnder,HomeMoneyline,AwayMoneyline
194,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,771.0,485.0,12.0,8.0,12.0,768.0,191.0,5755.0,65.0,16.0,42.0,31.0,23.0,733.0,51.0,3810.0,48.0,113.0,24.0,98.99,9565.0,24.0,9.015,5.14,95.0,306.0,198.0,4.0,1.0,4.0,329.0,82.0,2310.0,23.0,4.0,17.0,11.0,9.0,286.0,20.0,1562.0,20.0,40.0,9.0,38.49,3872.0,8.0,9.4625,5.4125,37.0,501.0,368.0,17.0,16.0,17.0,829.0,131.0,3620.0,72.0,13.0,19.0,36.0,54.0,783.0,39.0,3653.0,49.0,113.0,40.0,149.875,7273.0,34.0,6.39,4.565,60.0,242.0,150.0,8.0,7.0,5.0,442.0,60.0,1027.0,31.0,9.0,8.0,16.0,25.0,352.0,19.0,1887.0,20.0,46.0,13.0,66.555,2914.0,13.0,5.35,5.35,29.0,977.87,770.94,31.2,7.6,401520280.0,Auburn,20.0,Georgia,27.0,DraftKings,45.5,14.0,Georgia -14,18.5,,440.0,-600.0


In [340]:
teamDict['Florida State'][-1:]

Unnamed: 0.1,Unnamed: 0,Game Id,School,Conference,HomeAway,Points,Week,Year,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,interceptions,kickReturnTDs,kickReturnYards,kickReturns,kickingPoints,netPassingYards,passesDeflected,passesIntercepted,passingTDs,possessionTime,puntReturnTDs,puntReturnYards,puntReturns,qbHurries,rushingAttempts,rushingTDs,rushingYards,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt,totalTDs,School_opp,Conference_opp,HomeAway_opp,Points_opp,Week_opp,Year_opp,completionAttempts_opp,defensiveTDs_opp,firstDowns_opp,fourthDownEff_opp,fumblesLost_opp,fumblesRecovered_opp,interceptionTDs_opp,interceptionYards_opp,interceptions_opp,kickReturnTDs_opp,kickReturnYards_opp,kickReturns_opp,kickingPoints_opp,netPassingYards_opp,passesDeflected_opp,passesIntercepted_opp,passingTDs_opp,possessionTime_opp,puntReturnTDs_opp,puntReturnYards_opp,puntReturns_opp,qbHurries_opp,rushingAttempts_opp,rushingTDs_opp,rushingYards_opp,sacks_opp,tackles_opp,tacklesForLoss_opp,thirdDownEff_opp,totalFumbles_opp,totalPenaltiesYards_opp,totalYards_opp,turnovers_opp,yardsPerPass_opp,yardsPerRushAttempt_opp,totalTDs_opp,scoreDiff,pointTotal,Win,rolling_sum_Points20,rolling_sum_firstDowns20,rolling_sum_fumblesLost20,rolling_sum_fumblesRecovered20,rolling_sum_interceptions20,rolling_sum_kickReturnYards20,rolling_sum_kickingPoints20,rolling_sum_netPassingYards20,rolling_sum_passesDeflected20,rolling_sum_passesIntercepted20,rolling_sum_passingTDs20,rolling_sum_puntReturns20,rolling_sum_qbHurries20,rolling_sum_rushingAttempts20,rolling_sum_rushingTDs20,rolling_sum_rushingYards20,rolling_sum_sacks20,rolling_sum_tacklesForLoss20,rolling_sum_totalFumbles20,rolling_sum_totalPenaltiesYards20,rolling_sum_totalYards20,rolling_sum_turnovers20,rolling_sum_yardsPerPass20,rolling_sum_yardsPerRushAttempt20,rolling_sum_totalTDs20,rolling_sum_Points8,rolling_sum_firstDowns8,rolling_sum_fumblesLost8,rolling_sum_fumblesRecovered8,rolling_sum_interceptions8,rolling_sum_kickReturnYards8,rolling_sum_kickingPoints8,rolling_sum_netPassingYards8,rolling_sum_passesDeflected8,rolling_sum_passesIntercepted8,rolling_sum_passingTDs8,rolling_sum_puntReturns8,rolling_sum_qbHurries8,rolling_sum_rushingAttempts8,rolling_sum_rushingTDs8,rolling_sum_rushingYards8,rolling_sum_sacks8,rolling_sum_tacklesForLoss8,rolling_sum_totalFumbles8,rolling_sum_totalPenaltiesYards8,rolling_sum_totalYards8,rolling_sum_turnovers8,rolling_sum_yardsPerPass8,rolling_sum_yardsPerRushAttempt8,rolling_sum_totalTDs8,rolling_sum_Points20_opp,rolling_sum_firstDowns20_opp,rolling_sum_fumblesLost20_opp,rolling_sum_fumblesRecovered20_opp,rolling_sum_interceptions20_opp,rolling_sum_kickReturnYards20_opp,rolling_sum_kickingPoints20_opp,rolling_sum_netPassingYards20_opp,rolling_sum_passesDeflected20_opp,rolling_sum_passesIntercepted20_opp,rolling_sum_passingTDs20_opp,rolling_sum_puntReturns20_opp,rolling_sum_qbHurries20_opp,rolling_sum_rushingAttempts20_opp,rolling_sum_rushingTDs20_opp,rolling_sum_rushingYards20_opp,rolling_sum_sacks20_opp,rolling_sum_tacklesForLoss20_opp,rolling_sum_totalFumbles20_opp,rolling_sum_totalPenaltiesYards20_opp,rolling_sum_totalYards20_opp,rolling_sum_turnovers20_opp,rolling_sum_yardsPerPass20_opp,rolling_sum_yardsPerRushAttempt20_opp,rolling_sum_totalTDs20_opp,rolling_sum_Points8_opp,rolling_sum_firstDowns8_opp,rolling_sum_fumblesLost8_opp,rolling_sum_fumblesRecovered8_opp,rolling_sum_interceptions8_opp,rolling_sum_kickReturnYards8_opp,rolling_sum_kickingPoints8_opp,rolling_sum_netPassingYards8_opp,rolling_sum_passesDeflected8_opp,rolling_sum_passesIntercepted8_opp,rolling_sum_passingTDs8_opp,rolling_sum_puntReturns8_opp,rolling_sum_qbHurries8_opp,rolling_sum_rushingAttempts8_opp,rolling_sum_rushingTDs8_opp,rolling_sum_rushingYards8_opp,rolling_sum_sacks8_opp,rolling_sum_tacklesForLoss8_opp,rolling_sum_totalFumbles8_opp,rolling_sum_totalPenaltiesYards8_opp,rolling_sum_totalYards8_opp,rolling_sum_turnovers8_opp,rolling_sum_yardsPerPass8_opp,rolling_sum_yardsPerRushAttempt8_opp,rolling_sum_totalTDs8_opp,talent,talent_opp,SP,SP_opp,Id,HomeTeam,HomeScore,AwayTeam,AwayScore,LineProvider,OverUnder,Spread,FormattedSpread,OpeningSpread,OpeningOverUnder,HomeMoneyline,AwayMoneyline
169,169.0,401411138.0,Florida State,ACC,home,28.0,7.0,2022.0,24.42,0.0,28.0,1.4,1.0,0.0,,,0.0,0.0,14.0,1.0,4.0,254.0,3.0,,2.0,28.46,0.0,30.0,2.0,1.0,34.0,2.0,206.0,3.0,37.0,4.0,7.13,1.0,3.17,460.0,1.0,6.0,6.1,4.0,Clemson,ACC,away,34.0,7.0,2022.0,15.23,0.0,18.0,1.1,0.0,1.0,,,0.0,0.0,69.0,1.0,10.0,203.0,4.0,,3.0,31.14,,,,6.0,44.0,1.0,167.0,2.0,30.0,9.0,9.17,0.0,8.72,370.0,0.0,8.8,3.8,4.0,-6.0,62.0,False,585.0,409.0,12.0,11.0,18.0,895.0,115.0,4487.0,71.0,20.0,34.0,28.0,61.0,724.0,36.0,3636.0,50.0,136.0,23.0,158.254,8123.0,30.0,8.325,4.88,73.0,235.0,179.0,3.0,4.0,7.0,438.0,45.0,2024.0,24.0,8.0,15.0,10.0,26.0,284.0,15.0,1484.0,20.0,55.0,8.0,71.25,3508.0,10.0,8.4,5.0875,31.0,630.0,418.0,11.0,16.0,12.0,742.0,172.0,4346.0,93.0,18.0,33.0,37.0,88.0,755.0,42.0,3457.0,59.0,150.0,30.0,132.36,7803.0,23.0,6.685,4.5,75.0,300.0,167.0,3.0,7.0,3.0,205.0,82.0,1831.0,44.0,8.0,18.0,17.0,41.0,310.0,18.0,1455.0,18.0,62.0,9.0,52.49,3286.0,6.0,7.275,4.7375,36.0,780.76,,13.2,,401411138.0,Florida State,28.0,Clemson,34.0,consensus,51.0,-4.5,Clemson -4.5,,,,


Now that we have a function that can grab game data for the upcoming week, let's make a function that can predict games based off of this data and place the predictions, spread, and spread differential in a dictionary.

In [136]:
features = [col for col in teamDict['Florida State'].columns if any(word in col for word in ['rolling_sum','talent','SP'])]

In [479]:
def predictUpcomingWeek(modelDict, teamDict, features):

    predsDict = {}

    sortedModels = {key: modelDict[key] for key in sorted(modelDict)}

    for team, model in sortedModels.items():

        pred = None
        spread = None
        spreadDiff = None
        
        # only looking at teams with a game this week
        if pd.isna(teamDict[team].iloc[-1]['School']) == True:

            # skipping a team if its game has already been predicted
            homeTeam = teamDict[team].iloc[-1]['HomeTeam'] == team

            # checking if the given team is the home team or not
            if homeTeam == True:

                # getting the opposing team's name
                oppTeam = teamDict[team].iloc[-1]['AwayTeam']

                # skipping the prediction if that game has already been predicted by other team
                if oppTeam < team and oppTeam in sortedModels:

                    predsDict[team] = f'Already predicted by {oppTeam}'
                    continue

            else:

                # getting the opposing team's name
                oppTeam = teamDict[team].iloc[-1]['HomeTeam']
                
                # skipping the prediction if that game has already been predicted by other team
                if oppTeam < team and oppTeam in sortedModels:

                    predsDict[team] = f'Already predicted by {oppTeam}'
                    continue
                

            # grabbing feature data and predicting a score differential
            X = teamDict[team].iloc[-1][features].to_frame().T
            pred = model.predict(X)
            pred = round(pred[0] * 2) / 2

            # grabbing the Vegas spread and spread differential
            spread = teamDict[team].iloc[-1]['Spread']
            spreadDiff = pred - spread

            # grabbing the gameID from the game
            gameID = teamDict[team].iloc[-1]['Id']

        else: 
            predsDict[team] = 'No game this week'
            continue

        # creating a list of teams that have incomplete data due to being added to the FBS too recently
        skipList = ['Jacksonville State', 'James Madison', 'Kennesaw State', 
                    'Coastal Carolina', 'Liberty', 'Sam Houston State']

        # skipping predicting teams on the skip list because the predictions are inaccurate due to bad data
        if homeTeam in skipList or oppTeam in skipList:
            predsDict[team] = 'Playing team with incomplete data'
            continue

        cover = np.nan if pd.isna(spreadDiff) else -1 if spreadDiff < 0 else 1 if spreadDiff > 0 else 0

        predsDict[team] = [pred, spread, spreadDiff, cover, gameID, team, oppTeam]

    return predsDict



In [140]:
directory = '/users/blaizelahman/Desktop/CFB Model/Team Models'
pattern = os.path.join(directory, '*2024*.pkl')

teamFiles = glob.glob(pattern)

modelDict2 = {}

for file in teamFiles:

    baseName = os.path.basename(file)
    
    key = baseName.split('_model')[0]
    key = key.replace('_', ' ')
    
    # Load your DataFrame
    modelDict2[key] = joblib.load(file)

    print(f'Added: {key}')


Added: Georgia State
Added: Florida International
Added: Bowling Green
Added: Virginia
Added: Texas State
Added: Rutgers
Added: Michigan State
Added: Iowa
Added: BYU
Added: West Virginia
Added: East Carolina
Added: SMU
Added: Northwestern
Added: Louisiana
Added: Hawai'i
Added: South Florida
Added: Georgia
Added: Georgia Tech
Added: Tulane
Added: Wake Forest
Added: Iowa State
Added: Alabama
Added: Temple
Added: Marshall
Added: Utah State
Added: Middle Tennessee
Added: Missouri
Added: UCF
Added: Cincinnati
Added: Nebraska
Added: Kansas
Added: Ohio
Added: North Texas
Added: Northern Illinois
Added: Louisiana Tech
Added: Syracuse
Added: Washington
Added: Purdue
Added: Arkansas
Added: Nevada
Added: Wyoming
Added: UMass
Added: New Mexico
Added: Colorado
Added: Oregon State
Added: Idaho
Added: Maryland
Added: Ohio State
Added: NC State
Added: Michigan
Added: Tennessee
Added: Penn State
Added: Buffalo
Added: Oklahoma
Added: Kansas State
Added: Western Michigan
Added: Ole Miss
Added: Stanford
A

In [474]:
predsDict = predictUpcomingWeek(modelDict2, teamDict, features)

Checking that this works as intended

In [475]:
predsDict['Georgia']

'Already predicted by Auburn'

In [480]:
predsDict['Auburn']

[-15.5, -14.0, -1.5, -1, 401520280.0, 'Auburn', 'Georgia']

Now that we can see that this works, let's create a function that will go through all of the predictions and output all the results in order of how likely they are to be successful based off our our bin data. 

In [145]:
path = '/users/blaizelahman/Desktop/CFB Model/Bin Data/Bin_Data.csv'
binData = pd.read_csv(path)

In [493]:
def printPredictions(predsDict, binData):

    # setting up lists that will hold all bets with their respective teams, whether 
    # or not they will cover, and their distinctions
    bestList = []
    greatList = []
    goodList = []
    normalList = []
    tossUpList = []

    # going through predictions and seeing if our model thinks teams and binning the 
    # spread differentials
    for team, preds in predsDict.items():

        # skipping if no prediction or game already predicted
        if len(preds) != 7:
            continue
            
        spreadDiff = preds[2]

        # setting a value for cover and marking whether a team is predicted to cover (1) or not (-1)
        # (0 means toss up)
        cover = 1 if spreadDiff > 0 else -1 if spreadDiff < 0 else 0 

        # adding the prediction to the toss up list and skipping to the next one
        if cover == 0:
            tossUpList.append([team, cover, spreadDiff])
            continue

        # grabbing the success rate from the bin that spreadDiff belongs to
        predBin = binData[(binData['lowerBin'] <= spreadDiff) & (binData['upperBin'] > spreadDiff)]

        if not predBin.empty:

            successRate = predBin.iloc[0]['successRate']

        else: 
            print(f'No bin found for spreadDiff {spreadDiff} \n')


        # adding the prediction to its respective list based on success rate
        if successRate < 0.595:
            normalList.append([team, cover, preds[0], successRate])
        elif successRate <= 0.645:
            goodList.append([team, cover, preds[0], successRate])
        elif successRate <= 0.695:
            greatList.append([team, cover, preds[0], successRate])
        else: 
            bestList.append([team, cover, preds[0], successRate])

    # sorting the games by success rate in each list so they print out in 
    # order of best success rate to worst
    normalList.sort(key = lambda x: x[3], reverse = True)
    goodList.sort(key = lambda x: x[3], reverse = True)
    greatList.sort(key = lambda x: x[3], reverse = True)
    bestList.sort(key = lambda x: x[3], reverse = True)

    print('Games with a greater than 70% success rate: \n')
    if len(bestList) == 0:
        print('No games above a 70% success rate this week.')
    else:
        for game in bestList:
            if game[1] == -1:
                print(f'{game[0]}: NOT COVER. Predicted score differential: ' \
                f'{game[2]}. Historical success rate: {(game[3] * 100):.2f}%.')
  
            else:
                print(f'{game[0]}: COVER. Predicted score differential of ' \
                f'{game[2]}. Historical success rate {(game[3] * 100):.2f}%.')

    print()
    

    print('Games with a 65-70% success rate: \n')
    if len(greatList) == 0:
        print('No games with a 65-70% success rate this week.')
    else:
        for game in greatList:
            if game[1] == -1:
                print(f'{game[0]}: NOT COVER. Predicted score differential: ' \
                f'{game[2]}. Historical success rate: {(game[3] * 100):.2f}%.')
  
            else:
                print(f'{game[0]}: COVER. Predicted score differential of ' \
                f'{game[2]}. Historical success rate {(game[3] * 100):.2f}%.')

    print()

    print('Games with a 60-65% success rate: \n')
    if len(goodList) == 0:
        print('No games with a 60-65% success rate this week.')
    else:
        for game in goodList:
            if game[1] == -1:
                print(f'{game[0]}: NOT COVER. Predicted score differential: ' \
                f'{game[2]}. Historical success rate: {(game[3] * 100):.2f}%.')
  
            else:
                print(f'{game[0]}: COVER. Predicted score differential of ' \
                f'{game[2]}. Historical success rate {(game[3] * 100):.2f}%.')

    print()

    print('Games with a less than 60% success rate: \n')
    if len(normalList) == 0:
        print('No games with a lower than 60% success rate this week.')
    else:
        for game in normalList:
            if game[1] == -1:
                print(f'{game[0]}: NOT COVER. Predicted score differential: ' \
                f'{game[2]}. Historical success rate: {(game[3] * 100):.2f}%.')
  
            else:
                print(f'{game[0]}: COVER. Predicted score differential of ' \
                f'{game[2]}. Historical success rate {(game[3] * 100):.2f}%.')
                

In [347]:
teamDict['Missouri'][-1:]

Unnamed: 0.1,Unnamed: 0,Game Id,School,Conference,HomeAway,Points,Week,Year,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,interceptions,kickReturnTDs,kickReturnYards,kickReturns,kickingPoints,netPassingYards,passesDeflected,passesIntercepted,passingTDs,possessionTime,puntReturnTDs,puntReturnYards,puntReturns,qbHurries,rushingAttempts,rushingTDs,rushingYards,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt,totalTDs,School_opp,Conference_opp,HomeAway_opp,Points_opp,Week_opp,Year_opp,completionAttempts_opp,defensiveTDs_opp,firstDowns_opp,fourthDownEff_opp,fumblesLost_opp,fumblesRecovered_opp,interceptionTDs_opp,interceptionYards_opp,interceptions_opp,kickReturnTDs_opp,kickReturnYards_opp,kickReturns_opp,kickingPoints_opp,netPassingYards_opp,passesDeflected_opp,passesIntercepted_opp,passingTDs_opp,possessionTime_opp,puntReturnTDs_opp,puntReturnYards_opp,puntReturns_opp,qbHurries_opp,rushingAttempts_opp,rushingTDs_opp,rushingYards_opp,sacks_opp,tackles_opp,tacklesForLoss_opp,thirdDownEff_opp,totalFumbles_opp,totalPenaltiesYards_opp,totalYards_opp,turnovers_opp,yardsPerPass_opp,yardsPerRushAttempt_opp,totalTDs_opp,scoreDiff,pointTotal,Win,rolling_sum_Points20,rolling_sum_firstDowns20,rolling_sum_fumblesLost20,rolling_sum_fumblesRecovered20,rolling_sum_interceptions20,rolling_sum_kickReturnYards20,rolling_sum_kickingPoints20,rolling_sum_netPassingYards20,rolling_sum_passesDeflected20,rolling_sum_passesIntercepted20,rolling_sum_passingTDs20,rolling_sum_puntReturns20,rolling_sum_qbHurries20,rolling_sum_rushingAttempts20,rolling_sum_rushingTDs20,rolling_sum_rushingYards20,rolling_sum_sacks20,rolling_sum_tacklesForLoss20,rolling_sum_totalFumbles20,rolling_sum_totalPenaltiesYards20,rolling_sum_totalYards20,rolling_sum_turnovers20,rolling_sum_yardsPerPass20,rolling_sum_yardsPerRushAttempt20,rolling_sum_totalTDs20,rolling_sum_Points8,rolling_sum_firstDowns8,rolling_sum_fumblesLost8,rolling_sum_fumblesRecovered8,rolling_sum_interceptions8,rolling_sum_kickReturnYards8,rolling_sum_kickingPoints8,rolling_sum_netPassingYards8,rolling_sum_passesDeflected8,rolling_sum_passesIntercepted8,rolling_sum_passingTDs8,rolling_sum_puntReturns8,rolling_sum_qbHurries8,rolling_sum_rushingAttempts8,rolling_sum_rushingTDs8,rolling_sum_rushingYards8,rolling_sum_sacks8,rolling_sum_tacklesForLoss8,rolling_sum_totalFumbles8,rolling_sum_totalPenaltiesYards8,rolling_sum_totalYards8,rolling_sum_turnovers8,rolling_sum_yardsPerPass8,rolling_sum_yardsPerRushAttempt8,rolling_sum_totalTDs8,rolling_sum_Points20_opp,rolling_sum_firstDowns20_opp,rolling_sum_fumblesLost20_opp,rolling_sum_fumblesRecovered20_opp,rolling_sum_interceptions20_opp,rolling_sum_kickReturnYards20_opp,rolling_sum_kickingPoints20_opp,rolling_sum_netPassingYards20_opp,rolling_sum_passesDeflected20_opp,rolling_sum_passesIntercepted20_opp,rolling_sum_passingTDs20_opp,rolling_sum_puntReturns20_opp,rolling_sum_qbHurries20_opp,rolling_sum_rushingAttempts20_opp,rolling_sum_rushingTDs20_opp,rolling_sum_rushingYards20_opp,rolling_sum_sacks20_opp,rolling_sum_tacklesForLoss20_opp,rolling_sum_totalFumbles20_opp,rolling_sum_totalPenaltiesYards20_opp,rolling_sum_totalYards20_opp,rolling_sum_turnovers20_opp,rolling_sum_yardsPerPass20_opp,rolling_sum_yardsPerRushAttempt20_opp,rolling_sum_totalTDs20_opp,rolling_sum_Points8_opp,rolling_sum_firstDowns8_opp,rolling_sum_fumblesLost8_opp,rolling_sum_fumblesRecovered8_opp,rolling_sum_interceptions8_opp,rolling_sum_kickReturnYards8_opp,rolling_sum_kickingPoints8_opp,rolling_sum_netPassingYards8_opp,rolling_sum_passesDeflected8_opp,rolling_sum_passesIntercepted8_opp,rolling_sum_passingTDs8_opp,rolling_sum_puntReturns8_opp,rolling_sum_qbHurries8_opp,rolling_sum_rushingAttempts8_opp,rolling_sum_rushingTDs8_opp,rolling_sum_rushingYards8_opp,rolling_sum_sacks8_opp,rolling_sum_tacklesForLoss8_opp,rolling_sum_totalFumbles8_opp,rolling_sum_totalPenaltiesYards8_opp,rolling_sum_totalYards8_opp,rolling_sum_turnovers8_opp,rolling_sum_yardsPerPass8_opp,rolling_sum_yardsPerRushAttempt8_opp,rolling_sum_totalTDs8_opp,talent,talent_opp,SP,SP_opp,Id,HomeTeam,HomeScore,AwayTeam,AwayScore,LineProvider,OverUnder,Spread,FormattedSpread,OpeningSpread,OpeningOverUnder,HomeMoneyline,AwayMoneyline
187,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,603.0,407.0,9.0,18.0,10.0,517.0,181.0,4977.0,79.0,15.0,30.0,14.0,64.0,737.0,36.0,3191.0,62.0,145.0,15.0,154.24,8168.0,19.0,8.495,4.345,68.0,287.0,177.0,1.0,10.0,6.0,201.0,83.0,2140.0,26.0,6.0,14.0,4.0,28.0,294.0,17.0,1432.0,24.0,51.0,6.0,65.82,3572.0,7.0,8.3625,4.8625,32.0,517.0,372.0,14.0,18.0,17.0,852.0,105.0,4251.0,59.0,15.0,38.0,21.0,34.0,717.0,22.0,2886.0,29.0,96.0,27.0,113.92,7137.0,31.0,6.74,3.825,62.0,222.0,156.0,7.0,3.0,10.0,543.0,48.0,1785.0,26.0,9.0,17.0,10.0,20.0,287.0,8.0,1129.0,14.0,40.0,12.0,46.05,2914.0,17.0,7.125,3.8625,27.0,755.4,639.88,19.3,-12.7,401520293.0,Vanderbilt,21.0,Missouri,38.0,DraftKings,54.5,13.5,Missouri -13.5,13.5,56.5,410.0,-550.0


In [494]:
printPredictions(predsDict, binData)

Games with a greater than 70% success rate: 

No games above a 70% success rate this week.

Games with a 65-70% success rate: 

Rutgers: NOT COVER. Predicted score differential: 29.0. Historical success rate: 68.13%.
Iowa State: COVER. Predicted score differential of -6.0. Historical success rate 67.24%.
Central Michigan: NOT COVER. Predicted score differential: -6.5. Historical success rate: 65.25%.
Kent State: NOT COVER. Predicted score differential: -28.0. Historical success rate: 65.25%.
Michigan: COVER. Predicted score differential of 29.5. Historical success rate 64.57%.

Games with a 60-65% success rate: 

Alabama: COVER. Predicted score differential of 25.0. Historical success rate 63.73%.
Arkansas State: COVER. Predicted score differential of 9.5. Historical success rate 63.73%.
Bowling Green: COVER. Predicted score differential of -13.0. Historical success rate 63.73%.
North Texas: COVER. Predicted score differential of 26.0. Historical success rate 63.73%.
Oregon: COVER. Pre

This seems to work as intended. Now we will write functions to be used at week 1 of each year to add the year's talent and SP+ ratings. These functions assume that the upcoming week 1 games have been added.

In [308]:
def grabUpcomingYearTalent(year, teamDict, fcsDict):

    # importing in team talent data gathered from collegefootballdata.com
    path = f'/Users/blaizelahman/Desktop/CFB Model/Talent Data/talent_{year}.csv'
    talentRatings = pd.read_csv(path)

    # grabbing team talent data for FBS teams
    for team, teamDF in teamDict.items():

        teamDF = teamDF.copy()
        
        # checking that the team is playing in week 1 and skipping any team that isn't
        if pd.isna(teamDF.iloc[-1]['School']) == False:
            continue

        # setting the year for the week 1 row
        teamDF.loc[teamDF.index[-1], 'Year'] = year

        # grabbing team talent rating from talentRatings
        talent = talentRatings.loc[talentRatings['School'] == team, 'Talent']
              
        if len(talent.values) == 0:
            print(f'No talent rating for {team}')
            continue

        # assigns talent column with the corresponding talent ratings for games in the given year
        teamDF.loc[teamDF['Year'] == year, 'talent'] = talent.values[0]

        teamDict[team] = teamDF

    # grabbing team talent data for FCS teams
    for team, teamDF in fcsDict.items():

        teamDF = teamDF.copy()
        
        # checking that the team is playing in week 1 and skipping any team that isn't
        if pd.isna(teamDF.iloc[-1]['School']) == False:
            continue

        # setting the year for the week 1 row
        teamDF.loc[teamDF.index[-1], 'Year'] = str(year)
        
        # grabbing team talent rating from talentRatings
        talent = talentRatings.loc[talentRatings['School'] == team, 'Talent']

        if len(talent.values) == 0:
            print(f'No talent rating for {team}')
            continue
            
        # assigns talent column with the corresponding talent ratings for games in the given year
        teamDF.loc[teamDF['Year'] == str(year), 'talent'] = talent.values[0]

        fcsDict[team] = teamDF

    # now grabbing the opponent's talent data and updating each team's dataframes with it
    for team, teamDF in teamDict.items():

        teamDF = teamDF.copy()

        teamDF['talent_opp'] = np.nan

        lastRow = teamDF.iloc[-1]

        if team == lastRow['HomeTeam']:
            
            # grabbing each opponent's name to access their talent rating in talentRatings
            oppName = lastRow['AwayTeam']

        if team == lastRow['AwayTeam']:
            
            # grabbing each opponent's name to access their talent rating in talentRatings
            oppName = lastRow['HomeTeam']
            
         
        # getting the opponent's talent rating column from the game they played the given team
        oppRow = talentRatings[talentRatings['School'] == oppName]
    
        if not oppRow.empty:

            oppTalent = oppRow.iloc[0]['Talent']
        
            # merging the opponent's talent column on the row the team plays them
            teamDF.loc[teamDF.index[-1], 'talent_opp'] = oppTalent 

        teamDict[team] = teamDF
        
        

In [317]:
def grabUpcomingYearSP(year, teamDict):
    
    # importing in team SP+ data gathered from collegefootballdata.com
    path = f'/Users/blaizelahman/Desktop/CFB Model/SP+ Data/SP_{year}.csv'
    spRatings = pd.read_csv(path)

    # grabbing team talent data
    for team, teamDF in teamDict.items():

        teamDF = teamDF.copy()
        
        # checking that the team is playing in week 1 and skipping any team that isn't
        if pd.isna(teamDF.iloc[-1]['School']) == False:
            continue

        # setting the year for the week 1 row
        teamDF.loc[teamDF.index[-1], 'Year'] = year

        # grabbing team talent rating from talentRatings
        sp = spRatings.loc[spRatings['Team'] == team, 'Rating']
              
        if len(sp.values) == 0:
            continue

        # assigns talent column with the corresponding talent ratings for games in the given year
        teamDF.loc[teamDF['Year'] == year, 'SP'] = sp.values[0]

        teamDict[team] = teamDF

    # now grabbing the opponent's talent data and updating each team's dataframes with it
    for team, teamDF in teamDict.items():

        teamDF = teamDF.copy()

        teamDF['SP_opp'] = np.nan

        lastRow = teamDF.iloc[-1]

        if team == lastRow['HomeTeam']:
            
            # grabbing each opponent's name to access their talent rating in talentRatings
            oppName = lastRow['AwayTeam']

        if team == lastRow['AwayTeam']:
            
            # grabbing each opponent's name to access their talent rating in talentRatings
            oppName = lastRow['HomeTeam']
            
         
        # getting the opponent's talent rating column from the game they played the given team
        oppRow = spRatings[spRatings['Team'] == oppName]
    
        if not oppRow.empty:

            oppSP = oppRow.iloc[0]['Rating']
        
            # merging the opponent's talent column on the row the team plays them
            teamDF.loc[teamDF.index[-1], 'SP_opp'] = oppSP 

        teamDict[team] = teamDF

Now lets make a function that will grab the data from the last week of games, reformat it, put it  in one dataframe that can be used to append game rows to each team's individual dataframes.

In [453]:
def grabLastWeekData(year, week, teamDict, fcsDict):

    # ignoring a redundant warning message
    warnings.simplefilter(action = 'ignore', category = FutureWarning)

    # setting the download directory and Chrome settings
    directory = '/Users/blaizelahman/Desktop/CFB Model/Recent Data'
    chromeOptions = Options()
    prefs = {'download.default_directory': directory}
    chromeOptions.add_experimental_option('prefs', prefs)
    
    # these extra steps are due to the current version of chromedriver not being compatible with the 
    # current version of chrome at the time of development, feel free to delete or change as it pertains
    # to user situation
    path = '/Users/blaizelahman/Downloads/chromedriver_104'
    olderChromePath = '/Applications/Older Chrome.app/Contents/MacOS/Google Chrome'

    chromeOptions.binary_location = olderChromePath
    
    # creating Chrome driver
    driver = webdriver.Chrome(service = Service(path), options = chromeOptions)

    link = f'https://collegefootballdata.com/exporter/games/teams?year={year}&week={week}&seasonType=regular'

    driver.get(link)
    time.sleep(4) 
            
    # clicking the query button
    query = driver.find_element(By.XPATH, "//button[contains(span/text(), 'Query')]")
    query.click()
    time.sleep(3) 
            
    # clicking the export button
    export = driver.find_element(By.XPATH, "//button[contains(span/text(), 'Export')]")
    export.click()
    time.sleep(3)

    # grabs files from CFBData folder
    files = os.listdir(directory)
        
    # grab the file paths for all files ending in .csv
    filePaths = [os.path.join(directory, name) for name in files if name.endswith('.csv')]

    # grabbing the most recently made file out of those in paths
    file = max(filePaths, key = os.path.getctime)
            
    # loading csv file
    totalWeeklyData = pd.read_csv(file)

    driver.quit()

    # creates new week column that displays the week the game took place
    totalWeeklyData['Week'] = f"Week {week}"

    totalWeeklyData['Year'] = year

    # drops the redundant first row and resets the indices of the datadrame
    totalWeeklyData.drop(0, inplace = True)

    totalWeeklyData.reset_index(drop = True, inplace = True)

    totalWeeklyData.reset_index(level = 0, drop = True, inplace = True)

    totalWeeklyData.rename(columns = {0: 'Game Id', 1: 'School', 2: 'Conference', 
                                  3: 'HomeAway', 4: 'Points', 5: 'Stat Category', 6: 'Stat'}, inplace = True)

    # making a copy of the dataframe to look for non-numeric values
    totalWeeklyDataCopy = totalWeeklyData.copy(deep = True)
    
    # converting string type data points to numerics
    totalWeeklyDataCopy['Stat'] = pd.to_numeric(totalWeeklyDataCopy['Stat'], errors='coerce')
    
    # filter out unique non-numeric rows
    nan_stats = totalWeeklyDataCopy[totalWeeklyDataCopy['Stat'].isna()]['Stat Category'].unique()

    # replacing the "-" in totalPenaltiesYards, completionAttempts, fourthDownEff, and thirdDownEff
    # with ".", and the same with ":" in possessionTime so they can be converted to numeric values
    totalWeeklyData['Stat'] = totalWeeklyData['Stat'].str.replace('-', '.')
    totalWeeklyData['Stat'] = totalWeeklyData['Stat'].str.replace(':', '.')
    
    totalWeeklyData['Stat'] = pd.to_numeric(totalWeeklyData['Stat'], errors = 'coerce')
    
    # converting points to numerics
    totalWeeklyData['Points'] = pd.to_numeric(totalWeeklyData['Points'], errors = 'coerce')

    # pivoting the Stat Category column into multiple columns each with their respective stat
    totalWeeklyData = totalWeeklyData.pivot(index=['Game Id', 'School', 'Conference', 'HomeAway', 'Points', 'Week', 'Year'], 
                          columns='Stat Category', 
                          values='Stat').reset_index()
    
    # flattening the columns
    totalWeeklyData.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in totalWeeklyData.columns]

    tempDict = {}
    for team in totalWeeklyData.School.unique():
        
        dfName = team

        # getting rid of redundant error warning
        pd.options.mode.chained_assignment = None
    
    
        # getting all games with the given team
        teamGames = totalWeeklyData[totalWeeklyData['School'] == team]
        gameIDs = teamGames['Game Id'].unique()
        
        # adding in all opponents of the given team and their stats
        teamDF = totalWeeklyData[totalWeeklyData['Game Id'].isin(gameIDs)]
    
        # converting the week column to an int so the dataframe can then be sorted by year and week
        teamDF['Week'] = teamDF['Week'].str[-2:].astype(int)
        teamDF = teamDF.sort_values(by = ["Year", "Week"])
    
        teamDF = teamDF.reset_index(drop=True)
    
        # adding a total touchdown column
        teamDF['totalTDs'] = teamDF[['passingTDs', 'rushingTDs', 'interceptionTDs', 'kickReturnTDs', 'puntReturnTDs']].sum(axis = 1, skipna = True)
    
        # merging dataframe to pair teams who played each other by Game Id and differentiating the opponent's stats
        mergeTeamDF = teamDF.merge(teamDF, on='Game Id', suffixes=('', '_opp'))
    
        # making sure there's no duplicates
        mergeTeamDF = mergeTeamDF[mergeTeamDF['School'] != mergeTeamDF['School_opp']]
    
        # getting score differentials and point totals
        mergeTeamDF['scoreDiff'] = mergeTeamDF['Points'] - mergeTeamDF['Points_opp']
        mergeTeamDF['pointTotal'] = mergeTeamDF['Points'] + mergeTeamDF['Points_opp']

        # adding a win column to show if the given team won a game or not
        mergeTeamDF['Win'] = mergeTeamDF['scoreDiff'] > 0
    
        # setting the dataframe to the merged version
        teamDF = mergeTeamDF
    
        teamDF = teamDF[teamDF['School'] == team]
        
        tempDict[dfName] = teamDF

    # grabbing all of the rolling columns that we will have to update (not including opposing columns)
    rollingCols = [col for col in teamDict['Florida State'].columns if 'rolling_sum' in col and '_opp' not in col]

    print(tempDict['North Alabama'])
    
    import copy
    
    newTeamDict = copy.deepcopy(teamDict)
    newFCSDict = copy.deepcopy(fcsDict)

    for dictionary in [newTeamDict, newFCSDict]:
        
        # going through teams in teamDict and appending the recent games to their respective dataframes 
        # and adding in rolling sum, talent, and SP+ data
        for team, teamDF in dictionary.items():
    
            # making sure that team played this past week
            if team in tempDict:
    
                teamDF = teamDF.copy()
                
                # appending the recent game onto the current team dataframe
                teamDF = pd.concat([teamDF, tempDict[team]], ignore_index = True)
    
                # getting rolling sum values
                for col in rollingCols:
                    
                    # grabbing the column and window of games to pull from
                    parts = col.split('_')
                    columnName = parts[2]
        
                    if columnName[-1] == '8':
                        columnName = columnName[:-1]
                        window = 8
                    else:
                        columnName = columnName[:-2]
                        window = 20
                    
                    # grabbing the rolling sum and setting the given column in the most recent row with it
                    rollingSum = customRollingSum(teamDF[columnName], window)
        
                    # correcting yardsPerPass and yardPerRushAttempt columns as we did in p1
                    if col == 'rolling_sum_yardsPerPass20' or col == 'rolling_sum_yardsPerRushAttempt20':
                        rollingSum = rollingSum / 20
        
                    if col == 'rolling_sum_yardsPerPass8' or col == 'rolling_sum_yardsPerRushAttempt8':
                        rollingSum = rollingSum / 8
                        
                    teamDF.at[teamDF.index[-1], col] = rollingSum.iloc[-1]
    
                    dictionary[team] = teamDF
    
        # merging opposing team's rolling sum columns
        for team, teamDF in dictionary.items():
    
            if team in tempDict:
    
                teamDF = dictionary[team].copy()
    
                # grabbing most recent game and index
                lastRow = teamDF.iloc[-1]
                lastIndex = teamDF.index[-1]
            
                # grabbing the game id and opponent's name to access the game in their dataframe
                gameID = lastRow['Game Id']
        
                oppName = lastRow['School_opp']
        
                # grabbing the opponent's dataframe based on what dictionary they're in
                if oppName in dictionary:
                    
                    oppDF = dictionary[oppName]
                    
                elif oppName in fcsDict: 
                    oppDF = fcsDict[oppName]
        
                else: 
                    continue
        
                # grabbing the opponent's rolling_sum columns from the game they played against the given team
                oppRow = oppDF[oppDF['Game Id'] == gameID]
        
                if not oppRow.empty:
            
                    # merging the opponent's rolling_sum columns on the row the team plays them
                    for col in rollingCols:
                        teamDF.loc[lastIndex, col + '_opp'] = oppRow.iloc[0][col]
    
                # skipping the talent and SP+ rating step if it's week 1 because this will be done using
                # grabUpcomingYearTalent and grabUpcomingYearSP
                if week == 1:
                    dictionary[team] = teamDF
                    continue
                
                # ensure columns exist and set them as NaN values if not
                if 'talent' not in teamDF.columns:
                    teamDF['talent'] = pd.NA
                if 'SP' not in teamDF.columns:
                    teamDF['SP'] = pd.NA
                
                # assigning talent and SP+ ratings from the previous row
                for col in ['talent', 'SP']:
                    teamDF.loc[lastIndex, col] = teamDF.loc[lastIndex - 1, col]
        
                dictionary[team] = teamDF
    
        # adding in the opposing team's talent and SP+ rating
        for team, teamDF in dictionary.items():
    
            if team in tempDict:
            
                teamDF = dictionary[team].copy()
    
                # grabbing most recent game and index
                lastRow = teamDF.iloc[-1]
                lastIndex = teamDF.index[-1]
        
                oppName = lastRow['School_opp']
        
                # grabbing the opponent's dataframe based on what dictionary they're in
                if oppName in teamDict:
                    
                    oppDF = teamDict[oppName]
        
                    # setting if a team is in the FCS or not because the naming conventions for columns differs
                    fcs = False
                
                elif oppName in fcsDict and oppName not in teamDict: 
                    
                    oppDF = fcsDict[oppName]
        
                    # setting if a team is in the FCS or not because the naming conventions for columns differs
                    fcs = True
        
                else: 
                    continue
        
                # skipping the talent and SP+ adding process if it's week 1
                if week == 1:
                    dictionary[team] = teamDF
                    continue
    
                # seeing if a team is in the FCS or not because the naming conventions for columns differs
                if fcs == False:
                    
                    # grabbing the opposing team's most recent played game to grab their talent and SP+ data
                    filteredOppDF = oppDF[oppDF['Year'] == year]
        
                if fcs == True:
                    filteredOppDF = oppDF[oppDF['Year'] == str(year)]
        
                # checking if dataframe is empty
                if filteredOppDF.empty:
                    print(f"No data for opponent in year {year}: {oppName}")
                    continue 
        
                oppRow = filteredOppDF.iloc[0]
    
                # ensure columns exist and set them as NaN values if not
                if 'talent' not in oppRow.index:
                    oppRow['talent'] = pd.NA
                if 'SP' not in oppRow.index:
                    oppRow['SP'] = pd.NA
                    
                # adding oppossing team talent and SP+ ratings
                for col in ['talent', 'SP']:
                    teamDF.loc[lastIndex, col + '_opp'] = oppRow[col]
        
                dictionary[team] = teamDF


    # now adding in betting data
    # creating Chrome driver
    driver = webdriver.Chrome(service = Service(path), options = chromeOptions)
    # setting the link that directs to the betting data
    bettingDataLink = f'https://collegefootballdata.com/exporter/lines?year=2023&week=6&seasonType=regular'
    
    driver.get(bettingDataLink)
    time.sleep(4) 
            
    # clicking the query button
    query = driver.find_element(By.XPATH, "//button[contains(span/text(), 'Query')]")
    query.click()
    time.sleep(3) 
            
    # clicking the export button
    export = driver.find_element(By.XPATH, "//button[contains(span/text(), 'Export')]")
    export.click()
    time.sleep(3)

    key = str(year)
            
    # grabs files from CFBData folder
    files = os.listdir(directory)
        
    # grab the file paths for all files ending in .csv
    filePaths = [os.path.join(directory, name) for name in files if name.endswith('.csv')]

    # grabbing the most recently made file out of those in paths
    file = max(filePaths, key=os.path.getctime)
            
    # loading csv file
    bettingData = pd.read_csv(file)

    driver.quit()

    # setting our preferred line providers
    lineProviders = ['DraftKings', 'consensus', 'Bovada']

    # getting the preferred line for each individual game
    preferredLines = bettingData.groupby('Id').apply(lambda x: getPreferredLine(x, lineProviders)).reset_index(drop = True)

    # merging the preferred lines with each dataframe in teamDict
    for team, teamDF in newTeamDict.items():

        if team in tempDict:

            teamDF = teamDF.copy()

            # getting the last row of the dataframe
            lastRow = teamDF.iloc[-1]
            lastGameId = lastRow['Game Id']
    
            # finding the matching game row in preferredLines
            gameRow = preferredLines[preferredLines['Id'] == lastGameId]
    
            if not gameRow.empty:
                
                gameRow = gameRow.iloc[0] 

                if gameRow['HomeTeam'] == team:
                    gameRow['Spread'] = float(gameRow['Spread']) * -1

                # getting betting columns to update
                cols = teamDF.columns[-13:]
    
                # updating the betting columns in the last row with betting data
                teamDF.loc[teamDF.index[-1], cols] = gameRow[cols].values

            newTeamDict[team] = teamDF

    return [newTeamDict, newFCSDict]
    


In [449]:
teamDict2, fcsDict2 = grabLastWeekData(2023, 6, teamDict, fcsDict)

In [450]:
teamDict2['Florida State'][-1:]

Unnamed: 0.1,Unnamed: 0,Game Id,School,Conference,HomeAway,Points,Week,Year,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,interceptions,kickReturnTDs,kickReturnYards,kickReturns,kickingPoints,netPassingYards,passesDeflected,passesIntercepted,passingTDs,possessionTime,puntReturnTDs,puntReturnYards,puntReturns,qbHurries,rushingAttempts,rushingTDs,rushingYards,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt,totalTDs,School_opp,Conference_opp,HomeAway_opp,Points_opp,Week_opp,Year_opp,completionAttempts_opp,defensiveTDs_opp,firstDowns_opp,fourthDownEff_opp,fumblesLost_opp,fumblesRecovered_opp,interceptionTDs_opp,interceptionYards_opp,interceptions_opp,kickReturnTDs_opp,kickReturnYards_opp,kickReturns_opp,kickingPoints_opp,netPassingYards_opp,passesDeflected_opp,passesIntercepted_opp,passingTDs_opp,possessionTime_opp,puntReturnTDs_opp,puntReturnYards_opp,puntReturns_opp,qbHurries_opp,rushingAttempts_opp,rushingTDs_opp,rushingYards_opp,sacks_opp,tackles_opp,tacklesForLoss_opp,thirdDownEff_opp,totalFumbles_opp,totalPenaltiesYards_opp,totalYards_opp,turnovers_opp,yardsPerPass_opp,yardsPerRushAttempt_opp,totalTDs_opp,scoreDiff,pointTotal,Win,rolling_sum_Points20,rolling_sum_firstDowns20,rolling_sum_fumblesLost20,rolling_sum_fumblesRecovered20,rolling_sum_interceptions20,rolling_sum_kickReturnYards20,rolling_sum_kickingPoints20,rolling_sum_netPassingYards20,rolling_sum_passesDeflected20,rolling_sum_passesIntercepted20,rolling_sum_passingTDs20,rolling_sum_puntReturns20,rolling_sum_qbHurries20,rolling_sum_rushingAttempts20,rolling_sum_rushingTDs20,rolling_sum_rushingYards20,rolling_sum_sacks20,rolling_sum_tacklesForLoss20,rolling_sum_totalFumbles20,rolling_sum_totalPenaltiesYards20,rolling_sum_totalYards20,rolling_sum_turnovers20,rolling_sum_yardsPerPass20,rolling_sum_yardsPerRushAttempt20,rolling_sum_totalTDs20,rolling_sum_Points8,rolling_sum_firstDowns8,rolling_sum_fumblesLost8,rolling_sum_fumblesRecovered8,rolling_sum_interceptions8,rolling_sum_kickReturnYards8,rolling_sum_kickingPoints8,rolling_sum_netPassingYards8,rolling_sum_passesDeflected8,rolling_sum_passesIntercepted8,rolling_sum_passingTDs8,rolling_sum_puntReturns8,rolling_sum_qbHurries8,rolling_sum_rushingAttempts8,rolling_sum_rushingTDs8,rolling_sum_rushingYards8,rolling_sum_sacks8,rolling_sum_tacklesForLoss8,rolling_sum_totalFumbles8,rolling_sum_totalPenaltiesYards8,rolling_sum_totalYards8,rolling_sum_turnovers8,rolling_sum_yardsPerPass8,rolling_sum_yardsPerRushAttempt8,rolling_sum_totalTDs8,rolling_sum_Points20_opp,rolling_sum_firstDowns20_opp,rolling_sum_fumblesLost20_opp,rolling_sum_fumblesRecovered20_opp,rolling_sum_interceptions20_opp,rolling_sum_kickReturnYards20_opp,rolling_sum_kickingPoints20_opp,rolling_sum_netPassingYards20_opp,rolling_sum_passesDeflected20_opp,rolling_sum_passesIntercepted20_opp,rolling_sum_passingTDs20_opp,rolling_sum_puntReturns20_opp,rolling_sum_qbHurries20_opp,rolling_sum_rushingAttempts20_opp,rolling_sum_rushingTDs20_opp,rolling_sum_rushingYards20_opp,rolling_sum_sacks20_opp,rolling_sum_tacklesForLoss20_opp,rolling_sum_totalFumbles20_opp,rolling_sum_totalPenaltiesYards20_opp,rolling_sum_totalYards20_opp,rolling_sum_turnovers20_opp,rolling_sum_yardsPerPass20_opp,rolling_sum_yardsPerRushAttempt20_opp,rolling_sum_totalTDs20_opp,rolling_sum_Points8_opp,rolling_sum_firstDowns8_opp,rolling_sum_fumblesLost8_opp,rolling_sum_fumblesRecovered8_opp,rolling_sum_interceptions8_opp,rolling_sum_kickReturnYards8_opp,rolling_sum_kickingPoints8_opp,rolling_sum_netPassingYards8_opp,rolling_sum_passesDeflected8_opp,rolling_sum_passesIntercepted8_opp,rolling_sum_passingTDs8_opp,rolling_sum_puntReturns8_opp,rolling_sum_qbHurries8_opp,rolling_sum_rushingAttempts8_opp,rolling_sum_rushingTDs8_opp,rolling_sum_rushingYards8_opp,rolling_sum_sacks8_opp,rolling_sum_tacklesForLoss8_opp,rolling_sum_totalFumbles8_opp,rolling_sum_totalPenaltiesYards8_opp,rolling_sum_totalYards8_opp,rolling_sum_turnovers8_opp,rolling_sum_yardsPerPass8_opp,rolling_sum_yardsPerRushAttempt8_opp,rolling_sum_totalTDs8_opp,talent,talent_opp,SP,SP_opp,Id,HomeTeam,HomeScore,AwayTeam,AwayScore,LineProvider,OverUnder,Spread,FormattedSpread,OpeningSpread,OpeningOverUnder,HomeMoneyline,AwayMoneyline
188,,401525518,Florida State,ACC,home,39,6,2023,18.25,0.0,20.0,1.1,0.0,1.0,,,0.0,,,,7.0,170.0,5.0,,2.0,28.44,0.0,33.0,2.0,2.0,36.0,3.0,282.0,2.0,30.0,5.0,5.12,0.0,12.99,452.0,0.0,6.8,7.8,5.0,Virginia Tech,ACC,away,17,6,2023,14.27,1.0,19.0,3.4,1.0,0.0,,,0.0,1.0,156.0,4.0,5.0,104.0,3.0,,0.0,31.16,0.0,8.0,1.0,1.0,35.0,1.0,209.0,2.0,22.0,4.0,2.13,3.0,6.6,313.0,1.0,3.9,6.0,2.0,22,56,True,744.0,433.0,8.0,12.0,5.0,415.0,166.0,5067.0,98.0,15.0,41.0,42.0,56.0,720.0,51.0,3634.0,64.0,136.0,22.0,138.749,8701.0,13.0,8.24,4.945,94.0,269.0,159.0,1.0,4.0,1.0,247.0,71.0,2084.0,48.0,7.0,11.0,18.0,33.0,276.0,20.0,1166.0,32.0,66.0,9.0,51.315,3250.0,2.0,7.75,4.175,32.0,442.0,362.0,13.0,16.0,13.0,876.0,114.0,3984.0,68.0,9.0,26.0,42.0,57.0,746.0,24.0,2868.0,42.0,129.0,23.0,158.428,6852.0,26.0,7.22,3.7,53.0,181.0,148.0,6.0,7.0,5.0,373.0,41.0,1602.0,22.0,5.0,10.0,19.0,22.0,317.0,11.0,1050.0,21.0,53.0,10.0,47.99,2652.0,11.0,7.2125,3.2875,23.0,765.33,638.89,19.4,5.8,401525518.0,Florida State,39.0,Virginia Tech,17.0,DraftKings,53.0,24.0,Florida State -24,-25.5,53.0,-2400.0,1100.0


In [456]:
fcsDict2['South Dakota'][-1:]

Unnamed: 0,Game Id,School,Conference,HomeAway,Points,Week,Year,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,interceptions,kickReturnTDs,kickReturnYards,kickReturns,kickingPoints,netPassingYards,passesDeflected,passesIntercepted,passingTDs,possessionTime,puntReturnTDs,puntReturnYards,puntReturns,qbHurries,rushingAttempts,rushingTDs,rushingYards,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt,totalTDs,School_opp,Conference_opp,HomeAway_opp,Points_opp,Week_opp,Year_opp,completionAttempts_opp,defensiveTDs_opp,firstDowns_opp,fourthDownEff_opp,fumblesLost_opp,fumblesRecovered_opp,interceptionTDs_opp,interceptionYards_opp,interceptions_opp,kickReturnTDs_opp,kickReturnYards_opp,kickReturns_opp,kickingPoints_opp,netPassingYards_opp,passesDeflected_opp,passesIntercepted_opp,passingTDs_opp,possessionTime_opp,puntReturnTDs_opp,puntReturnYards_opp,puntReturns_opp,qbHurries_opp,rushingAttempts_opp,rushingTDs_opp,rushingYards_opp,sacks_opp,tackles_opp,tacklesForLoss_opp,thirdDownEff_opp,totalFumbles_opp,totalPenaltiesYards_opp,totalYards_opp,turnovers_opp,yardsPerPass_opp,yardsPerRushAttempt_opp,totalTDs_opp,scoreDiff,pointTotal,Win,rolling_sum_Points20,rolling_sum_firstDowns20,rolling_sum_fumblesLost20,rolling_sum_fumblesRecovered20,rolling_sum_interceptions20,rolling_sum_kickReturnYards20,rolling_sum_kickingPoints20,rolling_sum_netPassingYards20,rolling_sum_passesDeflected20,rolling_sum_passesIntercepted20,rolling_sum_passingTDs20,rolling_sum_puntReturns20,rolling_sum_qbHurries20,rolling_sum_rushingAttempts20,rolling_sum_rushingTDs20,rolling_sum_rushingYards20,rolling_sum_sacks20,rolling_sum_tacklesForLoss20,rolling_sum_totalFumbles20,rolling_sum_totalPenaltiesYards20,rolling_sum_totalYards20,rolling_sum_turnovers20,rolling_sum_yardsPerPass20,rolling_sum_yardsPerRushAttempt20,rolling_sum_totalTDs20,rolling_sum_Points8,rolling_sum_firstDowns8,rolling_sum_fumblesLost8,rolling_sum_fumblesRecovered8,rolling_sum_interceptions8,rolling_sum_kickReturnYards8,rolling_sum_kickingPoints8,rolling_sum_netPassingYards8,rolling_sum_passesDeflected8,rolling_sum_passesIntercepted8,rolling_sum_passingTDs8,rolling_sum_puntReturns8,rolling_sum_qbHurries8,rolling_sum_rushingAttempts8,rolling_sum_rushingTDs8,rolling_sum_rushingYards8,rolling_sum_sacks8,rolling_sum_tacklesForLoss8,rolling_sum_totalFumbles8,rolling_sum_totalPenaltiesYards8,rolling_sum_totalYards8,rolling_sum_turnovers8,rolling_sum_yardsPerPass8,rolling_sum_yardsPerRushAttempt8,rolling_sum_totalTDs8,rolling_sum_Points20_opp,rolling_sum_firstDowns20_opp,rolling_sum_fumblesLost20_opp,rolling_sum_fumblesRecovered20_opp,rolling_sum_interceptions20_opp,rolling_sum_kickReturnYards20_opp,rolling_sum_kickingPoints20_opp,rolling_sum_netPassingYards20_opp,rolling_sum_passesDeflected20_opp,rolling_sum_passesIntercepted20_opp,rolling_sum_passingTDs20_opp,rolling_sum_puntReturns20_opp,rolling_sum_qbHurries20_opp,rolling_sum_rushingAttempts20_opp,rolling_sum_rushingTDs20_opp,rolling_sum_rushingYards20_opp,rolling_sum_sacks20_opp,rolling_sum_tacklesForLoss20_opp,rolling_sum_totalFumbles20_opp,rolling_sum_totalPenaltiesYards20_opp,rolling_sum_totalYards20_opp,rolling_sum_turnovers20_opp,rolling_sum_yardsPerPass20_opp,rolling_sum_yardsPerRushAttempt20_opp,rolling_sum_totalTDs20_opp,rolling_sum_Points8_opp,rolling_sum_firstDowns8_opp,rolling_sum_fumblesLost8_opp,rolling_sum_fumblesRecovered8_opp,rolling_sum_interceptions8_opp,rolling_sum_kickReturnYards8_opp,rolling_sum_kickingPoints8_opp,rolling_sum_netPassingYards8_opp,rolling_sum_passesDeflected8_opp,rolling_sum_passesIntercepted8_opp,rolling_sum_passingTDs8_opp,rolling_sum_puntReturns8_opp,rolling_sum_qbHurries8_opp,rolling_sum_rushingAttempts8_opp,rolling_sum_rushingTDs8_opp,rolling_sum_rushingYards8_opp,rolling_sum_sacks8_opp,rolling_sum_tacklesForLoss8_opp,rolling_sum_totalFumbles8_opp,rolling_sum_totalPenaltiesYards8_opp,rolling_sum_totalYards8_opp,rolling_sum_turnovers8_opp,rolling_sum_yardsPerPass8_opp,rolling_sum_yardsPerRushAttempt8_opp,rolling_sum_totalTDs8_opp,talent,SP,talent_opp,SP_opp
23,401540364,South Dakota,MVFC,home,38,6,2023,17.23,,24.0,0.1,0.0,1.0,0.0,8.0,0.0,0.0,30.0,1.0,8.0,195.0,,1.0,1.0,35.55,0.0,0.3,1.0,,42.0,4.0,263.0,,,,6.1,0.0,5.35,458.0,0.0,8.5,6.3,5.0,Murray State,MVFC,away,7,6,2023,12.22,,15.0,0.0,1.0,0.0,,,1.0,0.0,76.0,4.0,1.0,157.0,,,1.0,24.05,,,,,29.0,0.0,125.0,,,,2.8,1.0,5.45,282.0,2.0,7.1,4.3,1.0,31,45,True,433.0,332.0,12.0,13.0,9.0,1032.0,103.0,3745.0,2.0,13.0,24.0,14.0,0.0,641.0,31.0,2768.0,1.0,6.0,21.0,120.55,6513.0,21.0,8.405,4.195,55.0,192.0,143.0,4.0,4.0,3.0,186.0,48.0,1760.0,0.0,5.0,8.0,6.0,0.0,246.0,16.0,1183.0,0.0,0.0,6.0,48.27,2943.0,7.0,9.85,4.5875,24.0,,,,,,,,,,,,,,,,,,,,,,,,,,171.0,126.0,8.0,1.0,4.0,398.0,37.0,1159.0,5.0,7.0,8.0,10.0,1.0,329.0,13.0,1542.0,1.0,6.0,10.0,52.815,2701.0,12.0,6.3,4.5875,22.0,96.61,,80.0,


In [457]:
teamDict2['Oregon'][-1:]

Unnamed: 0.1,Unnamed: 0,Game Id,School,Conference,HomeAway,Points,Week,Year,completionAttempts,defensiveTDs,firstDowns,fourthDownEff,fumblesLost,fumblesRecovered,interceptionTDs,interceptionYards,interceptions,kickReturnTDs,kickReturnYards,kickReturns,kickingPoints,netPassingYards,passesDeflected,passesIntercepted,passingTDs,possessionTime,puntReturnTDs,puntReturnYards,puntReturns,qbHurries,rushingAttempts,rushingTDs,rushingYards,sacks,tackles,tacklesForLoss,thirdDownEff,totalFumbles,totalPenaltiesYards,totalYards,turnovers,yardsPerPass,yardsPerRushAttempt,totalTDs,School_opp,Conference_opp,HomeAway_opp,Points_opp,Week_opp,Year_opp,completionAttempts_opp,defensiveTDs_opp,firstDowns_opp,fourthDownEff_opp,fumblesLost_opp,fumblesRecovered_opp,interceptionTDs_opp,interceptionYards_opp,interceptions_opp,kickReturnTDs_opp,kickReturnYards_opp,kickReturns_opp,kickingPoints_opp,netPassingYards_opp,passesDeflected_opp,passesIntercepted_opp,passingTDs_opp,possessionTime_opp,puntReturnTDs_opp,puntReturnYards_opp,puntReturns_opp,qbHurries_opp,rushingAttempts_opp,rushingTDs_opp,rushingYards_opp,sacks_opp,tackles_opp,tacklesForLoss_opp,thirdDownEff_opp,totalFumbles_opp,totalPenaltiesYards_opp,totalYards_opp,turnovers_opp,yardsPerPass_opp,yardsPerRushAttempt_opp,totalTDs_opp,scoreDiff,pointTotal,Win,rolling_sum_Points20,rolling_sum_firstDowns20,rolling_sum_fumblesLost20,rolling_sum_fumblesRecovered20,rolling_sum_interceptions20,rolling_sum_kickReturnYards20,rolling_sum_kickingPoints20,rolling_sum_netPassingYards20,rolling_sum_passesDeflected20,rolling_sum_passesIntercepted20,rolling_sum_passingTDs20,rolling_sum_puntReturns20,rolling_sum_qbHurries20,rolling_sum_rushingAttempts20,rolling_sum_rushingTDs20,rolling_sum_rushingYards20,rolling_sum_sacks20,rolling_sum_tacklesForLoss20,rolling_sum_totalFumbles20,rolling_sum_totalPenaltiesYards20,rolling_sum_totalYards20,rolling_sum_turnovers20,rolling_sum_yardsPerPass20,rolling_sum_yardsPerRushAttempt20,rolling_sum_totalTDs20,rolling_sum_Points8,rolling_sum_firstDowns8,rolling_sum_fumblesLost8,rolling_sum_fumblesRecovered8,rolling_sum_interceptions8,rolling_sum_kickReturnYards8,rolling_sum_kickingPoints8,rolling_sum_netPassingYards8,rolling_sum_passesDeflected8,rolling_sum_passesIntercepted8,rolling_sum_passingTDs8,rolling_sum_puntReturns8,rolling_sum_qbHurries8,rolling_sum_rushingAttempts8,rolling_sum_rushingTDs8,rolling_sum_rushingYards8,rolling_sum_sacks8,rolling_sum_tacklesForLoss8,rolling_sum_totalFumbles8,rolling_sum_totalPenaltiesYards8,rolling_sum_totalYards8,rolling_sum_turnovers8,rolling_sum_yardsPerPass8,rolling_sum_yardsPerRushAttempt8,rolling_sum_totalTDs8,rolling_sum_Points20_opp,rolling_sum_firstDowns20_opp,rolling_sum_fumblesLost20_opp,rolling_sum_fumblesRecovered20_opp,rolling_sum_interceptions20_opp,rolling_sum_kickReturnYards20_opp,rolling_sum_kickingPoints20_opp,rolling_sum_netPassingYards20_opp,rolling_sum_passesDeflected20_opp,rolling_sum_passesIntercepted20_opp,rolling_sum_passingTDs20_opp,rolling_sum_puntReturns20_opp,rolling_sum_qbHurries20_opp,rolling_sum_rushingAttempts20_opp,rolling_sum_rushingTDs20_opp,rolling_sum_rushingYards20_opp,rolling_sum_sacks20_opp,rolling_sum_tacklesForLoss20_opp,rolling_sum_totalFumbles20_opp,rolling_sum_totalPenaltiesYards20_opp,rolling_sum_totalYards20_opp,rolling_sum_turnovers20_opp,rolling_sum_yardsPerPass20_opp,rolling_sum_yardsPerRushAttempt20_opp,rolling_sum_totalTDs20_opp,rolling_sum_Points8_opp,rolling_sum_firstDowns8_opp,rolling_sum_fumblesLost8_opp,rolling_sum_fumblesRecovered8_opp,rolling_sum_interceptions8_opp,rolling_sum_kickReturnYards8_opp,rolling_sum_kickingPoints8_opp,rolling_sum_netPassingYards8_opp,rolling_sum_passesDeflected8_opp,rolling_sum_passesIntercepted8_opp,rolling_sum_passingTDs8_opp,rolling_sum_puntReturns8_opp,rolling_sum_qbHurries8_opp,rolling_sum_rushingAttempts8_opp,rolling_sum_rushingTDs8_opp,rolling_sum_rushingYards8_opp,rolling_sum_sacks8_opp,rolling_sum_tacklesForLoss8_opp,rolling_sum_totalFumbles8_opp,rolling_sum_totalPenaltiesYards8_opp,rolling_sum_totalYards8_opp,rolling_sum_turnovers8_opp,rolling_sum_yardsPerPass8_opp,rolling_sum_yardsPerRushAttempt8_opp,rolling_sum_totalTDs8_opp,talent,talent_opp,SP,SP_opp,Id,HomeTeam,HomeScore,AwayTeam,AwayScore,LineProvider,OverUnder,Spread,FormattedSpread,OpeningSpread,OpeningOverUnder,HomeMoneyline,AwayMoneyline
186,186,401539475,Oregon,Pac-12,away,31,14,2023,21.34,0.0,17.0,2.2,0.0,0.0,0.0,0.0,1.0,,,,7.0,239.0,1.0,1.0,3.0,22.52,,,,13.0,20.0,1.0,124.0,2.0,46.0,4.0,3.1,,2.21,363.0,1.0,7.0,6.2,4.0,Washington,Pac-12,home,34,14,2023,29.41,0.0,26.0,0.1,0.0,0.0,0.0,3.0,1.0,0.0,0.1,1.0,10.0,324.0,4.0,1.0,2.0,37.08,0.0,32.0,1.0,7.0,37.0,2.0,157.0,0.0,32.0,3.0,10.15,1.0,7.5,481.0,1.0,7.9,4.2,4.0,-3,65,False,861.0,531.0,5.0,15.0,8.0,630.0,161.0,6528.0,85.0,22.0,59.0,26.0,46.0,709.0,55.0,4078.0,40.0,92.0,17.0,154.459,10606.0,13.0,9.48,5.715,115.0,327.0,211.0,2.0,3.0,3.0,220.0,53.0,2862.0,36.0,6.0,28.0,7.0,24.0,255.0,17.0,1348.0,19.0,41.0,6.0,58.99,4210.0,5.0,10.1375,5.325,45.0,771.0,500.0,8.0,6.0,15.0,955.1,159.0,7170.0,65.0,18.0,49.0,21.0,35.0,567.0,48.0,2655.0,38.0,98.0,19.0,157.699,9825.0,23.0,9.02,4.525,100.0,260.0,174.0,5.0,2.0,7.0,467.1,56.0,2238.0,36.0,8.0,18.0,10.0,19.0,230.0,14.0,1013.0,13.0,37.0,9.0,61.99,3251.0,12.0,7.6,4.0,33.0,874.74,751.12,26.2,16.4,401539475.0,Washington,34.0,Oregon,31.0,DraftKings,67.0,9.0,Oregon -9,9.0,67.0,300.0,-380.0


We can see that this works on both FBS and FCS dictionaries, as well as team's that had byes in the given week. Lastly, we'll write a function that will save the upcoming week's predictions to a .csv file so that we can keep track of our predictions over the course of the season.

In [490]:
def savePredictions(predsDict):

    predsDF = pd.DataFrame()

    # going through predictions and adding them onto predsDF
    for team, teamPreds in predsDict.items():
        
        # only adding games that were played
        if len(teamPreds) == 7:
            teamDF = pd.DataFrame([teamPreds], columns=['pred', 'spread', 'spreadDiff', 'cover', 'gameID', 'team', 'oppTeam'])
            predsDF = pd.concat([predsDF, teamDF], ignore_index = True)

    return predsDF

In [491]:
predsDF = savePredictions(predsDict)

In [492]:
predsDF

Unnamed: 0,pred,spread,spreadDiff,cover,gameID,team,oppTeam
0,14.5,10.5,4.0,1,401532589.0,Air Force,San Diego State
1,-4.0,2.5,-6.5,-1,401532412.0,Akron,Buffalo
2,25.0,14.5,10.5,1,401520285.0,Alabama,Mississippi State
3,18.5,13.5,5.0,1,401531882.0,Appalachian State,Louisiana Monroe
4,8.5,-20.0,28.5,1,401524021.0,Arizona,Washington
5,-10.0,-12.5,2.5,1,401524022.0,Arizona State,California
6,-10.5,-6.0,-4.5,-1,401520279.0,Arkansas,Texas A&M
7,9.5,-1.0,10.5,1,401526836.0,Arkansas State,UMass
8,-15.5,-14.0,-1.5,-1,401520280.0,Auburn,Georgia
9,-3.0,-2.0,-1.0,-1,401525851.0,BYU,Cincinnati


Looks good. Now we have all of the functions we need to be able to effectively grab upcoming game data, make predictions based on that data, grab the recent week's data, and save our predictions so that we cna easily update and run our model mid season.