# Get Recruiting Data

In [1]:
# import dependencies and get data already created
import pandas as pd
import numpy as np
import requests

# base URL that every API request in this notebook is built from
urlstub = "https://api.collegefootballdata.com"

# seasons to pull: 2004 through 2019 inclusive (the stop value is exclusive)
years = np.arange(2004, 2020)

# FBS school reference table produced earlier and stored on disk
fbsSchoolFrame = pd.read_csv(filepath_or_buffer='Data/fbsSchoolData.csv')

In [2]:
# get team recruiting rank data by iterating through years
#
# One frame is built per year and all of them are concatenated once at the end.
# Growing a frame with DataFrame.append inside a loop re-copies the accumulated
# data on every pass, and DataFrame.append no longer exists in pandas >= 2.0.
recruitingYearFrames = []
for y in years:
    queryurl = f"{urlstub}/recruiting/teams?year={y}"
    # a timeout keeps a stalled connection from hanging the notebook forever
    resp = requests.get(queryurl, timeout=30)
    # stop on an HTTP error instead of trying to parse an error payload as data
    resp.raise_for_status()
    respjs = resp.json()
    # convert response to a dataframe: the payload is treated as a list with
    # one dict per row, so the whole year can be converted in a single call
    recruitingYearFrames.append(pd.DataFrame(respjs))

# combine the per-year dataframes into the overall dataframe
if recruitingYearFrames:
    recruitingFrame = pd.concat(recruitingYearFrames, ignore_index=True, sort=False)
else:
    # no years requested: keep the result an (empty) dataframe
    recruitingFrame = pd.DataFrame()

In [3]:
# Left-join the recruiting rows onto the FBS school table by (year, team) so
# every school row is kept, then give the recruiting columns explicit names.
recruitingColumnNames = {"rank":"recruitingRank","points":"recruitingPoints"}
fbsRecruitingFrame = (
    fbsSchoolFrame
    .merge(recruitingFrame, how='left', on=['year','team'])
    .rename(columns=recruitingColumnNames)
)
# show the merged dataframe as the cell output
fbsRecruitingFrame

Unnamed: 0,year,id,team,school,mascot,abbreviation,conference,division,recruitingRank,recruitingPoints
0,2004,2005,Air Force,Air Force,Falcons,AFA,Mountain West,,116.0,21.90
1,2004,2006,Akron,Akron,Zips,AKR,Mid-American,East,101.0,75.91
2,2004,333,Alabama,Alabama,Crimson Tide,ALA,SEC,West,21.0,191.78
3,2004,12,Arizona,Arizona,Wildcats,ARIZ,Pac-10,,41.0,154.87
4,2004,9,Arizona State,Arizona State,Sun Devils,ASU,Pac-10,,23.0,191.49
...,...,...,...,...,...,...,...,...,...,...
1976,2019,98,Western Kentucky,Western Kentucky,Hilltoppers,WKU,Conference USA,East,86.0,153.89
1977,2019,2711,Western Michigan,Western Michigan,Broncos,WMU,Mid-American,West,92.0,148.70
1978,2019,277,West Virginia,West Virginia,Mountaineers,WVU,Big 12,,48.0,191.83
1979,2019,275,Wisconsin,Wisconsin,Badgers,WIS,Big Ten,West,29.0,219.51


In [4]:
# write the merged school/recruiting dataframe to disk, leaving out the row index
fbsRecruitingFrame.to_csv(path_or_buf='Data/fbsRecruitingData.csv', index=False)

In [5]:
# get regular season win/loss data
#
# One frame is built per year and all of them are concatenated once at the end.
# Growing a frame with DataFrame.append inside a loop re-copies the accumulated
# data on every pass, and DataFrame.append no longer exists in pandas >= 2.0.
gameYearFrames = []
for y in years:
    queryurl = f"{urlstub}/lines?year={y}"
    # a timeout keeps a stalled connection from hanging the notebook forever
    resp = requests.get(queryurl, timeout=30)
    # stop on an HTTP error instead of trying to parse an error payload as data
    resp.raise_for_status()
    respjs = resp.json()
    # convert response to a dataframe of games: each element is treated as a
    # dict describing one game; its 'lines' entry is not wanted here, so it is
    # left out while copying (the parsed payload itself is not modified, and a
    # game without a 'lines' key is accepted)
    gameRecords = [
        {key: value for key, value in game.items() if key != 'lines'}
        for game in respjs
    ]
    gameYearFrame = pd.DataFrame(gameRecords)

    # tag every game with the season it was requested for
    gameYearFrame['year'] = int(y)
    gameYearFrames.append(gameYearFrame)

# combine the per-year dataframes into the overall game dataframe
if gameYearFrames:
    gameFrame = pd.concat(gameYearFrames, ignore_index=True, sort=False)
else:
    # no years requested: keep the result an (empty) dataframe
    gameFrame = pd.DataFrame()

In [6]:
# find number of home wins, away wins, and ties and combine
# each flag is 1 when the condition holds for the game and 0 otherwise
gameFrame['tie'] = (gameFrame['homeScore']==gameFrame['awayScore']).astype(int)
gameFrame['homeWin'] = (gameFrame['homeScore']>gameFrame['awayScore']).astype(int)
gameFrame['awayWin'] = (gameFrame['homeScore']<gameFrame['awayScore']).astype(int)

# a loss for one side is a win for the other
gameFrame['homeLoss'] = gameFrame['awayWin']
gameFrame['awayLoss'] = gameFrame['homeWin']

homegrouped = gameFrame.groupby(['homeTeam','year'])
awaygrouped = gameFrame.groupby(['awayTeam','year'])

# per team and year: results of the games played at home
homeframe = pd.DataFrame({"homeWins":homegrouped['homeWin'].sum(),
                          "homeLosses":homegrouped['homeLoss'].sum(),
                          "ties":homegrouped['tie'].sum()})
homeframe = homeframe.reset_index()
homeframe = homeframe.rename(columns={"homeTeam":"team"})

# per team and year: results of the games played away; ties are counted here
# too, otherwise a tie played on the road would be missing from the totals
awayframe = pd.DataFrame({"awayWins":awaygrouped['awayWin'].sum(),
                          "awayLosses":awaygrouped['awayLoss'].sum(),
                          "awayTies":awaygrouped['tie'].sum()})
awayframe = awayframe.reset_index()
awayframe = awayframe.rename(columns={"awayTeam":"team"})

# combine home and away data and calculate overall wins and losses
# an outer join keeps a team/year that has only home rows or only away rows;
# the side with no games is filled with zero counts instead of dropping the row
homeAwayFrame = pd.merge(homeframe,awayframe,on=['team','year'],how='outer')
countColumns = ['homeWins','homeLosses','ties','awayWins','awayLosses','awayTies']
homeAwayFrame[countColumns] = homeAwayFrame[countColumns].fillna(0).astype(int)

homeAwayFrame['wins'] = homeAwayFrame['homeWins'] + homeAwayFrame['awayWins']
homeAwayFrame['losses'] = homeAwayFrame['homeLosses'] + homeAwayFrame['awayLosses']
homeAwayFrame['ties'] = homeAwayFrame['ties'] + homeAwayFrame['awayTies']
homeAwayFrame['games'] = homeAwayFrame['wins'] + homeAwayFrame['losses'] + homeAwayFrame['ties']
homeAwayFrame = homeAwayFrame[['team','year','games','wins','losses','ties']]
# keep the rows ordered by team, then year
homeAwayFrame = homeAwayFrame.sort_values(['team','year']).reset_index(drop=True)

# preview the frame
homeAwayFrame

Unnamed: 0,team,year,games,wins,losses,ties
0,Air Force,2004,11,5,6,0
1,Air Force,2005,11,4,7,0
2,Air Force,2006,12,4,8,0
3,Air Force,2007,12,9,3,0
4,Air Force,2008,12,8,4,0
...,...,...,...,...,...,...
1983,Wyoming,2015,12,2,10,0
1984,Wyoming,2016,13,8,5,0
1985,Wyoming,2017,12,7,5,0
1986,Wyoming,2018,12,6,6,0


In [7]:
# Left-join the per-team, per-year win/loss totals onto the FBS school table so
# every school row is kept, then show the result as the cell output.
fbsWinLossFrame = fbsSchoolFrame.merge(homeAwayFrame, how='left', on=['team','year'])
fbsWinLossFrame

Unnamed: 0,year,id,team,school,mascot,abbreviation,conference,division,games,wins,losses,ties
0,2004,2005,Air Force,Air Force,Falcons,AFA,Mountain West,,11,5,6,0
1,2004,2006,Akron,Akron,Zips,AKR,Mid-American,East,11,6,5,0
2,2004,333,Alabama,Alabama,Crimson Tide,ALA,SEC,West,11,6,5,0
3,2004,12,Arizona,Arizona,Wildcats,ARIZ,Pac-10,,11,3,8,0
4,2004,9,Arizona State,Arizona State,Sun Devils,ASU,Pac-10,,11,8,3,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1976,2019,98,Western Kentucky,Western Kentucky,Hilltoppers,WKU,Conference USA,East,12,8,4,0
1977,2019,2711,Western Michigan,Western Michigan,Broncos,WMU,Mid-American,West,12,7,5,0
1978,2019,277,West Virginia,West Virginia,Mountaineers,WVU,Big 12,,12,5,7,0
1979,2019,275,Wisconsin,Wisconsin,Badgers,WIS,Big Ten,West,13,10,3,0


In [9]:
# write the merged school/win-loss dataframe to disk, leaving out the row index
fbsWinLossFrame.to_csv(path_or_buf='Data/fbsWinLossData.csv', index=False)