In [1]:
import json
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
from sklearn import preprocessing
import urllib

In [2]:
curWeek = 15

In [3]:
P5 = ['ACC','Pac-12','Big Ten','Big 12','SEC','Notre Dame']
G5 = ['Conference USA','Sun Belt','Mid-American','American Athletic','Mountain West','FBS Independents']

def seasonProgression(dfR):
    totalGames = len(dfR)
    curWeek = int(dfR['week']) + 1
    return curWeek/(2*totalGames)

def winningTeam(dfR):
    if dfR['home_points'] > dfR['away_points']:
        return dfR['home_team']
    else:
        return dfR['away_team']
    
def teamWin(dfR, team):
    if dfR.winner == team:
        return True
    else:
        return False
    
def findOpp(dfR):
    if dfR.home == True:
        return dfR.away_team
    else:
        return dfR.home_team
def findOppConf(dfR):
    if dfR.home == True:
        return dfR.away_conference
    else:
        return dfR.home_conference
    
def locMult(dfR):
    if dfR['neutral_site'] == True:
        return 1.05
    elif dfR['home'] == True:
        return 1.1
    else:
        return 1.
    
def pointDiff(dfR,team):
    if dfR.home_team == team:
        return dfR.home_points - dfR.away_points
    else:
        return dfR.away_points - dfR.home_points
    
def confMult(dfR):
    if dfR.opp_conference in P5:
        return 1.
    elif dfR.opp_conference in G5:
        return 0.5
    else:
        return 0.15
    
def winQ(dfR,week): 
    if dfR.opp_conference == None:
        oppWinPct = 0.5
    else:
#         print(dfR.opp_conference)
        oppWinPct = FBSdict[dfR.opp].loc[FBSdict[dfR.opp].week <= week, 'winPct'].tail(1).item()
    
    if dfR.teamWin == True:
        winMult = 1
        oppFactor = oppWinPct
    else:
        winMult = -1
        oppFactor = 1 - oppWinPct
    
    return oppFactor * dfR.seasonProg * dfR.conf_mult * winMult*(dfR.loc_mult + 0.01*dfR.point_diff)

def calcPct(team, week):
    winqweek = 'win_q_'+str(week)
    wins = FBSdict[team].loc[FBSdict[team].week <= week, 'wins'].tail(1).item()
    losses = FBSdict[team].loc[FBSdict[team].week <= week, 'losses'].tail(1).item()
    return 0.9 * FBSdict[team][winqweek].mean() + 0.01*(wins-losses)

In [4]:
json_url = urllib.request.urlopen('https://api.collegefootballdata.com/games?year=2019&seasonType=regular')
jsonFile = json.loads(json_url.read())
curSch = pd.DataFrame(jsonFile)

In [5]:
json_url2 = urllib.request.urlopen('https://api.collegefootballdata.com/games?year=2019&seasonType=postseason')
jsonFile2 = json.loads(json_url2.read())
postSch = pd.DataFrame(jsonFile2)
postSch['week'] = postSch['week'] + 15
curSch = curSch.append(postSch)
curSch = curSch.dropna(subset=['away_points'])

In [6]:
curSch.loc[curSch.away_team == 'Notre Dame','away_conference'] = 'Notre Dame'
curSch.loc[curSch.home_team == 'Notre Dame','home_conference'] = 'Notre Dame'

In [7]:
curSch.tail(10)

Unnamed: 0,attendance,away_conference,away_line_scores,away_points,away_post_win_prob,away_team,conference_game,home_conference,home_line_scores,home_points,home_post_win_prob,home_team,id,neutral_site,season,season_type,start_date,venue,venue_id,week
839,,Mid-American,"[7, 3, 7, 9]",26,0.2777377689395574,Miami (OH),True,Mid-American,"[0, 14, 0, 7]",21,0.7222622310604426,Central Michigan,401132982,True,2019,regular,2019-12-07T17:00:00.000Z,Ford Field,3727,15
840,,Sun Belt,"[7, 10, 7, 14]",38,0.2958240041209234,Louisiana,True,Sun Belt,"[21, 14, 7, 3]",45,0.7041759958790765,Appalachian State,401132984,False,2019,regular,2019-12-07T17:00:00.000Z,Kidd Brewer Stadium,3792,15
841,,Big 12,"[0, 13, 0, 10, 0]",23,0.0034149314646176,Baylor,True,Big 12,"[10, 0, 10, 3, 7]",30,0.9965850685353824,Oklahoma,401132974,True,2019,regular,2019-12-07T17:00:00.000Z,AT&T Stadium,3687,15
842,,Conference USA,"[3, 3, 0, 0]",6,0.0019752691121971,UAB,True,Conference USA,"[14, 21, 7, 7]",49,0.9980247308878027,Florida Atlantic,401132980,False,2019,regular,2019-12-07T18:30:00.000Z,FAU Stadium,3715,15
843,,American Athletic,"[7, 7, 7, 3]",24,0.3651174969968793,Cincinnati,True,American Athletic,"[10, 0, 10, 9]",29,0.6348825030031207,Memphis,401132976,False,2019,regular,2019-12-07T20:30:00.000Z,Liberty Bowl Memorial Stadium,3805,15
844,,SEC,"[0, 3, 0, 7]",10,0.0013406671769196,Georgia,True,SEC,"[14, 3, 17, 3]",37,0.9986593328230804,LSU,401132981,True,2019,regular,2019-12-07T21:00:00.000Z,Mercedes-Benz Stadium,5348,15
845,,Mountain West,"[3, 0, 0, 7]",10,0.0060112984399582,Hawai'i,True,Mountain West,"[3, 14, 14, 0]",31,0.9939887015600416,Boise State,401132979,False,2019,regular,2019-12-07T21:00:00.000Z,Albertsons Stadium,3653,15
846,,ACC,"[7, 0, 7, 3]",17,0.0010709010862935,Virginia,True,ACC,"[14, 17, 14, 17]",62,0.9989290989137064,Clemson,401132975,True,2019,regular,2019-12-08T00:30:00.000Z,Bank of America Stadium,3628,15
847,,Big Ten,"[0, 7, 17, 10]",34,0.7616140407818995,Ohio State,True,Big Ten,"[7, 14, 0, 0]",21,0.2383859592181004,Wisconsin,401132983,True,2019,regular,2019-12-08T01:00:00.000Z,Lucas Oil Stadium,3812,15
848,,FBS Independents,"[7, 0, 0, 0]",7,0.0018978494737622,Army,False,American Athletic,"[0, 14, 7, 10]",31,0.9981021505262376,Navy,401114335,True,2019,regular,2019-12-14T20:00:00.000Z,Lincoln Financial Field,3806,15


In [8]:
FBSteams = curSch['home_team'].unique()
FBSteams.sort()

In [18]:
FBSteams

array(['Air Force', 'Akron', 'Alabama', 'Appalachian State', 'Arizona',
       'Arizona State', 'Arkansas', 'Arkansas State', 'Army', 'Auburn',
       'BYU', 'Ball State', 'Baylor', 'Boise State', 'Boston College',
       'Bowling Green', 'Buffalo', 'California', 'Central Michigan',
       'Charlotte', 'Cincinnati', 'Clemson', 'Coastal Carolina',
       'Colorado', 'Colorado State', 'Connecticut', 'Duke',
       'East Carolina', 'Eastern Michigan', 'Florida', 'Florida Atlantic',
       'Florida International', 'Florida State', 'Fresno State',
       'Georgia', 'Georgia Southern', 'Georgia State', 'Georgia Tech',
       "Hawai'i", 'Houston', 'Illinois', 'Indiana', 'Iowa', 'Iowa State',
       'Kansas', 'Kansas State', 'Kent State', 'Kentucky', 'LSU',
       'Liberty', 'Louisiana', 'Louisiana Monroe', 'Louisiana Tech',
       'Louisville', 'Marshall', 'Maryland', 'Memphis', 'Miami',
       'Miami (OH)', 'Michigan', 'Michigan State', 'Middle Tennessee',
       'Minnesota', 'Mississippi St

In [9]:
FBSdict = {}
for team in FBSteams:
    FBSdict[team] = curSch[(curSch['away_team'] == team) | 
                           (curSch['home_team'] == team)]
#     FBSdict[team] = FBSdict[team][np.isfinite(FBSdict[team].away_points)]
    FBSdict[team] = FBSdict[team].reset_index()

In [10]:
FBSbar = tqdm(total=len(FBSteams))
for key in FBSdict:
    FBSbar.set_description(key)
    FBSdict[key]['seasonProg'] = FBSdict[key].apply(lambda row: seasonProgression(row),axis = 1)
    FBSdict[key]['winner'] = FBSdict[key].apply(lambda row: winningTeam(row),axis = 1)
    FBSdict[key]['teamWin'] = FBSdict[key].apply(lambda row: teamWin(row,key),axis = 1)
    
    wins = 0
    losses = 0
    winList = []
    lossList = []
    for row in FBSdict[key].itertuples():
    #     print(row.winner)
        if row.teamWin:
            wins += 1
        else:
            losses += 1
        winList.append(wins)
        lossList.append(losses)
        
    FBSdict[key]['wins'] = winList
    FBSdict[key]['losses'] = lossList
    
    hometeam = FBSdict[key]['home_team'] == key
    FBSdict[key]['home'] = hometeam
    
    FBSdict[key]['winPct'] = FBSdict[key]['wins']/(FBSdict[key]['losses'] + FBSdict[key]['wins'])
    FBSdict[key]['opp'] = FBSdict[key].apply(lambda row: findOpp(row), axis = 1)
    FBSdict[key]['opp_conference'] = FBSdict[key].apply(lambda row: findOppConf(row), axis = 1)
    FBSdict[key]['point_diff'] = FBSdict[key].apply(lambda row: pointDiff(row,key), axis = 1)
    FBSdict[key]['loc_mult'] = FBSdict[key].apply(lambda row: locMult(row), axis = 1)
    FBSdict[key]['conf_mult'] = FBSdict[key].apply(lambda row: confMult(row), axis = 1)
    
    FBSbar.update(1)

HBox(children=(IntProgress(value=0, max=130), HTML(value='')))

In [11]:
FBSbar2 = tqdm(total=len(FBSteams))
for key in FBSdict:
    FBSbar2.set_description(key)
    for w in range(4,curWeek+1):
        FBSdict[key]['win_q_'+str(w)] = FBSdict[key].apply(lambda row: winQ(row, w),axis=1)
    FBSbar2.update()

HBox(children=(IntProgress(value=0, max=130), HTML(value='')))

In [12]:
FBSbar3 = tqdm(total=len(FBSteams))
FBSpct = {}
for key in FBSdict:
    FBSbar3.set_description(key)
    pctList = []
    for w in range(4,curWeek+1):
        pctList.append(calcPct(key,w))
    FBSpct[key] = pctList
    FBSbar3.update()

HBox(children=(IntProgress(value=0, max=130), HTML(value='')))

In [13]:
PCTdf = pd.DataFrame.from_dict(FBSpct,orient='index',columns=range(4,curWeek+1))
PCTdf.to_csv('2019/PCT.csv')

In [14]:
PCTnormdf = pd.DataFrame(index = FBSteams,columns=range(4,curWeek+1))

for w in range(4,curWeek+1):
    x = PCTdf[w].values.reshape(-1,1)
    min_max_scaler = preprocessing.MinMaxScaler()
    xScaled = min_max_scaler.fit_transform(x)
    PCTnormdf[w] = xScaled.reshape(130)

PCTnormdf.to_csv('2019/PCT_norm.csv')

In [15]:
Ranks = PCTdf.rank(method='first',ascending=False).astype('int64')
Ranks.to_csv('2019/Ranks.csv')

In [16]:
for w in range(4,curWeek+1):
    Wdf = pd.concat([PCTnormdf[w],Ranks[w]],axis=1)
    Wdf.columns = ['PCT','Rank']
    Wdf = Wdf.sort_values('Rank')
    Wdf.to_csv('2019/W%s.csv'%w)

In [17]:
Ranks.sort_values(15)

Unnamed: 0,4,5,6,7,8,9,10,11,12,13,14,15
Ohio State,1,1,1,1,1,1,2,3,1,1,1,1
LSU,2,2,2,3,2,2,3,2,2,2,2,2
Clemson,3,3,3,2,3,3,1,1,3,3,3,3
Oklahoma,4,4,4,4,4,4,4,4,4,4,4,4
Oregon,5,7,5,5,5,5,5,5,5,7,7,5
Georgia,7,6,6,10,8,9,8,7,6,5,5,6
Utah,8,8,11,11,9,8,9,8,7,6,6,7
Notre Dame,11,14,13,9,10,14,11,10,10,9,9,8
Memphis,22,18,16,17,17,16,13,14,13,12,11,9
Baylor,6,5,7,6,6,7,7,6,9,8,8,10
