In [1]:
import json
import os
import urllib

import numpy as np
import pandas as pd
from sklearn import preprocessing
from tqdm.notebook import tqdm
pd.set_option('display.max_columns', None)

def seasonProgression(row):
    """Return the game's week as a fraction of a 15-week span.

    `row` is any object exposing a numeric `week` attribute (e.g. a
    DataFrame row passed by `apply(axis=1)`).
    """
    weeks_in_span = 15
    return row.week / weeks_in_span

def winningTeam(row):
    """Return the name of the team with more points, or None when neither leads.

    `row` must support item access for 'home_points', 'away_points',
    'home_team' and 'away_team'.
    """
    home_pts, away_pts = row['home_points'], row['away_points']
    if home_pts > away_pts:
        return row['home_team']
    if away_pts > home_pts:
        return row['away_team']
    # Equal scores (or incomparable values) produce no winner.
    return None

def teamWin(row,team):
    """Tell whether `team` won the game described by `row`.

    Returns True when `row.winner` equals `team`, None when the game has
    no winner (`row.winner` is None), and False otherwise.
    """
    victor = row.winner
    if victor == team:
        return True
    return None if victor == None else False
    
def findOpp(row):
    """Return the opposing team's name: the away team if `row.home` is True, else the home team."""
    return row.away_team if row.home == True else row.home_team

def findOppConf(dfR):
    """Return the opponent's conference: the away conference if `dfR.home` is True, else the home conference."""
    return dfR.away_conference if dfR.home == True else dfR.home_conference

def locMult(dfR):
    """Return the location multiplier for a game row.

    1.05 when `dfR['neutral_site']` is True, otherwise 1.1 when
    `dfR['home']` is True, otherwise 1.0. The neutral-site check takes
    precedence over the home flag.
    """
    if dfR['neutral_site'] == True:
        return 1.05
    return 1.1 if dfR['home'] == True else 1.
    
def pointDiff(dfR,team):
    """Return the scoring margin from the perspective selected by `dfR.home`.

    When `dfR.home` is truthy the result is home points minus away points;
    otherwise away points minus home points. `team` is accepted but not
    used by the computation.
    """
    if dfR.home:
        own, other = dfR.home_points, dfR.away_points
    else:
        own, other = dfR.away_points, dfR.home_points
    return own - other
    
def confMult(dfR):
    """Return the opponent-conference multiplier for a game row.

    1.0 if `dfR.opp_conference` is in the module-level `P5` collection,
    0.5 if it is in `G5`, and 0.15 for anything else.
    """
    conference = dfR.opp_conference
    if conference in P5:
        return 1.
    if conference in G5:
        return 0.5
    return 0.15



def winQ(row,week):
    """Return the quality score of one game as seen at the end of `week`.

    Parameters
    ----------
    row : object with attributes `week`, `opp`, `opp_conference`, `teamWin`,
        `seasonProg`, `conf_mult`, `loc_mult` and `point_diff`
        (a row of one team's frame in the module-level `FBSdict`).
    week : the week whose standings are used for the opponent's win percentage.

    Returns
    -------
    0.0 for games played after `week`. Otherwise
    ``oppFactor * seasonProg * conf_mult * winMult * (loc_mult + 0.01 * point_diff)``
    where a win uses ``winMult = 1`` with the opponent's win percentage and
    anything else uses ``winMult = -1`` with one minus that percentage.
    A game without a winner (`teamWin` is None) is falsy and therefore takes
    the non-win branch.
    """
    if row.week > week:
        return 0.

    # Opponents with no conference, or that are not tracked in FBSdict at all
    # (e.g. a non-FBS team that still carries a conference label), have no
    # record to look up; treat them as a neutral 0.5 instead of raising KeyError.
    if row.opp_conference == None or row.opp not in FBSdict:
        oppWinPct = 0.5
    else:
        oppGames = FBSdict[row.opp]
        oppPctSoFar = oppGames.loc[oppGames.week <= week, 'winPct']
        # Most recent win percentage at or before `week`; neutral if none exists.
        oppWinPct = oppPctSoFar.iloc[-1] if len(oppPctSoFar) else 0.5

    if row.teamWin:
        winMult = 1
        oppFactor = oppWinPct
    else:
        winMult = -1
        oppFactor = 1 - oppWinPct

    return oppFactor * row.seasonProg * row.conf_mult * winMult*(row.loc_mult + 0.01*row.point_diff)
    

def calcPct(team,week):
    """Return `team`'s rating through `week`.

    Uses the team's frame in the module-level `FBSdict`, restricted to rows
    with `week` at or before the requested week:
    ``0.9 * mean(win_q_<week>) + 0.01 * (wins - losses)``, where wins and
    losses are taken from the last such row. Raises IndexError when the team
    has no rows at or before `week`.
    """
    games = FBSdict[team]
    through_week = games.week <= week
    n_wins = games.loc[through_week, 'wins'].iloc[-1]
    n_losses = games.loc[through_week, 'losses'].iloc[-1]
    mean_quality = games.loc[through_week, 'win_q_%s'%week].mean()
    return 0.9*mean_quality + 0.01*(n_wins-n_losses)

In [5]:
# Season to rank and the minimum number of games a team must have played
# before it receives a rating.
year = 2020
gameThresh = 4

# Conference tiers consulted by confMult(); 'Notre Dame' is listed as its
# own entry in the top tier.
P5 = [
    'ACC',
    'Pac-12',
    'Big Ten',
    'Big 12',
    'SEC',
    'Notre Dame',
]
G5 = [
    'Conference USA',
    'Sun Belt',
    'Mid-American',
    'American Athletic',
    'Mountain West',
    'FBS Independents',
]

In [20]:
# Download the regular-season game list for `year`.
gamesDF = pd.read_json('https://api.collegefootballdata.com/games?year=%s&seasonType=regular'%(year))
# Keep only rows with a recorded home score (presumably games already played).
# .copy() makes the result an independent frame so later .loc assignments on
# gamesDF do not write into a filtered slice of the downloaded data.
gamesDF = gamesDF[gamesDF.home_points.notna()].copy()

In [39]:
# Latest week number present among the retained games.
curWeek = gamesDF['week'].max()

2020

In [21]:
# Label Notre Dame's conference as 'Notre Dame' on whichever side it appears.
for side in ('away', 'home'):
    gamesDF.loc[gamesDF['%s_team'%side] == 'Notre Dame', '%s_conference'%side] = 'Notre Dame'

In [22]:
# Download the FBS team list for `year` and keep the school names as an array.
teamsDF = pd.read_json('https://api.collegefootballdata.com/teams/fbs?year=%s'%(year))
FBSteams = teamsDF['school'].to_numpy(copy=True)

In [23]:
# Map each FBS school to a frame of the games it appears in (home or away).
# reset_index() keeps the original gamesDF row labels in an 'index' column.
FBSdict = {}
for team in FBSteams:
    plays_away = gamesDF['away_team'] == team
    plays_home = gamesDF['home_team'] == team
    FBSdict[team] = gamesDF[plays_away | plays_home].reset_index()

In [24]:
# Add the per-game feature columns to every team's frame in FBSdict.
FBSbar = tqdm(total=len(FBSteams))
for key in FBSdict:
    games = FBSdict[key]  # same object as FBSdict[key]; columns are added in place
    if len(games) == 0:
        # Team has no retained games: nothing to compute.
        FBSbar.update(1)
        continue
    FBSbar.set_description(key)

    games['seasonProg'] = games.apply(seasonProgression, axis=1)
    games['winner'] = games.apply(winningTeam, axis=1)
    games['teamWin'] = games.apply(teamWin, axis=1, args=(key,))

    # Running win/loss totals after each game; a game with no winner
    # (teamWin is None) changes neither counter.
    wins = losses = 0
    winList, lossList = [], []
    for outcome in games['teamWin']:
        if outcome:
            wins += 1
        elif outcome == None:
            pass
        else:
            losses += 1
        winList.append(wins)
        lossList.append(losses)
    games['wins'] = winList
    games['losses'] = lossList

    games['home'] = games['home_team'] == key
    games['winPct'] = games['wins'] / (games['wins'] + games['losses'])

    games['opp'] = games.apply(findOpp, axis=1)
    games['opp_conference'] = games.apply(findOppConf, axis=1)
    games['point_diff'] = games.apply(pointDiff, axis=1, args=(key,))
    games['loc_mult'] = games.apply(locMult, axis=1)
    games['conf_mult'] = games.apply(confMult, axis=1)

    FBSbar.update(1)

HBox(children=(FloatProgress(value=0.0, max=130.0), HTML(value='')))

In [25]:
# For every team, add one 'win_q_<w>' column per week from the team's first
# game week through curWeek, scoring each game as of that week.
FBSbar2 = tqdm(total=len(FBSteams))
for key, games in FBSdict.items():
    FBSbar2.set_description(key)
    if len(games) > 0:
        first_week = games.week.min()
        for w in range(first_week, curWeek+1):
            games['win_q_%s'%w] = games.apply(winQ, axis=1, args=(w,))
    FBSbar2.update(1)

HBox(children=(FloatProgress(value=0.0, max=130.0), HTML(value='')))

In [26]:
# Build the team x week rating table and save it to <year>/PCT.csv.
FBSbar3 = tqdm(total=len(FBSteams))
# Single source of truth for the rated weeks: the per-team lists and the
# DataFrame columns must have the same length, so both derive from gameThresh
# (a hard-coded start week breaks the frame whenever gameThresh != 4).
ratedWeeks = range(gameThresh,curWeek+1)
FBSpct = {}
for key in FBSdict:
    FBSbar3.set_description(key)
    if len(FBSdict[key]) == 0:
        FBSbar3.update(1)
        continue
    pctList = []
    for w in ratedWeeks:
        # Teams with fewer than gameThresh games through week w stay unrated.
        if len(FBSdict[key][FBSdict[key].week <= w]) < gameThresh:
            pctList.append(None)
        else:
            try:
                pctList.append(calcPct(key,w))
            except IndexError:
                # calcPct found no rows at or before week w.
                pctList.append(None)
    FBSpct[key] = pctList
    FBSbar3.update()
PCTdf = pd.DataFrame.from_dict(FBSpct,orient='index',columns=ratedWeeks)
PCTdf.index = PCTdf.index.rename('School')
# Make sure the per-year output directory exists before the first CSV is written.
os.makedirs(str(year), exist_ok=True)
PCTdf.to_csv('%s/PCT.csv'%year)

HBox(children=(FloatProgress(value=0.0, max=130.0), HTML(value='')))

In [27]:
# Rescale every week's ratings with MinMaxScaler and save the result.
min_max_scaler = preprocessing.MinMaxScaler()
scaledPct = min_max_scaler.fit_transform(PCTdf)
PCTnormdf = pd.DataFrame(
    scaledPct,
    index=PCTdf.index,
    columns=range(gameThresh,curWeek+1),
)

PCTnormdf.to_csv('%s/PCT_norm.csv'%year)

In [28]:
# Rank teams within each week (highest rating = rank 1, ties broken by row
# order); unrated teams get the placeholder rank 999.
rankedPct = PCTdf.rank(method='first',ascending=False)
Ranks = rankedPct.fillna(999).astype('int64')
Ranks.to_csv('%s/Ranks.csv'%year)

In [29]:
# Side-by-side table: 'Rank_W<week>' columns followed by 'PCT_W<week>' columns.
rankColumns = Ranks.add_prefix('Rank_W')
pctColumns = PCTnormdf.add_prefix('PCT_W')
printDF = pd.concat([rankColumns, pctColumns], axis=1, join='inner')

In [30]:
# Write one ranking CSV per rated week. The first rated week is gameThresh,
# matching the 'Rank_W<w>' / 'PCT_W<w>' columns that exist in printDF; a
# hard-coded start of 4 raises KeyError as soon as gameThresh is raised.
for w in range(gameThresh,curWeek+1):
    rankCol, pctCol = 'Rank_W%s'%w, 'PCT_W%s'%w
    # dropna() removes teams that have no rating for this week.
    curWeekDF = printDF[[rankCol,pctCol]].reset_index().dropna()
    curWeekDF['Team'] = curWeekDF.apply(lambda row: teamsDF[teamsDF.school == row['School']].abbreviation.item(),axis=1)
    curWeekDF[[rankCol,'School',pctCol]].sort_values(rankCol).to_csv('%s/W%s.csv'%(year,w),index=False)

In [93]:
# Spot check: cumulative (wins, losses) on Air Force's last retained game.
airForceLatest = FBSdict['Air Force'].iloc[-1]
(airForceLatest.wins, airForceLatest.losses)

(1, 2)

In [95]:
# Build the display table for the latest week: rank, normalised rating,
# abbreviation, logo, logo+name HTML snippet and final record.
rankCol, pctCol = 'Rank_W%s'%curWeek, 'PCT_W%s'%curWeek
curWeekDF = printDF[[rankCol,pctCol]].reset_index().dropna()
schools = curWeekDF['School']

# Look up each school's single matching row in teamsDF.
curWeekDF['Team'] = [teamsDF.loc[teamsDF.school == s, 'abbreviation'].item() for s in schools]
curWeekDF['Logo'] = [teamsDF.loc[teamsDF.school == s, 'logos'].item()[0] for s in schools]
curWeekDF['LogoStr'] = [
    "<img src='%s' width=50px style='float:left; padding: 0px 10px'> %s"%(logo,school)
    for logo, school in zip(curWeekDF['Logo'], schools)
]

# Record comes from the last row of each team's game frame.
lastGames = [FBSdict[s].iloc[-1] for s in schools]
curWeekDF['Record'] = ["(%s-%s)"%(g.wins,g.losses) for g in lastGames]

curWeekDF[[rankCol,'School',pctCol]].sort_values(rankCol).to_csv('%s/W%s.csv'%(year,curWeek),index=False)

In [98]:
from tabulate import tabulate
# Print the top 25 for the latest week as a GitHub-style table.
print('WEEK %s, %s RANKINGS\nRanking teams who have played at least %s games.\n'%(curWeek,year,gameThresh))
# Sort by the current week's rank column; a literal 'Rank_W9' only exists
# when curWeek happens to be 9.
rankCol = 'Rank_W%s'%curWeek
top25 = curWeekDF.reset_index()[[rankCol,'School','PCT_W%s'%curWeek,'Record']].sort_values(rankCol).head(25)
print(tabulate(top25,headers=['Rank','Team','PCT','Record'],tablefmt='github',showindex=False,floatfmt=".3f"))

WEEK 9, 2020 RANKINGS
Ranking teams who have played at least 4 games.

|   Rank | Team             |   PCT | Record   |
|--------|------------------|-------|----------|
|      1 | Alabama          | 1.000 | (6-0)    |
|      2 | Clemson          | 0.917 | (7-0)    |
|      3 | Notre Dame       | 0.799 | (6-0)    |
|      4 | Cincinnati       | 0.777 | (5-0)    |
|      5 | Georgia          | 0.772 | (4-1)    |
|      6 | Coastal Carolina | 0.738 | (6-0)    |
|      7 | Texas A&M        | 0.726 | (4-1)    |
|      8 | BYU              | 0.725 | (7-0)    |
|      9 | Marshall         | 0.698 | (5-0)    |
|     10 | Miami            | 0.690 | (5-1)    |
|     11 | Florida          | 0.671 | (3-1)    |
|     12 | Wake Forest      | 0.649 | (4-2)    |
|     13 | Oklahoma         | 0.632 | (4-2)    |
|     14 | Oklahoma State   | 0.631 | (4-1)    |
|     15 | Virginia Tech    | 0.619 | (4-2)    |
|     16 | SMU              | 0.619 | (6-1)    |
|     17 | Louisiana        | 0.613 | (5-1)    

In [99]:
# Emit the latest week's table as raw HTML (escape=False keeps the <img> tags).
# Sort by the current week's rank column rather than a literal 'Rank_W9',
# which raises KeyError for any other week.
rankCol = 'Rank_W%s'%curWeek
print(curWeekDF[[rankCol,'LogoStr','PCT_W%s'%curWeek,'Record']].sort_values(rankCol).to_html(index=False,escape=False))

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>Rank_W9</th>
      <th>LogoStr</th>
      <th>PCT_W9</th>
      <th>Record</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>1</td>
      <td><img src='http://a.espncdn.com/i/teamlogos/ncaa/500/333.png' width=50px style='float:left; padding: 0px 10px'> Alabama</td>
      <td>1.000000</td>
      <td>(6-0)</td>
    </tr>
    <tr>
      <td>2</td>
      <td><img src='http://a.espncdn.com/i/teamlogos/ncaa/500/228.png' width=50px style='float:left; padding: 0px 10px'> Clemson</td>
      <td>0.916569</td>
      <td>(7-0)</td>
    </tr>
    <tr>
      <td>3</td>
      <td><img src='http://a.espncdn.com/i/teamlogos/ncaa/500/87.png' width=50px style='float:left; padding: 0px 10px'> Notre Dame</td>
      <td>0.799157</td>
      <td>(6-0)</td>
    </tr>
    <tr>
      <td>4</td>
      <td><img src='http://a.espncdn.com/i/teamlogos/ncaa/500/2132.png' width=50px style='float:left; padding: 0px 10px'> C

In [33]:
# Print the top 25 for the latest week as plain "rank  abbreviation" lines.
print('WEEK %s RANKINGS\nPlayed at least %s games.\n'%(curWeek,gameThresh))
# Sort by the current week's rank column; the literal 'Rank_W9' is only
# valid when curWeek is 9.
rankCol = 'Rank_W%s'%curWeek
print(tabulate(curWeekDF.reset_index()[[rankCol,'Team']].sort_values(rankCol).head(25),tablefmt='plain',showindex=False))

WEEK 9 RANKINGS
Played at least 4 games.

 1  ALA
 2  CLEM
 3  ND
 4  CIN
 5  UGA
 6  CCU
 7  TA&M
 8  BYU
 9  MRSH
10  MIAMI
11  FLA
12  WAKE
13  OKLA
14  OKST
15  VT
16  SMU
17  ULL
18  AUB
19  TEX
20  LIB
21  ARMY
22  ISU
23  NCST
24  UNC
25  TLSA
