#  Importing Data From API

In [21]:
import requests
import pandas as pd
import statistics as stat

In [49]:
URL = "https://api.collegefootballdata.com/rankings/"
years = range(2015, 2019)

rankings = {
    "Year": [],
    "Season": [],
    "Week": [],
    "Ranking": [],
    "College": [],
}

for y in years:
    for i in range(1,16):
        PARAMS = {
            'year': y,
            'week': i,
            'seasonType': 'regular'
        }
        
        data = requests.get(url = URL, params=PARAMS).json()
        
        if len(data) > 0:
            ranks = data[0]["polls"][0]["ranks"]
            for r in ranks:
                rankings["Year"].append(y)
                rankings["Week"].append(i)
                rankings["College"].append(r["school"])
                rankings["Ranking"].append(r["rank"])

In [50]:
for y in rankings["Year"]:
    rankings["Season"].append(str(y)+"-"+str(y+1-2000))

In [55]:
df = pd.DataFrame(rankings).drop_duplicates().sort_values(by=["Year", "Week", "Ranking"])
df

Unnamed: 0,Year,Season,Week,Ranking,College
22,2015,2015-16,1,1,Ohio State
19,2015,2015-16,1,2,TCU
3,2015,2015-16,1,3,Alabama
11,2015,2015-16,1,4,Baylor
13,2015,2015-16,1,5,Oregon
23,2015,2015-16,1,6,Michigan State
6,2015,2015-16,1,7,Auburn
8,2015,2015-16,1,8,Florida State
1,2015,2015-16,1,9,Georgia
15,2015,2015-16,1,10,USC


In [52]:
df.to_csv("df.csv", index=False)

# Finding Season Average Rank

In [56]:
season_rankings = df.groupby(["Year", "College"]).mean()
season_rankings

Unnamed: 0_level_0,Unnamed: 1_level_0,Week,Ranking
Year,College,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,Alabama,8.000000,5.733333
2015,Arizona,2.500000,19.250000
2015,Arizona State,1.000000,16.000000
2015,Arkansas,1.500000,19.000000
2015,Auburn,2.500000,13.500000
2015,BYU,3.000000,22.000000
2015,Baylor,8.000000,5.466667
2015,Boise State,4.000000,22.500000
2015,California,6.500000,22.000000
2015,Clemson,8.000000,5.933333


In [57]:
season_rankings_ungrouped = season_rankings.reset_index()
season_rankings_ungrouped

season_rankings_ungrouped = season_rankings_ungrouped.rename(columns={"Ranking": "SeasonRanking"})[["Year",
                                                                                                     "College",
                                                                                                     "SeasonRanking"]]
season_rankings_ungrouped

Unnamed: 0,Year,College,SeasonRanking
0,2015,Alabama,5.733333
1,2015,Arizona,19.250000
2,2015,Arizona State,16.000000
3,2015,Arkansas,19.000000
4,2015,Auburn,13.500000
5,2015,BYU,22.000000
6,2015,Baylor,5.466667
7,2015,Boise State,22.500000
8,2015,California,22.000000
9,2015,Clemson,5.933333


In [58]:
avg_merged = pd.merge(
    left=df, right=season_rankings_ungrouped, how='left', left_on=['Year', 'College'], right_on=['Year', 'College']
)
df = avg_merged.drop_duplicates()
df

Unnamed: 0,Year,Season,Week,Ranking,College,SeasonRanking
0,2015,2015-16,1,1,Ohio State,2.266667
1,2015,2015-16,1,2,TCU,5.866667
2,2015,2015-16,1,3,Alabama,5.733333
3,2015,2015-16,1,4,Baylor,5.466667
4,2015,2015-16,1,5,Oregon,14.555556
5,2015,2015-16,1,6,Michigan State,5.333333
6,2015,2015-16,1,7,Auburn,13.500000
7,2015,2015-16,1,8,Florida State,10.600000
8,2015,2015-16,1,9,Georgia,12.750000
9,2015,2015-16,1,10,USC,16.777778


# Merging Data into Main Dataset

In [59]:
game_data = pd.read_csv('game_data.csv')
del game_data['Unnamed: 0']
game_data

Unnamed: 0,School,Opponent,Date,Season,AtHome,NeutralLocation,Score,OppScore,Total,Outcome,...,XPM,XPA,XPPercent,FGM,FGA,FGPercent,KickPts,Fum,Int,TotalTO
0,Georgia State,Charlotte,2015-09-04,2015-16,1,0,20,23,43,L,...,2,2,100.0,2,3,66.7,8,2,1,3
1,Oregon State,Weber State,2015-09-04,2015-16,1,0,26,7,33,W,...,2,2,100.0,4,4,100.0,14,0,1,1
2,Syracuse,Rhode Island,2015-09-04,2015-16,1,0,47,0,47,W,...,6,6,100.0,1,2,50.0,9,0,1,1
3,Western Michigan,Michigan State,2015-09-04,2015-16,1,0,24,37,61,L,...,3,3,100.0,1,2,50.0,6,0,2,2
4,Central Michigan,Oklahoma State,2015-09-03,2015-16,1,0,13,24,37,L,...,1,1,100.0,2,2,100.0,7,0,1,1
5,Fresno State,Abilene Christian,2015-09-03,2015-16,1,0,34,13,47,W,...,4,4,100.0,2,3,66.7,10,1,2,3
6,Hawaii,Colorado,2015-09-03,2015-16,1,0,28,20,48,W,...,2,2,100.0,2,2,100.0,8,0,2,2
7,Idaho,Ohio,2015-09-03,2015-16,1,0,28,45,73,L,...,2,2,100.0,2,2,100.0,8,1,2,3
8,Minnesota,Texas Christian,2015-09-03,2015-16,1,0,17,23,40,L,...,2,2,100.0,1,1,100.0,5,2,0,2
9,San Jose State,New Hampshire,2015-09-03,2015-16,1,0,43,13,56,W,...,4,6,66.7,1,2,50.0,7,0,0,0


### Replace Differently Named Schools

In [60]:
def check_names(dataset):
    bad_names = []
    for w in dataset["College"]:
        if w not in game_data["School"].values and w not in game_data["Opponent"].values and w not in bad_names:
            bad_names.append(w)
    print(len(bad_names))
    print(bad_names)

In [61]:
df_fix_names = df
df_fix_names = df_fix_names.replace('Ohio State', 'Ohio')
df_fix_names = df_fix_names.replace('LSU', 'Louisiana State')
df_fix_names = df_fix_names.replace('Ole Miss', 'Mississippi')
df_fix_names = df_fix_names.replace('Pitt', 'Pittsburgh')
df_fix_names = df_fix_names.replace('SMU', 'Southern Methodist')
df_fix_names = df_fix_names.replace('UAB', 'Alabama-Birmingham')
df_fix_names = df_fix_names.replace('UCF', 'Central Florida')
df_fix_names = df_fix_names.replace('UCF', 'Central Florida')
df_fix_names = df_fix_names.replace('UNLV', 'Nevada-Las Vegas')
df_fix_names = df_fix_names.replace('USC', 'Southern California')
df_fix_names = df_fix_names.replace('UTEP', 'Texas-El Paso')
df_fix_names = df_fix_names.replace('UTSA', 'TExas-San Antonio')
df_fix_names = df_fix_names.replace("Mississippi St", "Mississippi State")
df_fix_names = df_fix_names.replace("LSU", "Louisiana State")
df_fix_names = df_fix_names.replace("Miami Florida", "Miami (FL)")
df_fix_names = df_fix_names.replace("NC State", "North Carolina State")
df_fix_names = df_fix_names.replace("Florida Intl", "Florida International")
df_fix_names = df_fix_names.replace("USC", "Southern California")
df_fix_names = df_fix_names.replace("TCU", "Texas Christian")
df_fix_names = df_fix_names.replace("UL Lafayette", "Lafayette")
df_fix_names = df_fix_names.replace("Appalachian St", "Appalachian State")
df_fix_names = df_fix_names.replace("SMU", "Southern Methodist")
df_fix_names = df_fix_names.replace("Bowling Green", "Bowling Green State")
df_fix_names = df_fix_names.replace("BYU", "Brigham Young")
df_fix_names = df_fix_names.replace("Miami Ohio", "Miami (OH)")
df_fix_names = df_fix_names.replace("Southern Miss", "Southern Mississippi")
df_fix_names = df_fix_names.replace("UNLV", "Nevada-Las Vegas")
df_fix_names = df_fix_names.replace("UTEP", "Texas-El Paso")
df_fix_names = df_fix_names.replace("UTSA", "Texas-San Antonio")
df_fix_names = df_fix_names.replace("Middle Tenn St", "Middle Tennessee State")
df_fix_names = df_fix_names.replace("Kent", "Kent State")
df_fix_names = df_fix_names.replace("Charlotte U", "Charlotte")
df_fix_names = df_fix_names.replace("UL Monroe", "Louisiana-Monroe")
df_fix_names = df_fix_names.replace("UAB", "Alabama-Birmingham")
df_fix_names = df_fix_names.replace("NW Missouri State", "Missouri State")
df_fix_names = df_fix_names.replace("Mary Hardin-Baylor", "Baylor")
df_fix_names = df_fix_names.replace("Texas A&M Commerce", "Texas A&M")
df_fix_names = df_fix_names.replace("McNeese St", "McNeese State")
df_fix_names = df_fix_names.replace("Tennessee Chat", "Chattanooga")
df_fix_names = df_fix_names.replace('Miami', 'Miami (FL)')
check_names(df_fix_names)

0
[]


In [62]:
df = df_fix_names
df

Unnamed: 0,Year,Season,Week,Ranking,College,SeasonRanking
0,2015,2015-16,1,1,Ohio,2.266667
1,2015,2015-16,1,2,Texas Christian,5.866667
2,2015,2015-16,1,3,Alabama,5.733333
3,2015,2015-16,1,4,Baylor,5.466667
4,2015,2015-16,1,5,Oregon,14.555556
5,2015,2015-16,1,6,Michigan State,5.333333
6,2015,2015-16,1,7,Auburn,13.500000
7,2015,2015-16,1,8,Florida State,10.600000
8,2015,2015-16,1,9,Georgia,12.750000
9,2015,2015-16,1,10,Southern California,16.777778


In [63]:
df_to_merge = df[["Season", "Ranking", "College", "SeasonRanking"]]
final_merged = pd.merge(left=game_data, right=df_to_merge,how='left', left_on=['School', 'Season'], right_on=['College', 'Season'])
del final_merged['College']
final_merged = final_merged.fillna(-1).drop_duplicates()
final_merged

Unnamed: 0,School,Opponent,Date,Season,AtHome,NeutralLocation,Score,OppScore,Total,Outcome,...,XPPercent,FGM,FGA,FGPercent,KickPts,Fum,Int,TotalTO,Ranking,SeasonRanking
0,Georgia State,Charlotte,2015-09-04,2015-16,1,0,20,23,43,L,...,100.0,2,3,66.7,8,2,1,3,-1.0,-1.000000
1,Oregon State,Weber State,2015-09-04,2015-16,1,0,26,7,33,W,...,100.0,4,4,100.0,14,0,1,1,-1.0,-1.000000
2,Syracuse,Rhode Island,2015-09-04,2015-16,1,0,47,0,47,W,...,100.0,1,2,50.0,9,0,1,1,-1.0,-1.000000
3,Western Michigan,Michigan State,2015-09-04,2015-16,1,0,24,37,61,L,...,100.0,1,2,50.0,6,0,2,2,-1.0,-1.000000
4,Central Michigan,Oklahoma State,2015-09-03,2015-16,1,0,13,24,37,L,...,100.0,2,2,100.0,7,0,1,1,-1.0,-1.000000
5,Fresno State,Abilene Christian,2015-09-03,2015-16,1,0,34,13,47,W,...,100.0,2,3,66.7,10,1,2,3,-1.0,-1.000000
6,Hawaii,Colorado,2015-09-03,2015-16,1,0,28,20,48,W,...,100.0,2,2,100.0,8,0,2,2,-1.0,-1.000000
7,Idaho,Ohio,2015-09-03,2015-16,1,0,28,45,73,L,...,100.0,2,2,100.0,8,1,2,3,-1.0,-1.000000
8,Minnesota,Texas Christian,2015-09-03,2015-16,1,0,17,23,40,L,...,100.0,1,1,100.0,5,2,0,2,-1.0,-1.000000
9,San Jose State,New Hampshire,2015-09-03,2015-16,1,0,43,13,56,W,...,66.7,1,2,50.0,7,0,0,0,-1.0,-1.000000


In [38]:
final_merged[["SeasonRanking"]].drop_duplicates().sort_values(["SeasonRanking"])

Unnamed: 0,SeasonRanking
2887,1.000000
5150,1.466667
319,2.266667
7044,2.333333
2519,2.866667
4891,3.333333
2424,3.400000
7074,4.066667
2600,4.533333
277,5.333333
