## SIADS Milestone I: CFB Analysis

#### The goal of this notebook is to answer the following questions:

1) How reliable is the ELO metric?
2) Do teams need to build talented rosters through recruiting in order to compete for championships? (Chris) 
3) Where do the best recruits come from and where do they go to school? (both) 
4) Is a college recruit's rating a good indicator of their NFL draft outcome? (Chris) 
 



In [1]:
# Uncomment and run line below if cfbd library isn't already installed
#! pip install cfbd

import cfbd
import numpy as np
import pandas as pd
import altair as alt
import cfbd

# Do not truncate columns when a DataFrame is displayed
pd.set_option('display.max_columns', None)
# Select Altair's "html" renderer for chart output
alt.renderers.enable("html")

RendererRegistry.enable('html')

In [2]:
# Get the teams dataset straight from the API
# (the CSV version of the URL was corrupted)

# The API key is read from a local `config` module
import config
api_key = config.api_key

def api_setup(api_key):
    """
    Build a cfbd API client that authenticates with the given key.

    Parameters
    ----------
    api_key : str
        API key to register under the 'Authorization' entry of the
        cfbd configuration, together with the 'Bearer' prefix.

    Returns
    -------
    cfbd.ApiClient
        Client constructed from the populated configuration.
    """
    # `cfbd` is imported at the top of the notebook, so no local import is needed.
    configuration = cfbd.Configuration()
    configuration.api_key['Authorization'] = api_key
    configuration.api_key_prefix['Authorization'] = 'Bearer'

    return cfbd.ApiClient(configuration)
    
# Shared API client; team_dataset() below reads this module-level name
api_config = api_setup(api_key)

def team_dataset():
    """
    Fetch the FBS teams from the cfbd API as a DataFrame.

    Relies on the module-level `api_config` client.

    Returns
    -------
    pandas.DataFrame
        One row per team returned by `get_fbs_teams()`, restricted to the
        columns id, school, conference, division, color and logos.
    """
    teams_api = cfbd.TeamsApi(api_config)
    teams = teams_api.get_fbs_teams()

    df_teams = pd.DataFrame.from_records([t.to_dict() for t in teams])

    # Return an independent copy of the column subset so callers can assign
    # columns on the result without triggering SettingWithCopyWarning.
    wanted_columns = ['id', 'school', 'conference', 'division', 'color', 'logos']
    return df_teams[wanted_columns].copy()

df_teams = team_dataset()

# Remove brackets around image url: keep only the first element of each
# `logos` entry. `.assign` builds a new frame rather than writing into the
# column subset returned by team_dataset(), which avoids chained-assignment
# warnings.
df_teams = df_teams.assign(logos = df_teams['logos'].str.get(0))

df_teams.head()

Unnamed: 0,id,school,conference,division,color,logos
0,2005,Air Force,Mountain West,Mountain,#004a7b,http://a.espncdn.com/i/teamlogos/ncaa/500/2005...
1,2006,Akron,Mid-American,East,#00285e,http://a.espncdn.com/i/teamlogos/ncaa/500/2006...
2,333,Alabama,SEC,West,#690014,http://a.espncdn.com/i/teamlogos/ncaa/500/333.png
3,2026,Appalachian State,Sun Belt,East,#000000,http://a.espncdn.com/i/teamlogos/ncaa/500/2026...
4,12,Arizona,Pac-12,,#002449,http://a.espncdn.com/i/teamlogos/ncaa/500/12.png


In [3]:
# Power 5 conferences and the columns needed for the end-of-season ELO chart
power_5_conf = ['Pac-12', 'Big 12', 'ACC', 'SEC', 'Big Ten']
elo_columns = ['season', 'team_id', 'main_team', 'team_postgame_elo', 'team_conference']

df = pd.read_csv('../data/games_manipulated.csv')

# Keep Power 5 rows for game 12 of each season (final game of the regular season)
in_power_5 = df['team_conference'].isin(power_5_conf)
is_game_12 = df['game_that_season'] == 12
df = df.loc[in_power_5 & is_game_12, elo_columns]

  df = pd.read_csv('../data/games_manipulated.csv')


In [4]:
# Attach each team's logo via the team id, then keep only the chart columns
chart_columns = ['season', 'team_conference', 'team_postgame_elo', 'main_team', 'logos']
final_df = df.merge(df_teams, left_on = 'team_id', right_on = 'id')[chart_columns]

In [5]:
# One logo per team, placed at its ELO (x) within its conference row (y)
elo_axis = alt.X(
    'team_postgame_elo',
    title = 'ELO Rating at Season End',
    scale = alt.Scale(domain=[800, 2500]),
)
conference_axis = alt.Y('team_conference', title = 'Conference')

season_panel = (
    alt.Chart(final_df)
    .mark_image(opacity = .90, width = 20, height = 20)
    .encode(
        x = elo_axis,
        y = conference_axis,
        tooltip = ['main_team', 'team_postgame_elo'],
        url = 'logos',
    )
    .properties(height = 250, width = 300)
)

# Small multiples: one panel per season, three panels per row
season_panel.facet(facet = 'season:O', columns = 3).properties(
    title = 'End of Regular Season ELO Rating: 2013 - 2023'
)

#### Trend out ELO over time

In [6]:
# Regular-season games of Power 5 teams, enriched with team logo and colour
power_5_conf = ['Pac-12', 'Big 12', 'ACC', 'SEC', 'Big Ten']
trend_columns = ['season', 'team_id', 'main_team', 'game_that_season','team_postgame_elo', 'team_conference', 'logos', 'color']

df = pd.read_csv('../data/games_manipulated.csv')

# Filter to the Power 5 conferences and regular-season games only
in_power_5 = df['team_conference'].isin(power_5_conf)
is_regular = df['season_type'] == 'regular'
df = df[in_power_5 & is_regular]

# Join onto the teams dataset and keep only the necessary columns
final_df = df.merge(df_teams, left_on = 'team_id', right_on = 'id')[trend_columns]

  df = pd.read_csv('../data/games_manipulated.csv')


In [7]:
power_5_conf = ['Pac-12', 'Big 12', 'ACC', 'SEC', 'Big Ten']

# Season and conference to plot
season = 2023
conference = 'Pac-12'

in_scope = (final_df['season'] == season) & (final_df['team_conference'] == conference)
smaller_df = final_df[in_scope]

# One line per team. `color` only styles the line with the team's own hex
# colour (scale disabled); `detail` does the grouping, so two teams that share
# a hex colour are still drawn as separate lines instead of one merged series.
line = alt.Chart(smaller_df).mark_line(opacity = .8).encode(
    x = alt.X('game_that_season', title = 'Week'),
    y = alt.Y('team_postgame_elo', scale=alt.Scale(domain=[1000, 2500]), title = 'Post Game ELO'),
    color = alt.Color('color:N').scale(None),
    detail = 'main_team:N').properties(
    height = 400, width = 400, title = 'Trend in End of Game ELO Season')

# Logos are drawn only at games 1 and 12 to mark each line's endpoints
dot_df = smaller_df[smaller_df['game_that_season'].isin([1,12])]

logos = alt.Chart(dot_df).mark_image(opacity = .95, width = 30, height = 30).encode(
        x = alt.X('game_that_season'),
        y = alt.Y('team_postgame_elo'),
        url = 'logos')

line + logos

#### ELO and Championships.

##### Of the power 5 teams with high ELOs, which ones won championships?

In [8]:
# NOTE(review): the next cell re-reads this same CSV into `df` before this
# frame is used, so this load looks redundant — confirm before removing.
df = pd.read_csv('../data/games_manipulated.csv')

  df = pd.read_csv('../data/games_manipulated.csv')


In [9]:
# End-of-regular-season ELO for Power 5 teams, with logo and colour attached
power_5_conf = ['Pac-12', 'Big 12', 'ACC', 'SEC', 'Big Ten']
game_columns = ['season', 'team_id', 'main_team', 'team_postgame_elo', 'team_conference']
output_columns = ['season', 'team_conference', 'team_postgame_elo', 'main_team', 'logos', 'color']

df = pd.read_csv('../data/games_manipulated.csv')

# Power 5 rows for game 12 of each season, restricted to the needed columns
in_power_5 = df['team_conference'].isin(power_5_conf)
is_game_12 = df['game_that_season'] == 12
df = df.loc[in_power_5 & is_game_12, game_columns]

final_df = df.merge(df_teams, left_on = 'team_id', right_on = 'id')[output_columns]

  df = pd.read_csv('../data/games_manipulated.csv')


In [10]:
# Get dataset of teams that competed in national championship game
df = pd.read_csv('../data/games_manipulated.csv')

# Two capitalisations of the phrase are searched for in the game notes
substr_1 = 'NATIONAL CHAMPIONSHIP'
substr_2 = 'National Championship'

df = df[df['notes'].notna()]

# A single OR-ed mask selects each matching row exactly once. Concatenating
# two separately filtered frames would repeat any row whose note contains both
# spellings, and that duplicate would multiply rows in a later merge.
# regex=False matches the phrases literally.
is_championship = (
    df['notes'].str.contains(substr_1, regex=False)
    | df['notes'].str.contains(substr_2, regex=False)
)

championship_games = (
    df.loc[is_championship, ['main_team', 'season', 'win_flag']]
    .assign(championship_appearance = 1)
    .sort_values(by = 'season', ascending = True)
)

  df = pd.read_csv('../data/games_manipulated.csv')


In [11]:
# Join championship dataset to end of regular season game dataset.
# Left join: team-seasons without a championship row get NaN, which is filled
# with 0 for both flags, then rows are ordered by ELO, highest first.
new_df = (
    final_df
    .merge(championship_games, how = 'left', on = ['season', 'main_team'])
    .fillna({'championship_appearance': 0, 'win_flag': 0})
    .sort_values(by = 'team_postgame_elo', ascending = False)
)

# Label such as "2019 <team>" used on the chart axis
new_df['year_team'] = new_df['season'].astype(str) + ' ' + new_df['main_team'].astype(str)

In [12]:
# First 20 rows of new_df (sorted by ELO, highest first, in the previous cell)
top_teams = new_df.head(20)

# NOTE(review): the subtitle figures are hard-coded text — confirm they still
# match the data whenever the inputs change.
chart_title = {
    "text": "Top 20 Teams According to End of Season ELO Rating",
    "subtitle": "10 of the 13 Top ELO teams appeared in the Championship",
}

alt.Chart(top_teams).mark_bar(opacity = .9).encode(
    x = alt.X('team_postgame_elo', title = 'ELO at Regular Season End'),
    y = alt.Y('year_team', sort = '-x', title = 'Team'),
    color = 'championship_appearance:N',
).properties(title = chart_title, width = 600)