## College Football Data Wrangling

#### The goal of this notebook is to pull in all the data from CollegeFootballData.com
##### Chris McAllister
#### ----------------------------------

###### Helpful Tutorial
https://blog.collegefootballdata.com/introduction-to-cfb-analytics/

###### Actual Documentation
https://api.collegefootballdata.com/api/docs/?url=/api-docs.json

###### Request a personal API key (delivered by email) here:
https://collegefootballdata.com/key

In [1]:
import datetime
import os
import warnings

import cfbd
import numpy as np
import pandas as pd

In [2]:
# Uncomment and run the line below if the cfbd library isn't already installed
# %pip install cfbd

In [3]:
# See the link above to have a personal API key emailed to you.
#
# The key is read from the CFBD_API_KEY environment variable so the secret is
# never stored in the notebook or its version history. The key that used to be
# hardcoded in this cell should be treated as exposed and replaced with a new one.

api_key = os.environ.get('CFBD_API_KEY', '').strip()

if not api_key:
    # Warn rather than raise so the cell itself always runs; API calls made
    # with an empty key will presumably be rejected by the service.
    warnings.warn(
        'CFBD_API_KEY is not set; export it before starting Jupyter '
        '(keys are issued at https://collegefootballdata.com/key).'
    )

In [4]:
# Build the cfbd client that the API wrappers in later cells are created from.
# The key is registered under the 'Authorization' entry with a 'Bearer' prefix.
auth_field = 'Authorization'

configuration = cfbd.Configuration()
configuration.api_key_prefix[auth_field] = 'Bearer'
configuration.api_key[auth_field] = api_key

api_config = cfbd.ApiClient(configuration)

#### Get every FBS college football team, keeping each team's id and school name

In [5]:
# Request the FBS team list and keep only the `id` and `school` columns.
teams_api = cfbd.TeamsApi(api_config)
teams = teams_api.get_fbs_teams()

team_records = [team.to_dict() for team in teams]
df_teams = pd.DataFrame.from_records(team_records)[['id', 'school']]


#### Fetch the games of a single season (1900) into a dataframe; the full history is loaded in the cells below

In [6]:
import datetime

# One past the current calendar year (computed here but not used further in
# this cell).
today = datetime.date.today()
current_year = today.year + 1

# Request the games for the 1900 season and load them into a DataFrame,
# one row per game.
games_api = cfbd.GamesApi(api_config)
games = games_api.get_games(year=1900)

game_records = [game.to_dict() for game in games]
df_games = pd.DataFrame.from_records(game_records)


#### Get all postseason games, then all regular-season games:

In [28]:
# Fetch every postseason game from the 1900 season through the current
# calendar year and combine them into one DataFrame (df_games_post).

# `current_year` is one past today's year: range() excludes its end value, so
# this makes the loop below include the current year.
today = datetime.date.today()
current_year = today.year + 1

# Establish the API wrapper
games_api = cfbd.GamesApi(api_config)

# Collect one frame per season and concatenate once at the end. Calling
# pd.concat inside the loop re-copies every accumulated row on each pass.
post_frames = []
for season in range(1900, current_year):  # 1900 is the first season requested
    games = games_api.get_games(year=season, season_type='postseason')
    post_frames.append(pd.DataFrame.from_records([g.to_dict() for g in games]))

# ignore_index=True gives the combined frame unique row labels instead of
# restarting at 0 for every season.
df_games_post = pd.concat(post_frames, ignore_index=True)

In [29]:
# Fetch every regular-season game from the 1900 season through the current
# calendar year and combine them into one DataFrame (df_games_reg).

# `current_year` is one past today's year: range() excludes its end value, so
# this makes the loop below include the current year.
today = datetime.date.today()
current_year = today.year + 1

# Establish the API wrapper
games_api = cfbd.GamesApi(api_config)

# Collect one frame per season and concatenate once at the end. Calling
# pd.concat inside the loop re-copies every accumulated row on each pass.
reg_frames = []
for season in range(1900, current_year):  # 1900 is the first season requested
    games = games_api.get_games(year=season, season_type='regular')
    reg_frames.append(pd.DataFrame.from_records([g.to_dict() for g in games]))

# ignore_index=True gives the combined frame unique row labels instead of
# restarting at 0 for every season.
df_games_reg = pd.concat(reg_frames, ignore_index=True)

In [30]:
# Union regular-season and postseason games into a single frame.
# ignore_index=True renumbers the rows so that labels coming from the two
# inputs do not collide in the result.

df_games = pd.concat([df_games_reg, df_games_post], ignore_index=True)

In [32]:
# Column names for the games table, listed one per line and grouped by prefix.
# NOTE(review): this list is not referenced by any other cell visible here.
game_columns = [
    # game-level fields
    'id',
    'season',
    'week',
    'season_type',
    'start_date',
    'start_time_tbd',
    'completed',
    'neutral_site',
    'conference_game',
    'attendance',
    'venue_id',
    # home_* fields
    'home_id',
    'home_conference',
    'home_division',
    'home_points',
    'home_line_scores',
    'home_post_win_prob',
    'home_pregame_elo',
    'home_postgame_elo',
    # away_* fields
    'away_id',
    'away_conference',
    'away_division',
    'away_points',
    'away_line_scores',
    'away_post_win_prob',
    'away_pregame_elo',
    'away_postgame_elo',
    # remaining game-level fields
    'excitement_index',
    'notes',
]

In [14]:
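# To insert data for the first time
# (the connection URL is read from the CFB_DATABASE_URL environment variable
# so that the database password is not stored in the notebook)

#from sqlalchemy import create_engine
#engine = create_engine(os.environ['CFB_DATABASE_URL'])
#df_games.to_sql('all_games', engine)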

In [41]:
# To update the table once it already exists:
# replace the `all_games` table with the current contents of df_games, then
# read the row count back as a sanity check.

import psycopg2
from sqlalchemy import create_engine

# Connection URL, e.g. postgresql://user:password@localhost:5432/cfb, taken
# from the environment so credentials are not stored in the notebook. Use the
# plain `postgresql://` form: the same string is handed to psycopg2 below.
# The password that used to be hardcoded in this cell should be rotated.
conn_string = os.environ.get('CFB_DATABASE_URL')
if not conn_string:
    raise RuntimeError(
        'Set the CFB_DATABASE_URL environment variable to the Postgres '
        'connection URL before running this cell.'
    )

db = create_engine(conn_string)

# Engine.begin() yields a connection inside a transaction: it commits when the
# block finishes, rolls back if it raises, and releases the connection either
# way (the previous version never closed this connection).
with db.begin() as sa_conn:
    df_games.to_sql('all_games', con=sa_conn, if_exists='replace', index=False)

# Check the load through a separate psycopg2 connection. Only the row count is
# fetched; selecting every row just to discard it is wasted work.
pg_conn = psycopg2.connect(conn_string)
try:
    pg_conn.autocommit = True
    with pg_conn.cursor() as cursor:
        cursor.execute('select count(*) from all_games;')
        rows_in_table = cursor.fetchone()[0]
finally:
    # Close the connection even when the query fails.
    pg_conn.close()

assert rows_in_table == len(df_games), (
    f'all_games holds {rows_in_table} rows but df_games has {len(df_games)}'
)
rows_in_table

In [46]:
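# Read in from the data warehouse (DW):
# (the connection URL is read from the CFB_DATABASE_URL environment variable
# so that the database password is not stored in the notebook)

#from sqlalchemy import create_engine
#engine = create_engine(os.environ['CFB_DATABASE_URL'])

#con = engine.connect()

#table_name = 'all_games'
#base_data = pd.read_sql(table_name, con)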