Import requirements

In [1]:
import os
import pandas as pd
from datetime import datetime as dt

Here, we define the `CFBDataframe` class, which loads the scraped CSV files into pandas DataFrames (one combined DataFrame per dataset)

In [2]:
class CFBDataframe:
    """Loads the scraped CFB CSV files into one combined DataFrame per dataset.

    Every dataset attribute is a two-item list ``[frames, combined]``:
    ``frames`` holds each DataFrame loaded so far and ``combined`` is the
    concatenation of them (an empty DataFrame until something is loaded).
    """

    # datasets stored as one CSV per season: ../data/<name>/<season>_<name>.csv
    SEASON_TYPES = ("drives", "games", "lines", "player_season_stats",
                    "player_usage", "recruiting_players")
    # datasets stored as a single CSV: ../data/<name>/<name>.csv
    STATIC_TYPES = ("recruiting_groups", "recruiting_teams", "venues")

    def __init__(self):
        # list of dfs and empty init df
        self.drives = [[], pd.DataFrame()]
        self.games = [[], pd.DataFrame()]
        self.lines = [[], pd.DataFrame()]
        self.player_game_stats = [[], pd.DataFrame()]
        self.player_season_stats = [[], pd.DataFrame()]
        self.player_usage = [[], pd.DataFrame()]
        self.recruiting_groups = [[], pd.DataFrame()]
        self.recruiting_players = [[], pd.DataFrame()]
        self.recruiting_teams = [[], pd.DataFrame()]
        self.venues = [[], pd.DataFrame()]

    @staticmethod
    def append_dfs(df, df_type):
        """Add ``df`` to a dataset slot and refresh its combined DataFrame.

        Args:
            df: DataFrame to add, or ``None`` (as returned by ``impute_df``
                for an empty file), in which case nothing happens.
            df_type: the ``[frames, combined]`` list of one dataset; it is
                modified in place.
        """
        # an empty file yields None; storing it would make pd.concat raise
        # ValueError when it is the only entry in the list
        if df is None:
            return
        # append df to list of dfs
        df_type[0].append(df)
        # concat list of dfs
        df_type[1] = pd.concat(df_type[0])

    @staticmethod
    def impute_df(file):
        """Read a CSV file into a DataFrame.

        The file's own header row supplies the column names.

        Args:
            file: path of the CSV file.

        Returns:
            The loaded DataFrame, or ``None`` when the file holds no data.

        Raises:
            FileNotFoundError: if ``file`` does not exist.
        """
        # One outer handler covers both reads: an exception raised inside an
        # ``except`` clause is not caught by its sibling clauses.
        try:
            # import data with Windows encoding
            try:
                return pd.read_csv(file, encoding='ANSI')
            # if not Windows, the 'ANSI' codec is unknown: use ISO-8859-1
            except LookupError:
                return pd.read_csv(file, encoding='ISO-8859-1')
        # if empty data, skip file
        except pd.errors.EmptyDataError:
            return None

    def season_csv_to_df(self, df_type, season):
        """Load ``../data/<df_type>/<season>_<df_type>.csv`` into its dataset.

        The file is read first, so a missing file raises ``FileNotFoundError``.
        A ``df_type`` that is not listed in ``SEASON_TYPES`` is then ignored.
        """
        file = "../data/{i}/{j}_{i}.csv".format(i=df_type, j=season)
        df = self.impute_df(file)
        if df_type in self.SEASON_TYPES:
            self.append_dfs(df, getattr(self, df_type))

    def week_csv_to_df(self, df_type, game, season):
        """Load ``../data/<df_type>/<season>_week_<game>_<df_type>.csv``.

        ``df_type`` only selects the path: the result is always stored in
        ``player_game_stats``. A missing file raises ``FileNotFoundError``.
        """
        file = "../data/{i}/{k}_week_{j}_{i}.csv".format(i=df_type, j=game, k=season)
        df = self.impute_df(file)
        self.append_dfs(df, self.player_game_stats)

    def csv_to_df(self, df_type):
        """Load ``../data/<df_type>/<df_type>.csv`` into its dataset.

        The file is read first, so a missing file raises ``FileNotFoundError``.
        A ``df_type`` that is not listed in ``STATIC_TYPES`` is then ignored.
        """
        file = "../data/{i}/{i}.csv".format(i=df_type)
        df = self.impute_df(file)
        if df_type in self.STATIC_TYPES:
            self.append_dfs(df, getattr(self, df_type))

Build the CFB dataframe with the scraped game data

In [3]:
cfb_df = CFBDataframe()
# every immediate sub-folder of ../data is treated as one dataset
for directory in next(os.walk('../data'))[1]:
    # datasets kept in a single file, with no season in the file name
    if directory in ("recruiting_groups", "recruiting_teams", "venues"):
        try:
            cfb_df.csv_to_df(directory)
        except FileNotFoundError:
            # best effort: a missing file is simply skipped
            pass
        continue

    # remaining datasets have one or more files per season
    for year in range(2010, dt.today().year):
        if directory != "player_game_stats":
            # one file per season
            try:
                cfb_df.season_csv_to_df(directory, year)
            except FileNotFoundError:
                pass
            continue

        # player game stats are split into one file per week
        for week in range(1, 17):
            try:
                cfb_df.week_csv_to_df(directory, week, year)
            except FileNotFoundError:
                pass

Here, we define the `CFBModel` class, which holds the loaded DataFrames and the methods used to analyze them

In [4]:
class CFBModel:
    """Holds the combined DataFrame of every dataset and analysis helpers."""

    def __init__(self, df):
        """Build ``self.data`` from a loader object.

        Args:
            df: object exposing one attribute per dataset name, each a
                sequence whose item 1 is the combined DataFrame.
        """
        dataset_names = (
            "drives", "games", "lines",
            "player_game_stats", "player_season_stats", "player_usage",
            "recruiting_groups", "recruiting_players", "recruiting_teams",
            "venues",
        )
        # dict of dfs, keyed by dataset name
        self.data = {name: getattr(df, name)[1] for name in dataset_names}

    def home_favored(self):
        """Return mean ``_home_points`` minus mean ``_away_points`` of the games data."""
        games = self.data["games"]
        return games["_home_points"].mean() - games["_away_points"].mean()

Create the model and perform operations on it

In [5]:
cfb_model = CFBModel(cfb_df)

# average scoring margin of the home side
home_favor = cfb_model.home_favored()
print("Home teams win by an average of {:.2f} points".format(home_favor))

# total number of cells (rows x columns) over every dataset held by the model
num_cells = sum(frame.size for frame in cfb_model.data.values())
print("We have {} cells of data to work with 👀".format(num_cells))

Home teams win by an average of 7.18 points
We have 12507246 cells of data to work with 👀
