Import requirements

In [13]:
import os
import pandas as pd
from datetime import datetime as dt

Here, we define the `CFBDataframe` class, which loads the scraped CSV files into one dataframe per dataset

In [14]:
class CFBDataframe:
    """Accumulate scraped CSV files into one dataframe per dataset.

    Every public attribute is a two-element list ``[frames, combined]``:
    ``frames`` holds the dataframe read from each file, in load order, and
    ``combined`` is ``pd.concat(frames)`` (an empty dataframe until the first
    file for that dataset has been loaded).
    """

    # Dataset names each loader stores. Any other name is still read from disk
    # (so a missing file still raises) but the resulting frame is not kept.
    _SEASON_TYPES = ("drives", "games", "lines", "player_season_stats",
                     "player_usage", "recruiting_players")
    _STATIC_TYPES = ("recruiting_groups", "recruiting_teams", "venues")

    def __init__(self):
        # one [list of per-file frames, concatenated frame] pair per dataset
        self.drives = [[], pd.DataFrame()]
        self.games = [[], pd.DataFrame()]
        self.lines = [[], pd.DataFrame()]
        self.player_game_stats = [[], pd.DataFrame()]
        self.player_season_stats = [[], pd.DataFrame()]
        self.player_usage = [[], pd.DataFrame()]
        self.recruiting_groups = [[], pd.DataFrame()]
        self.recruiting_players = [[], pd.DataFrame()]
        self.recruiting_teams = [[], pd.DataFrame()]
        self.venues = [[], pd.DataFrame()]

    @staticmethod
    def _read_csv(file):
        """Read one CSV, trying the Windows 'ANSI' codec before ISO-8859-1.

        pandas takes the column names from the first line of the file, so no
        header fix-up is needed after reading.

        Raises:
            FileNotFoundError: if ``file`` does not exist.
            pandas.errors.EmptyDataError: if ``file`` has no columns to parse.
        """
        try:
            return pd.read_csv(file, encoding='ANSI')
        except LookupError:
            # the 'ANSI' codec name is unknown outside Windows
            return pd.read_csv(file, encoding='ISO-8859-1')

    def _store(self, df_type, df):
        """Append ``df`` to the ``df_type`` dataset and rebuild its combined frame."""
        slot = getattr(self, df_type)
        slot[0].append(df)
        slot[1] = pd.concat(slot[0])

    def season_csv_to_df(self, df_type, season):
        """Load ``../data/<df_type>/<season>_<df_type>.csv`` into its dataset.

        Args:
            df_type: dataset name; only the per-season datasets are stored.
            season: season year used in the file name.

        Raises:
            FileNotFoundError: if the file does not exist.
        """
        file = "../data/{i}/{j}_{i}.csv".format(i=df_type, j=season)
        df = self._read_csv(file)
        if df_type in self._SEASON_TYPES:
            self._store(df_type, df)

    def week_csv_to_df(self, df_type, game, season):
        """Load ``../data/<df_type>/<season>_week_<game>_<df_type>.csv``.

        The frame is always stored under ``player_game_stats``. Missing or
        empty files are skipped silently, whichever encoding attempt hits them.

        Args:
            df_type: directory / file-name suffix of the weekly dataset.
            game: week number used in the file name.
            season: season year used in the file name.
        """
        file = "../data/{i}/{k}_week_{j}_{i}.csv".format(i=df_type, j=game, k=season)
        try:
            df = self._read_csv(file)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # a few weekly files are absent or empty; there is nothing to add
            return
        self._store("player_game_stats", df)

    def csv_to_df(self, df_type):
        """Load the single-file dataset ``../data/<df_type>/<df_type>.csv``.

        Args:
            df_type: dataset name; only the single-file datasets are stored.

        Raises:
            FileNotFoundError: if the file does not exist.
        """
        file = "../data/{i}/{i}.csv".format(i=df_type)
        df = self._read_csv(file)
        if df_type in self._STATIC_TYPES:
            self._store(df_type, df)

Build the CFB dataframe with the scraped game data

In [15]:
# Walk the immediate sub-directories of ../data and load each one with the
# loader that matches its file-naming scheme. Missing files are skipped.
cfb_df = CFBDataframe()
for directory in next(os.walk('../data'))[1]:
    # single-file datasets: one CSV for the whole directory
    if directory in ("recruiting_groups", "recruiting_teams", "venues"):
        try:
            cfb_df.csv_to_df(directory)
        except FileNotFoundError:
            pass
        continue

    # every other dataset has files per season, from 2010 up to (not including) this year
    for year in range(2010, dt.today().year):
        if directory != "player_game_stats":
            # one file per season
            try:
                cfb_df.season_csv_to_df(directory, year)
            except FileNotFoundError:
                pass
            continue

        # player game stats are split further, one file per week (1 through 16)
        for week in range(1, 17):
            try:
                cfb_df.week_csv_to_df(directory, week, year)
            except FileNotFoundError:
                pass

Here, we define the `CFBModel` class, which holds the dataframes and the methods used to analyze them

In [16]:
class CFBModel:
    """Hold the combined dataframe for every dataset and compute statistics on them."""

    def __init__(self, df):
        """Collect the combined dataframes from ``df`` into ``self.data``.

        Args:
            df: object exposing one attribute per dataset name, each a
                sequence whose element at index 1 is that dataset's dataframe.
        """
        dataset_names = (
            "drives", "games", "lines",
            "player_game_stats", "player_season_stats", "player_usage",
            "recruiting_groups", "recruiting_players", "recruiting_teams",
            "venues",
        )
        # dataset name -> dataframe, keyed in the order listed above
        self.data = {name: getattr(df, name)[1] for name in dataset_names}

    def home_favored(self):
        """Return the mean of ``_home_points`` minus the mean of ``_away_points`` in the games data."""
        games = self.data["games"]
        return games["_home_points"].mean() - games["_away_points"].mean()

Create and perform operations on model

In [17]:
# Build the model from the loaded dataframes and report two headline numbers.
cfb_model = CFBModel(cfb_df)

# average home-minus-away scoring margin
home_favor = cfb_model.home_favored()
print("Home teams win by an average of {:.2f} points".format(home_favor))

# total cell count (rows x columns) summed over every dataset held by the model
num_cells = sum(frame.size for frame in cfb_model.data.values())
print("We have {} cells of data to work with 👀".format(num_cells))

Home teams win by an average of 7.18 points
We have 12507246 cells of data to work with 👀
