Import requirements

In [1]:
import pandas as pd
from datetime import datetime as dt

Here, we define the class that loads the CFB model DataFrames from CSV files

In [2]:
class CFBDataframe:
    """Accumulate per-season CSV files into one combined DataFrame per data type.

    For every supported data type there are two attributes: ``<prefix>_df_list``
    holds each season's DataFrame in the order it was loaded, and
    ``<prefix>_df`` holds the concatenation of that list.
    """

    # Maps the ``df_type`` accepted by ``csv_to_df`` (also the name of the data
    # sub-directory) to the attribute prefix used on this object.
    _ATTR_PREFIX = {
        "drives": "drives",
        "games": "games",
        "lines": "lines",
        "player_season_stats": "player",
        "player_usage": "usage",
        "recruiting_players": "recruit",
    }

    def __init__(self):
        # every data type starts with no seasons loaded and an empty frame
        self.drives_df_list = []
        self.drives_df = pd.DataFrame()
        self.games_df_list = []
        self.games_df = pd.DataFrame()
        self.lines_df_list = []
        self.lines_df = pd.DataFrame()
        self.player_df_list = []
        self.player_df = pd.DataFrame()
        self.usage_df_list = []
        self.usage_df = pd.DataFrame()
        self.recruit_df_list = []
        self.recruit_df = pd.DataFrame()

    def csv_to_df(self, df_type, season):
        """Load one season's CSV and fold it into the matching combined frame.

        Reads ``../data/<df_type>/<season>_<df_type>.csv`` (relative to the
        current working directory). The first line of the file supplies the
        column headers, so a file containing only a header line loads as an
        empty frame with those columns.

        Args:
            df_type: one of "drives", "games", "lines", "player_season_stats",
                "player_usage" or "recruiting_players". Any other value is
                read from disk and then ignored.
            season: value substituted into the file name, e.g. ``2015``.

        Raises:
            FileNotFoundError: if the CSV file does not exist.
        """
        # set file name
        file = "../data/{i}/{j}_{i}.csv".format(i=df_type, j=season)
        # 'ANSI' is only registered as a codec on Windows; elsewhere the codec
        # lookup fails and we fall back to ISO-8859-1
        try:
            df = pd.read_csv(file, encoding='ANSI')
        except LookupError:
            df = pd.read_csv(file, encoding='ISO-8859-1')
        # NOTE: read_csv already takes the headers from the first line. The
        # earlier `df.rename(columns=df.iloc[0])` discarded its result and only
        # served to raise IndexError on header-only files, so it was removed.
        prefix = self._ATTR_PREFIX.get(df_type)
        if prefix is None:
            # unrecognized data type: nothing to store
            return
        # append this season, then rebuild the combined frame for this type
        seasons = getattr(self, prefix + "_df_list")
        seasons.append(df)
        setattr(self, prefix + "_df", pd.concat(seasons))


Build the CFB dataframe with the scraped game data

In [3]:
# Load every data type for each season from 2010 up to, but not including,
# the current calendar year.
cfb_df = CFBDataframe()
# (directory, year) pairs for which no CSV was found on disk
missing_files = []
for year in range(2010, dt.today().year):
    for directory in ["drives", "games", "lines", "player_season_stats", "player_usage", "recruiting_players"]:
        try:
            cfb_df.csv_to_df(directory, year)
        except FileNotFoundError:
            # stay best-effort, but remember what was skipped so that a wrong
            # working directory does not silently produce empty frames
            missing_files.append((directory, year))
if missing_files:
    print("Skipped {} missing CSV file(s); inspect missing_files for details".format(len(missing_files)))

Here, we define the CFB model class, with the attributes and methods used to analyze the loaded data

In [4]:
class CFBModel:
    """Analysis helpers over the frames held by a loaded dataframe container."""

    def __init__(self, df):
        """Collect the container's frames into ``self.data``.

        Args:
            df: object exposing ``drives_df``, ``games_df``, ``lines_df``,
                ``player_df``, ``usage_df`` and ``recruit_df`` attributes.
        """
        # key each frame by its short name: "games" -> df.games_df, and so on
        frame_names = ("drives", "games", "lines", "player", "usage", "recruit")
        self.data = {name: getattr(df, name + "_df") for name in frame_names}

    def home_favored(self):
        """Return mean ``_home_points`` minus mean ``_away_points`` over the games frame."""
        games = self.data["games"]
        home_average = games["_home_points"].mean()
        away_average = games["_away_points"].mean()
        return home_average - away_average

Create the model and perform operations on it

In [5]:
# Wrap the loaded frames in a model and report the home-minus-away scoring gap.
cfb_model = CFBModel(cfb_df)
home_favor = cfb_model.home_favored()
summary_template = "Home teams win by an average of {:.2f} points"
print(summary_template.format(home_favor))

Home teams win by an average of 7.30 points
