Import requirements

In [1]:
import os
import pandas as pd
from datetime import datetime as dt
import unittest

# Display configuration for the whole notebook, set through pd.set_option.
# Left disabled: silencing pandas' chained-assignment warning.
# pd.set_option('mode.chained_assignment', None)
pd.set_option('display.width', 0)
# Keep wide DataFrame reprs on one block instead of wrapping columns.
pd.set_option('display.expand_frame_repr', False)

Here, we import the CFBDataframe class used to build the CFB Model Dataframes

In [2]:
from CFBDataframe import CFBDataframe

Build the CFB dataframe with the scraped game data

In [3]:
# Folder whose immediate sub-directories are treated as data sets.
DATA_ROOT = '../data'
# Data sets loaded with a single csv_to_df([directory]) call (not season specific).
STATIC_DIRECTORIES = ("cleaned_games", "recruiting_groups", "recruiting_teams", "venues")
# The one data set requested per week as well as per season.
WEEKLY_DIRECTORY = "player_game_stats"
# Sub-directories of DATA_ROOT that are never loaded.
IGNORED_DIRECTORIES = ("__pycache__",)
# Seasons requested: 2010 up to, but not including, the current calendar year.
# NOTE: this depends on the date the notebook is run, so the range grows over time.
SEASONS = range(2010, dt.today().year)
# Weeks requested for every season: 1 through 16 inclusive.
WEEKS = range(1, 17)

if not os.path.isdir(DATA_ROOT):
    # Without this guard, next(os.walk(...)) fails with a bare StopIteration.
    raise FileNotFoundError("Data directory not found: " + DATA_ROOT)

cfb_df = CFBDataframe()
# next(os.walk(...))[1] is the list of immediate sub-directory names.
for directory in next(os.walk(DATA_ROOT))[1]:
    if directory in IGNORED_DIRECTORIES:
        # Decided once per directory rather than once per season.
        continue

    if directory in STATIC_DIRECTORIES:
        try:
            cfb_df.csv_to_df([directory])
        except FileNotFoundError:
            print("Missing data from", directory)
        continue

    # Everything else is requested season by season; a missing file is
    # reported and loading carries on with the next one.
    for year in SEASONS:
        if directory == WEEKLY_DIRECTORY:
            for week in WEEKS:
                try:
                    # Argument order is [directory, week, year].
                    cfb_df.csv_to_df([directory, week, year])
                except FileNotFoundError:
                    print("Missing game data for", year, "week", week)
        else:
            try:
                cfb_df.csv_to_df([directory, year])
            except FileNotFoundError:
                print("Missing", year, "data for", directory)

Missing 2010 data for lines
Missing 2011 data for lines
Missing 2012 data for lines
Missing game data for 2010 week 16
Missing game data for 2011 week 16
Missing game data for 2012 week 16
Missing game data for 2015 week 16
Missing game data for 2016 week 16
Missing game data for 2017 week 16
Missing game data for 2018 week 16
Missing game data for 2019 week 16
Missing 2010 data for player_usage
Missing 2011 data for player_usage
Missing 2012 data for player_usage


Here, we import CFBModel, which provides the attributes and methods used to manipulate the CFB Model

In [4]:
from CFBModel import CFBModel

Create the model and perform operations on it

In [5]:
# Build the model from the tables collected in cfb_df.data_map.
cfb_model = CFBModel(cfb_df.data_map)

# home_favored() supplies the figure reported below as the average home margin.
home_favor = cfb_model.home_favored()
print("Home teams win by an average of {:.2f} points".format(home_favor))

# Add up the .size of every table the model holds.
num_cells = sum(table.size for table in cfb_model.data.values())
print("We have {} cells of data to work with 👀".format(num_cells))
# print("Prediction", cfb_model.regression_predict([0.77, 34]))

Home teams win by an average of 7.18 points
We have 13328242 cells of data to work with 👀


In [6]:
# A second CFBModel instance, built from the same data_map as cfb_model.
spread_regression_model = CFBModel(cfb_df.data_map)

In [7]:
# Run the spread prediction and print the value it returns.
# NOTE(review): the metric is not identified here (presumably a regression
# score such as R^2) — confirm in CFBModel.spread_prediction.
spread_model_score = spread_regression_model.spread_prediction()
print(spread_model_score)

0.24035397058506502


Unit testing

In [8]:
class TestRanking(unittest.TestCase):

    def test_dfs(self):
        self.assertGreater(num_cells, 0)
        self.assertEqual(type(cfb_model.data), dict)
        self.assertEqual(len(cfb_model.data), 11)


# argv=[''] stops unittest from parsing the kernel's own command-line arguments;
# exit=False keeps it from calling sys.exit() once the tests have run.
unittest.main(argv=[''], verbosity=0, exit=False)

----------------------------------------------------------------------
Ran 1 test in 0.000s

OK


<unittest.main.TestProgram at 0x2261221efd0>