In [1]:
# baseball_predictor_harness-npp-edb.ipynb
# Alexis Perumal, Venkat Pinnika, Young You, 1/5/2020
#
# Calls one or more predictor algorithms to predict baseball game outcomes across a range
# of dates. This harness captures the outcomes in a Pandas DF and writes it out to a file
# for further analysis of effectiveness and hyperparameter tuning.
# 
# This variant of the harness, forked 1/6/20, calls the npp predictor with the option
# to exclude the day before gameday in the lookback window.

In [2]:
# Modules
import os
import csv
import pprint
import pandas as pd
import pprint
import datetime

import net_points_predictor as npp

In [3]:
# Empty results table: one row is added per predictor run by the harness loop.
# Column order here is the column order of the CSV written at the end.
outcomes_df = pd.DataFrame(
    columns=[
        'Season',
        'Lookback',
        'Num Games',
        'Num Correct',
        '% Correct',
        'Timestamp',
        'Run Duration',
        'Comment',
    ]
)
# Evaluate `outcomes_df` on its own line to inspect the (initially empty) table.

In [6]:
# Run the net-points predictor over every (lookback, season) combination, once
# with and once without the day before gameday in the lookback window, and
# add one row per run to outcomes_df.
#
# Wider sweeps used previously (kept for reference):
#   lookbacks: [10, 25, 50, 100, 200, 300]
#   lookbacks: [1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 50, 100, 200]
#   seasons:   range(2011, 2018)
lookbacks = [2]
seasons = range(2012, 2013)


def _run_npp_trial(lookback, season, excl_day_before):
    """Run one timed npp prediction and return its result as a row dict.

    Args:
        lookback: Lookback value passed as the first argument to
            npp.net_points_predictor.
        season: Season passed as the second argument to
            npp.net_points_predictor.
        excl_day_before: Forwarded as the predictor's `excl_day_before`
            keyword argument.

    Returns:
        A dict keyed by the outcomes_df column names.
    """
    start_time = datetime.datetime.now()
    num_games, num_correct, percent_correct = npp.net_points_predictor(
        lookback, season, excl_day_before=excl_day_before)
    end_time = datetime.datetime.now()
    print(f"  Result: {round(percent_correct,1)}%")
    print("")

    return {'Season': season,
            'Lookback': lookback,
            'Num Games': num_games,
            'Num Correct': num_correct,
            '% Correct': percent_correct,
            'Timestamp': start_time,
            'Run Duration': end_time - start_time,
            'Comment': f'npp, excl. day before={excl_day_before}, FLO fix included.'}


# Collect rows in a plain list and build the frame once: DataFrame.append was
# removed in pandas 2.0, and growing a frame row-by-row is quadratic.
trial_rows = []
for n in lookbacks:
    for season in seasons:
        # Same order as before: include the day before first, then exclude it.
        for excl_day_before in (False, True):
            trial_rows.append(_run_npp_trial(n, season, excl_day_before))

new_rows = pd.DataFrame(trial_rows, columns=outcomes_df.columns)
if outcomes_df.empty:
    # Nothing accumulated yet; skip concatenating with an empty frame.
    outcomes_df = new_rows
else:
    # Re-running this cell keeps adding to the rows from earlier runs.
    outcomes_df = pd.concat([outcomes_df, new_rows], ignore_index=True)

outcomes_df

Dataset loaded with 19437 games, 10 columns, 2010-04-04 - 2017-10-01
  Analyzing 2012 season: 2012-03-28 - 2012-04-30, with 2 day lookback.
  2020-01-07 15:52:36.080772: Starting build of net runs rolling average tables.
  2020-01-07 15:52:44.338937: Net point tables calculated in 0:00:08.258165 hr/min/sec.
  Results table of length 338
2 ties out of 338 games.
  2020-01-07 15:52:44.356950: Predictions calculated in 0:00:00.017682 hr/min/sec.
         Date Visiting Team Visiting League Home Team Home League  \
328  20120430           OAK              AL       BOS          AL   
329  20120430           BAL              AL       NYA          AL   
330  20120430           SEA              AL       TBA          AL   
331  20120430           TEX              AL       TOR          AL   
332  20120430           PIT              NL       ATL          NL   
333  20120430           LAN              NL       COL          NL   
334  20120430           NYN              NL       HOU          NL   
3

Unnamed: 0,Season,Lookback,Num Games,Num Correct,% Correct,Timestamp,Run Duration,Comment
0,2012,10,338,201,59.467456,2020-01-07 15:50:50.847186,00:00:11.639137,"npp, excl. day before=False, FLO fix included."
1,2012,10,338,173,51.183432,2020-01-07 15:51:02.489448,00:00:11.127247,"npp, excl. day before=True, FLO fix included."
2,2012,1,338,163,48.224852,2020-01-07 15:51:35.262382,00:00:08.832338,"npp, excl. day before=False, FLO fix included."
3,2012,1,338,163,48.224852,2020-01-07 15:51:44.098080,00:00:09.106080,"npp, excl. day before=True, FLO fix included."
4,2012,2,338,170,50.295858,2020-01-07 15:52:35.069791,00:00:09.291694,"npp, excl. day before=False, FLO fix included."
5,2012,2,338,163,48.224852,2020-01-07 15:52:44.364852,00:00:09.301541,"npp, excl. day before=True, FLO fix included."


In [None]:
# Write the accumulated results to a timestamped CSV under output/.
output_dir = "output"
# to_csv does not create missing parent directories, so make sure it exists;
# otherwise a fresh checkout fails here after the whole sweep has already run.
os.makedirs(output_dir, exist_ok=True)
# ':' and ' ' in the timestamp are replaced with '-' and '_' (':' is not
# allowed in Windows filenames).
timestamp = str(datetime.datetime.now()).replace(':', '-').replace(' ', '_')
path = output_dir + "/" + timestamp + "_output.csv"
outcomes_df.to_csv(path_or_buf=path)

In [None]:
# Display the accumulated results table as this cell's output.
outcomes_df