In [1]:
# baseball_h2h_predictor_harness.ipynb
# Alexis Perumal, Venkat Pinnika, Young You, 1/6/2020
# Derived from baseball_predictor_harness.ipynb
#
# Calls one or more predictor algorithms to predict baseball game outcomes across a range
# of dates. This harness captures the outcomes in a Pandas DF and writes it out to a file
# for further analysis of effectiveness and hyperparameter tuning.

In [2]:
# Modules
import os
import csv
import pprint
import pandas as pd
import pprint
import datetime

import alexis_h2h_predictor as ah2h

In [3]:
# Column layout of the results table. The sweep cell below adds one row per
# predictor run using exactly these keys.
RESULT_COLUMNS = [
    'Season',
    'Lookback',
    'Num Games',
    'Num Correct',
    '% Correct',
    'Timestamp',
    'Run Duration',
    'Comment',
]

# Start with an empty table; rows are filled in by the sweep.
outcomes_df = pd.DataFrame(columns=RESULT_COLUMNS)

In [4]:
# Sweep every (lookback, season) combination: run the head-to-head predictor,
# time the call, print the accuracy, and record one result row per run.
#
# Rows are gathered in a plain list and turned into a DataFrame once, after the
# loop. The previous per-iteration `outcomes_df.append(...)` no longer works:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# it also copied the whole frame on every iteration.
run_records = []
for n in [20, 200, 500, 1000 ]:
    for season in range(2016, 2018):
        start_time = datetime.datetime.now()
        # h2h_predictor returns (games evaluated, games predicted correctly,
        # percent correct) for the given lookback `n` and season.
        num_games, num_correct, percent_correct = ah2h.h2h_predictor(n, season)
        end_time = datetime.datetime.now()
        print(f"  Result: {round(percent_correct,1)}%")
        print("")

        run_records.append({'Season':season,
                            'Lookback':n,
                            'Num Games':num_games,
                            'Num Correct':num_correct,
                            '% Correct':percent_correct,
                            'Timestamp':start_time,
                            'Run Duration':end_time - start_time,
                            'Comment':'H2H, prioritize net runs over wins, FLO fix included.'})

# Build the new rows in the existing table's column order.
new_rows = pd.DataFrame(run_records, columns=outcomes_df.columns)

# Keep the original accumulate-onto-outcomes_df semantics. When the table is
# still empty, use the new rows directly: concatenating with an empty frame
# triggers a pandas FutureWarning and can degrade the inferred column dtypes.
if outcomes_df.empty:
    outcomes_df = new_rows
else:
    outcomes_df = pd.concat([outcomes_df, new_rows], ignore_index=True)

outcomes_df

Dataset loaded with 19437 games, 10 columns, 2010-04-04 - 2017-10-01
  Analyzing 2016 season: 2016-04-03 - 2016-10-02, with 20 day lookback.
  2020-01-06 23:53:41.361938: Predictions calculated in 0:00:11.517970 hr/min/sec.
  Result: 52.8%

Dataset loaded with 19437 games, 10 columns, 2010-04-04 - 2017-10-01
  Analyzing 2017 season: 2017-04-02 - 2017-10-01, with 20 day lookback.
  2020-01-06 23:53:53.673719: Predictions calculated in 0:00:11.377983 hr/min/sec.
  Result: 51.7%

Dataset loaded with 19437 games, 10 columns, 2010-04-04 - 2017-10-01
  Analyzing 2016 season: 2016-04-03 - 2016-10-02, with 200 day lookback.
  2020-01-06 23:54:06.812569: Predictions calculated in 0:00:12.296993 hr/min/sec.
  Result: 52.2%

Dataset loaded with 19437 games, 10 columns, 2010-04-04 - 2017-10-01
  Analyzing 2017 season: 2017-04-02 - 2017-10-01, with 200 day lookback.
  2020-01-06 23:54:18.905897: Predictions calculated in 0:00:11.197258 hr/min/sec.
  Result: 52.4%

Dataset loaded with 19437 games, 1

Unnamed: 0,Season,Lookback,Num Games,Num Correct,% Correct,Timestamp,Run Duration,Comment
0,2016,20,2428,1281,52.759473,2020-01-06 23:53:28.836089,00:00:12.526508,"H2H, prioritize net runs over wins, FLO fix in..."
1,2017,20,2430,1256,51.687243,2020-01-06 23:53:41.366158,00:00:12.308227,"H2H, prioritize net runs over wins, FLO fix in..."
2,2016,200,2428,1268,52.224053,2020-01-06 23:53:53.678286,00:00:13.134847,"H2H, prioritize net runs over wins, FLO fix in..."
3,2017,200,2430,1273,52.386831,2020-01-06 23:54:06.816683,00:00:12.089828,"H2H, prioritize net runs over wins, FLO fix in..."
4,2016,500,2428,1260,51.894563,2020-01-06 23:54:18.910752,00:00:13.137810,"H2H, prioritize net runs over wins, FLO fix in..."
5,2017,500,2430,1309,53.868313,2020-01-06 23:54:32.052571,00:00:12.859352,"H2H, prioritize net runs over wins, FLO fix in..."
6,2016,1000,2428,1250,51.482702,2020-01-06 23:54:44.916979,00:00:13.460396,"H2H, prioritize net runs over wins, FLO fix in..."
7,2017,1000,2430,1268,52.18107,2020-01-06 23:54:58.381299,00:00:12.757949,"H2H, prioritize net runs over wins, FLO fix in..."


In [5]:
# Save the results table to a timestamped CSV file under output/.
#
# Create the directory first: to_csv does not create missing parent
# directories, so without this a fresh checkout would fail here only after
# all the predictor runs have finished.
os.makedirs("output", exist_ok=True)

# str(datetime) contains ':' and ' '; both are replaced to keep the file name
# portable (presumably because ':' is not allowed in Windows file names).
path = "output/" + str(datetime.datetime.now()).replace(':', '-').replace(' ', '_') + "_output.csv"
outcomes_df.to_csv(path_or_buf=path)

In [6]:
# Display the results table as this cell's output (same frame that was written to CSV).
outcomes_df

Unnamed: 0,Season,Lookback,Num Games,Num Correct,% Correct,Timestamp,Run Duration,Comment
0,2016,20,2428,1281,52.759473,2020-01-06 23:53:28.836089,00:00:12.526508,"H2H, prioritize net runs over wins, FLO fix in..."
1,2017,20,2430,1256,51.687243,2020-01-06 23:53:41.366158,00:00:12.308227,"H2H, prioritize net runs over wins, FLO fix in..."
2,2016,200,2428,1268,52.224053,2020-01-06 23:53:53.678286,00:00:13.134847,"H2H, prioritize net runs over wins, FLO fix in..."
3,2017,200,2430,1273,52.386831,2020-01-06 23:54:06.816683,00:00:12.089828,"H2H, prioritize net runs over wins, FLO fix in..."
4,2016,500,2428,1260,51.894563,2020-01-06 23:54:18.910752,00:00:13.137810,"H2H, prioritize net runs over wins, FLO fix in..."
5,2017,500,2430,1309,53.868313,2020-01-06 23:54:32.052571,00:00:12.859352,"H2H, prioritize net runs over wins, FLO fix in..."
6,2016,1000,2428,1250,51.482702,2020-01-06 23:54:44.916979,00:00:13.460396,"H2H, prioritize net runs over wins, FLO fix in..."
7,2017,1000,2430,1268,52.18107,2020-01-06 23:54:58.381299,00:00:12.757949,"H2H, prioritize net runs over wins, FLO fix in..."
