In [1]:
import pickle
import pandas as pd

from ipynb.fs.full.createModel import getTrainingSet, createDataFrame
from ipynb.fs.full.configureCWD import setCurrentWorkingDirectory


In [2]:
def getTrainingSetCSV(startYear, startMonth, startDay, endYear, endMonth, endDay, season, startDateOfSeason, filename='gamesWithInfo.csv'):
    """Fetch the games in a date range and save them as a CSV file.

    All date/season arguments are forwarded unchanged to ``getTrainingSet``
    (per the original note, it yields date, teams, and z-score differences
    for every game within the range). Its result is converted with
    ``createDataFrame`` and written to ``filename``.

    Parameters
    ----------
    startYear, startMonth, startDay : start of the range, passed through as-is.
    endYear, endMonth, endDay : end of the range, passed through as-is.
    season : season identifier, passed through as-is.
    startDateOfSeason : first day of that season, passed through as-is.
    filename : destination CSV path, resolved against the current working
        directory (the switch to the 'Data' folder is disabled).

    Returns
    -------
    None. The only effect is the CSV written to ``filename``; the frame's row
    index is written as the first, unnamed column (pandas default).
    """
    gamesInRange = getTrainingSet(
        startYear, startMonth, startDay,
        endYear, endMonth, endDay,
        season, startDateOfSeason,
    )

    # Disabled: setCurrentWorkingDirectory('Data')
    createDataFrame(gamesInRange).to_csv(filename)

In [3]:
# Creates a csv file that gives predictions for range of games
# Prints accuracy of model in predicting games for specified range
# gameDataFilename and outputFilename must be '.csv' files
def getPredictionsCSV(gameDataFilename, outputFilename):

#     setCurrentWorkingDirectory('Data')

    gamesWithZScoreDifs = pd.read_csv(gameDataFilename)

    withoutNums = gamesWithZScoreDifs.loc[:, 'Home':'Date']  # Slices dataframe to only includes home through date
    justZScoreDifs = gamesWithZScoreDifs.loc[:, 'W_PCT':'TS_PCT']  # Slices dataframe to only include statistical differences

#     setCurrentWorkingDirectory('SavedModels')
    with open('finalized_model.pkl', 'rb') as file:  # Change filename here if model is named differently
        pickleModel = pickle.load(file)

    predictions = pickleModel.predict(justZScoreDifs)  # Creates list of predicted winners and losers
    probPredictions = pickleModel.predict_proba(justZScoreDifs)  # Creates list of probabilities that home team wins

    numCorrect = 0
    numWrong = 0
    allGames = []

    for i in range(len(probPredictions)):

        winProbability = probPredictions[i][1]
        homeTeam = withoutNums.iloc[i, 0]
        awayTeam = withoutNums.iloc[i, 1]
        date = withoutNums.iloc[i, 10]

        currentGameWithPred = [date, homeTeam, awayTeam, winProbability]

        allGames.append(currentGameWithPred)

        # Creates dataframe that holds all games info and predictions
        predictionsDF = pd.DataFrame(
            allGames,
            columns=['Date', 'Home', 'Away', 'Home Team Win Probability']
        )

#         setCurrentWorkingDirectory('Data')
        predictionsDF.to_csv(outputFilename)  # Saves game info with predictions in data folder as csv file

        value = withoutNums.iloc[i,9]
        if value == predictions[i]:
            numCorrect += 1
        else :
            numWrong += 1

    print('Accuracy:')
    print((numCorrect)/(numCorrect+numWrong))  # Prints accuracy of model in predicting games for specified range

In [4]:
# Intended to generate probability predictions over the specified range of games and
# export them to a csv with game info (the prediction step is currently commented out
# in the body, so only the game-data csv is written).
# gameDataFilename and outputFilename must end in '.csv'
# season must be in form 'yyyy-yy' and startDateOfSeason must be in form 'mm/dd/yyyy'
def makePastPredictions(startYear, startMonth, startDay, endYear, endMonth, endDay, season, startDateOfSeason,
                       gameDataFilename='gamesWithInfo.csv', outputFilename='predictions.csv'):
    """Collect game data for a date range into ``gameDataFilename``.

    The date and season arguments are handed to ``getTrainingSetCSV`` unchanged,
    together with ``gameDataFilename`` as the destination file.

    ``outputFilename`` is currently unused: the follow-up call to
    ``getPredictionsCSV`` that would write the predictions is disabled, so this
    function only produces the game-data csv. Returns None.
    """
    rangeAndSeason = (
        startYear, startMonth, startDay,
        endYear, endMonth, endDay,
        season, startDateOfSeason,
    )

    # Step 1: obtain info for the range of games.
    getTrainingSetCSV(*rangeAndSeason, gameDataFilename)

    # Step 2 (disabled): make probabilities for the range of games.
#     getPredictionsCSV(gameDataFilename, outputFilename)

In [5]:
# EDIT THIS — arguments, in order:
#   start date (yyyy, m, d)  (must be at least three days after start of season),
#   end date (yyyy, m, d)    (non-inclusive),
#   season (yyyy-yy),
#   start date of season (mm/dd/yyyy),
#   .csv filename for games with z score differences,
#   .csv filename for games with predictions
makePastPredictions(2020, 8, 8, 2020, 8, 10, '2019-20', '10/16/2019',
                    'gamesWithInfo.csv', 'predictions.csv')

08/08/2020
['Miami Heat', 'Phoenix Suns', 1.1802345914620955, 1.0322167307471481, -0.3001059871744429, 0.20232021981643478, 0.8167438320029146, 0.6671322849598795, -0.5158360513340187, 1.0954185561539456, 0]
['Dallas Mavericks', 'Milwaukee Bucks', -1.3811255857535165, -1.0838275672845064, -0.12004239486977666, -1.5174016486232527, -0.9800925984034974, 1.2972016651997675, 2.7081392695035853, -0.2738546390384864, 1]
['Portland Trail Blazers', 'LA Clippers', -1.3748477421819094, -0.8773842211350773, -1.6205723307419904, -1.5174016486232529, -1.5722318766056105, -0.18531452359996692, 2.2890224777946937, -0.5477092780769728, 0]
['Denver Nuggets', 'Utah Jazz', 0.18205746357660002, -0.20644334614943255, 2.8209962794397616, -1.2139213188986016, -0.02041859580007288, 0.29650323775995113, 0.25791802566700706, -1.3008095354328102, 1]
['Indiana Pacers', 'Los Angeles Lakers', -0.8851759435965718, -1.3418817499712934, 0.5401907769139961, -2.023202198164337, -0.8575810236030603, -0.5188806660799039, 

In [None]:
# Scratch check: read the saved game file and show the month/day parsed from
# the "Date" value of its last row (the value is split on '/', taking the
# first piece as the month and the second as the day).
# NOTE(review): the name `csv` would shadow the stdlib `csv` module if that
# module were ever imported in this notebook.
csv = pd.read_csv("gamesWithInfo.csv")
# Label lookup with len(csv)-1 reaches the last row because read_csv assigns a
# default 0..n-1 index here.
start_date = csv["Date"][len(csv)-1].split('/')
print(start_date)
month = int(start_date[0])
day = int(start_date[1])
print(month)
print(day)

In [6]:
def updateCSV(filename, endYear=2020, endMonth=8, endDay=11, season='2019-20', startDateOfSeason='10/16/2019'):
    """Extend the game CSV at ``filename`` in place with more recent games.

    The "Date" value of the file's last row ('mm/dd/yyyy') is used as the start
    of the range to fetch; games from that date up to the given end date are
    obtained via ``getTrainingSet`` / ``createDataFrame`` and merged into the
    file.

    Parameters
    ----------
    filename : path of an existing game '.csv' file containing a 'Date' column.
    endYear, endMonth, endDay : end date handed to ``getTrainingSet``
        (defaults reproduce the previously hard-coded 2020-08-11).
    season : season identifier handed to ``getTrainingSet`` (default '2019-20').
    startDateOfSeason : season start handed to ``getTrainingSet``
        (default '10/16/2019').

    Returns
    -------
    None. ``filename`` is overwritten with the merged data.

    Raises
    ------
    ValueError : if the file contains no rows, so no start date can be derived.
    """
    old_df = pd.read_csv(filename)

    # Files written by DataFrame.to_csv carry the row index as an unnamed first
    # column, which read_csv returns as 'Unnamed: 0'. Drop such columns so they
    # do not pile up ('Unnamed: 0', 'Unnamed: 0.1', ...) on every update.
    keepColumns = [not str(col).startswith('Unnamed:') for col in old_df.columns]
    old_df = old_df.loc[:, keepColumns]

    if old_df.empty:
        raise ValueError(f"{filename!r} has no rows; cannot determine the date to update from")

    start_date = old_df['Date'].iloc[-1].split('/')
    month = int(start_date[0])
    day = int(start_date[1])
    year = int(start_date[2])

    new_df = createDataFrame(
        getTrainingSet(year, month, day, endYear, endMonth, endDay, season, startDateOfSeason)
    )

    # The fetch starts on the last recorded date itself, so the fresh data
    # overlaps what is already stored. Replace stored rows for every date that
    # the fresh data covers instead of duplicating them.
    if 'Date' in new_df.columns:
        old_df = old_df[~old_df['Date'].isin(new_df['Date'])]

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    # ignore_index gives the merged file a clean 0..n-1 row index.
    uptodate = pd.concat([old_df, new_df], ignore_index=True)

    uptodate.to_csv(filename)

In [7]:
# Extend gamesWithInfo.csv in place with games fetched from its last recorded date onward.
updateCSV("gamesWithInfo.csv")

08/09/2020
['Boston Celtics', 'Orlando Magic', 1.3284672706183467, 0.3628882450747679, -1.0249543747901384, 0.7045333157189532, 1.5004579765280763, 1.8838003552234293, -0.7810022447955636, 1.8498597848747842, 1]
['Portland Trail Blazers', 'Philadelphia 76ers', -0.9995134702747559, -0.4147294229425887, -3.255737425803969, -1.4090666314379066, -0.7399518788357635, 0.960368808545274, 1.9850473721887218, 0.41107995219439813, 1]
['Sacramento Kings', 'Houston Rockets', -1.3411193398623311, -0.10368235573564899, 1.6278687129019846, 0.4025904661251169, -1.1921446936798414, -1.47749047468504, 0.488126402997226, -1.0962132058517284, 0]
['Oklahoma City Thunder', 'Washington Wizards', 1.7080293479378745, 1.347870624563415, -1.5072858452796158, 0.0, 1.356578444532233, 0.0, -2.212839693587428, 0.6851332536573302, 1]
['Toronto Raptors', 'Memphis Grizzlies', 1.5815086554980318, -0.20736471147129437, -0.6632057719230298, -0.3019428495938363, 1.4387953199584291, 0.664870713608267, -1.6596297701905665, 0