In [10]:
import pickle
import pandas as pd
import datetime

from ipynb.fs.full.createModel import getTrainingSet, createDataFrame
from ipynb.fs.full.configureCWD import setCurrentWorkingDirectory
import os.path
from os import path


In [11]:
def getTrainingSetCSV(startYear, startMonth, startDay, endYear, endMonth, endDay, season, startDateOfSeason, filename='gamesWithInfo.csv'):
    """Fetch the games in a date range and save them as a CSV file.

    The start date, end date, season and season start date are forwarded
    unchanged to getTrainingSet. Its result is tabulated with createDataFrame
    and written to `filename`, relative to the current working directory.
    """
    dateRange = (startYear, startMonth, startDay, endYear, endMonth, endDay)

    # Date, teams and z-score differences for every game within the range
    gamesInRange = getTrainingSet(*dateRange, season, startDateOfSeason)

    # Persist the table; the DataFrame index is written as the first column
    createDataFrame(gamesInRange).to_csv(filename)

In [12]:
# Creates a csv file that gives predictions for range of games
# Prints accuracy of model in predicting games for specified range
# gameDataFilename and outputFilename must be '.csv' files
def getPredictionsCSV(gameDataFilename, outputFilename):
    """Score every game in a game-data CSV with the saved model and export the results.

    Reads `gameDataFilename`, feeds the columns 'W_PCT' through 'TS_PCT' to the
    model unpickled from 'finalized_model.pkl', writes one row per game
    (date, home team, away team, home-team win probability) to `outputFilename`,
    and prints the fraction of games whose predicted result equals the recorded one.

    Args:
        gameDataFilename: path of the input '.csv' file. It must contain the
            columns 'Home' through 'Date' and 'W_PCT' through 'TS_PCT'.
        outputFilename: path of the '.csv' file that receives the predictions.

    When the input holds no games, a header-only output file is written and the
    accuracy is reported as not available instead of dividing by zero.
    """
    # Positions inside the 'Home':'Date' slice of the input frame
    HOME_COL, AWAY_COL, RESULT_COL, DATE_COL = 0, 1, 9, 10

    gamesWithZScoreDifs = pd.read_csv(gameDataFilename)

    withoutNums = gamesWithZScoreDifs.loc[:, 'Home':'Date']  # Home through date
    justZScoreDifs = gamesWithZScoreDifs.loc[:, 'W_PCT':'TS_PCT']  # Statistical differences only

    # SECURITY: pickle.load can execute arbitrary code. Only load model files
    # that come from a trusted source.
    with open('finalized_model.pkl', 'rb') as file:  # Change filename here if model is named differently
        pickleModel = pickle.load(file)

    predictions = pickleModel.predict(justZScoreDifs)  # Predicted winners and losers
    probPredictions = pickleModel.predict_proba(justZScoreDifs)  # Probabilities; column 1 is the home-team win

    allGames = []
    numCorrect = 0

    for i in range(len(probPredictions)):
        allGames.append([
            withoutNums.iloc[i, DATE_COL],
            withoutNums.iloc[i, HOME_COL],
            withoutNums.iloc[i, AWAY_COL],
            probPredictions[i][1],
        ])
        if withoutNums.iloc[i, RESULT_COL] == predictions[i]:
            numCorrect += 1

    # Build the frame and write the file once, after all games are collected
    # (previously both happened on every loop iteration).
    predictionsDF = pd.DataFrame(
        allGames,
        columns=['Date', 'Home', 'Away', 'Home Team Win Probability']
    )
    predictionsDF.to_csv(outputFilename)

    numGames = len(allGames)
    print('Accuracy:')
    if numGames == 0:
        print('N/A (no games in range)')
    else:
        print(numCorrect / numGames)  # Accuracy of the model over the specified range

In [13]:
# # Generates probability predictions over a specified range of games and exports them to a csv with game info
# # gameDataFilename and outputFilename must end in '.csv'
# # season must be in form 'yyyy-yy' and startDateOfSeason must be in form 'mm/dd/yyyy'
def makePastPredictions(startYear, startMonth, startDay, endYear, endMonth, endDay, season, startDateOfSeason,
                       gameDataFilename='gamesWithInfo.csv'):
    """Write the game info for the requested date range to `gameDataFilename`.

    Only the data-gathering step (getTrainingSetCSV) runs here; the follow-up
    prediction step (getPredictionsCSV) is currently disabled.
    """
    dateRange = (startYear, startMonth, startDay, endYear, endMonth, endDay)

    # Obtains info for the range of games and stores it in the csv
    getTrainingSetCSV(*dateRange, season, startDateOfSeason, filename=gameDataFilename)

In [None]:
# Arguments, in order:
#   start date (yyyy, m, d) -- must be at least three days after the start of the season
#   end date (yyyy, m, d)   -- non-inclusive
#   season ('yyyy-yy') and start date of the season ('mm/dd/yyyy')
#   .csv filename that receives the games with z-score differences
# EDIT THIS
makePastPredictions(
    2019, 10, 25,
    2020, 3, 12,
    '2019-20', '10/16/2019',
    'gamesWithInfo.csv',
)

In [None]:
# Scratch check: show the date of the last game stored in the csv.
csv = pd.read_csv("gamesWithInfo.csv")

# Last row's date split on '/'; the first two pieces are read as month and day
start_date = csv["Date"][len(csv) - 1].split('/')
print(start_date)

month, day = int(start_date[0]), int(start_date[1])
print(month, day, sep='\n')

In [14]:
def updateCSV(filename, endYear=2019, endMonth=10, endDay=29, season='2019-20', startDateOfSeason='10/16/2019'):
    """Append the games played after the last recorded date to an existing csv.

    The last row's 'Date' value (split on '/' as month/day/year) is advanced by
    one calendar day and used as the start of the range requested from
    getTrainingSet. The new games are appended to the stored ones and the file
    is rewritten in place.

    Args:
        filename: csv file to update; it must contain a 'Date' column.
        endYear, endMonth, endDay: end of the range passed to getTrainingSet.
        season: season string passed to getTrainingSet.
        startDateOfSeason: season start date passed to getTrainingSet.

    Raises:
        ValueError: if the csv holds no rows, so there is no date to resume from.
    """
    old_df = pd.read_csv(filename)
    if old_df.empty:
        raise ValueError("'%s' contains no games to resume from" % filename)

    dateParts = old_df["Date"].iloc[-1].split('/')
    lastDate = datetime.date(int(dateParts[2]), int(dateParts[0]), int(dateParts[1]))

    # Real calendar arithmetic: a bare day + 1 yields invalid dates such as 10/32
    startDate = lastDate + datetime.timedelta(days=1)

    new_df = createDataFrame(getTrainingSet(startDate.year, startDate.month, startDate.day,
                                            endYear, endMonth, endDay, season, startDateOfSeason))

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    uptodate = pd.concat([old_df, new_df], ignore_index=True)
    # drop() returns a new frame, so the result must be kept. Without this the
    # saved index column piles up as another 'Unnamed: 0' on every update.
    uptodate = uptodate.drop(columns=['Unnamed: 0'], errors='ignore')

    uptodate.to_csv(filename)

In [15]:
def month_to_month(filename, season='2018-19', startDateOfSeason='10/16/2018'):
    """Extend a games csv one calendar month at a time until July is reached.

    If `filename` does not exist it is first seeded through getTrainingSetCSV
    with the fixed window 2018-10-25 to 2018-11-01. Starting with the month
    after the last recorded 'Date' (split on '/' as month/day/year), each
    month from its first day to the first day of the following month is
    requested from getTrainingSet, appended to the stored games, and the file
    is rewritten after every month so progress survives an interruption.

    Args:
        filename: csv file to create or extend.
        season: season string passed to getTrainingSet / getTrainingSetCSV.
        startDateOfSeason: season start date passed to the same helpers.

    Raises:
        ValueError: if the csv exists but holds no rows.
    """
    STOP_MONTH = 7  # iteration ends once the next month to fetch would be July

    if not path.exists(filename):  # file doesn't exist: seed it under the requested name
        getTrainingSetCSV(2018, 10, 25, 2018, 11, 1, season, startDateOfSeason, filename)

    if not path.exists(filename):  # seeding produced nothing to extend
        return

    # Drop the saved index column so it does not pile up on every rewrite
    games_df = pd.read_csv(filename).drop(columns=['Unnamed: 0'], errors='ignore')
    if games_df.empty:
        raise ValueError("'%s' contains no games to resume from" % filename)

    dateParts = games_df["Date"].iloc[-1].split('/')
    lastMonth = int(dateParts[0])
    lastYear = int(dateParts[2])

    # First month to fetch; moving past December also moves to the next year
    curMonth = (lastMonth % 12) + 1
    curYear = lastYear + 1 if curMonth == 1 else lastYear

    while curMonth != STOP_MONTH:
        nextMonth = (curMonth % 12) + 1
        nextYear = curYear + 1 if nextMonth == 1 else curYear

        monthGames = createDataFrame(getTrainingSet(curYear, curMonth, 1, nextYear, nextMonth, 1,
                                                    season, startDateOfSeason))
        # concat returns a new frame; keep it so the month is actually added
        games_df = pd.concat([games_df, monthGames], ignore_index=True)
        games_df.to_csv(filename)

        curMonth, curYear = nextMonth, nextYear
                

In [None]:
# Extend the games csv month by month
month_to_month(filename='gamesWithInfo.csv')

In [16]:
def day_to_day(filename, season='2018-19', startDateOfSeason='10/16/2018', endDate=None):
    """Extend a games csv one calendar day at a time up to (not including) `endDate`.

    If `filename` does not exist it is first seeded through getTrainingSetCSV
    with the fixed window 2018-10-25 to 2018-11-01. Starting the day after the
    last recorded 'Date' (split on '/' as month/day/year), each single-day
    range is requested from getTrainingSet and its rows are appended to the
    file without a header.

    Args:
        filename: csv file to create or extend.
        season: season string passed to getTrainingSet / getTrainingSetCSV.
        startDateOfSeason: season start date passed to the same helpers.
        endDate: datetime.date at which to stop (exclusive); defaults to
            2019-07-01.

    Raises:
        ValueError: if the csv exists but holds no rows.
    """
    if endDate is None:
        endDate = datetime.date(2019, 7, 1)

    if not path.exists(filename):  # file doesn't exist: seed it under the requested name
        getTrainingSetCSV(2018, 10, 25, 2018, 11, 1, season, startDateOfSeason, filename)

    if not path.exists(filename):  # seeding produced nothing to extend
        return

    old_df = pd.read_csv(filename)
    if old_df.empty:
        raise ValueError("'%s' contains no games to resume from" % filename)

    dateParts = old_df["Date"].iloc[-1].split('/')
    lastDate = datetime.date(int(dateParts[2]), int(dateParts[0]), int(dateParts[1]))

    oneDay = datetime.timedelta(days=1)
    current_date = lastDate + oneDay

    while current_date < endDate:
        next_date = current_date + oneDay

        temp_df = createDataFrame(getTrainingSet(current_date.year, current_date.month, current_date.day,
                                                 next_date.year, next_date.month, next_date.day,
                                                 season, startDateOfSeason))
        # Append only the new rows; the file is not re-read on every iteration
        temp_df.to_csv(filename, mode='a', header=False)

        current_date = next_date


1
False

 Original Date:  2019-07-20 


 New Date:  2019-08-05 



In [None]:
# Extend the games csv day by day
day_to_day(filename='gamesWithInfo.csv')

11/01/2018
['Philadelphia 76ers', 'LA Clippers', -0.29134871082327607, 1.1259214797773416, 2.0148873837911423, -1.191780164151846, -0.8328684684746059, -0.835604182564774, 0.3997919784781244, -1.508555799190075, 1]
['Boston Celtics', 'Milwaukee Bucks', -1.173601849231789, -1.095491169513091, -1.1102440686196091, -1.8227226039969415, -1.4158763964068304, -2.467021872334094, -0.39979197847812475, -2.771532747349211, 1]
['Cleveland Cavaliers', 'Denver Nuggets', -2.92990112011013, -1.2172124105701003, -1.3980851234469163, 0.6309424398450955, -2.082171171186515, -0.0795813507204529, 2.878502245042497, -0.07016538600884087, 0]
['Portland Trail Blazers', 'New Orleans Pelicans', 0.5868009246158944, 1.8866792363836564, -2.384968739997678, 0.9814660175368151, 0.832868468474606, -0.0795813507204528, -1.3792823257495277, 0.1754134650221023, 1]
['Atlanta Hawks', 'Sacramento Kings', -1.3910875066069104, -0.39559403343528393, 0.04112015068961575, 1.962932035073629, -1.0549667267345009, -1.79058039121

['Philadelphia 76ers', 'Detroit Pistons', -0.061842360872946756, 0.16284562759160504, 3.246998076584067, 0.14002958638764818, 0.2188386337415495, -0.13009527422253253, -0.3072498217554736, 0.5364675836939556, 1]
['San Antonio Spurs', 'New Orleans Pelicans', 0.8822843484540457, 0.48853688277481533, -0.5782325341862043, -1.1902514842950145, 0.2188386337415495, -0.5203810968901332, -0.8398161794649628, -0.6131058099359451, 1]
['Chicago Bulls', 'Houston Rockets', -0.26386073972457413, -0.130276502073286, 1.6012593254387177, 0.7701627251320687, 0.01367741460884675, 0.23850800274131378, 0.3891831075569314, 1.341168959234885, 0]
['Portland Trail Blazers', 'Los Angeles Lakers', 1.5460590218236785, 2.442684413874077, -1.9126153053851365, 0.7001479319382434, 1.1352254125342882, 0.6287938254089114, -1.044649393968609, 0.7280631492989399, 0]
['Indiana Pacers', 'Boston Celtics', -0.3421943968303074, -1.8890092800626186, 0.17791770282652367, 0.49010355235676994, 0.16412897530616222, 2.12488947896805

['Golden State Warriors', 'Memphis Grizzlies', 1.2403782136578059, 1.8935803480327253, 2.5597420188768645, 0.816607398837979, 1.2493363091654093, 2.9005368733158874, 0.5618133417374334, 2.805425198186681, 1]
['New York Knicks', 'Chicago Bulls', 0.4510466231482929, 0.8161984258761751, -1.6052619440414226, -0.2722024662793263, 0.590345728506732, -0.11602147493263554, -1.0588020671205456, -1.242402587768389, 0]
['LA Clippers', 'Minnesota Timberwolves', 0.7036327321113373, 1.1753257332616927, -0.5640109533118501, 2.24567034680444, 1.6749343925074718, 0.9281717994610841, -1.858305668823811, 2.0840301472243903, 1]
['Orlando Magic', 'Cleveland Cavaliers', 1.0013235033892107, 1.0773819221565502, 1.7354183178826192, -1.3610123313966305, 0.2745794086077824, -1.2530319292724614, -1.5557907925036574, -1.322557593430866, 1]
