In [210]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.datasets import make_regression
from sklearn.linear_model import RidgeCV
from sklearn.neighbors import RadiusNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn import linear_model
from sklearn.naive_bayes import GaussianNB
import datetime

In [2]:
def opennbacsv(filename):
    """Load an NBA results CSV from the project's ``data`` folder.

    The folder is resolved as ``<parent of the current working directory>/data``.
    ``Teamwinpred`` and ``Opponentwinpred`` are read as strings, have a trailing
    ``%`` stripped and are divided by 100; the ``Country``, ``League`` and
    ``Season`` columns are removed.

    Parameters
    ----------
    filename : str
        Name of the CSV file inside the data folder.

    Returns
    -------
    pandas.DataFrame
        The loaded table with the conversions above applied.
    """
    project_root = os.path.dirname(os.getcwd())
    csv_path = os.sep.join([project_root, "data", filename])
    frame = pd.read_csv(csv_path)
    # Both win-probability columns share the same "NN%" text format.
    for pct_column in ("Teamwinpred", "Opponentwinpred"):
        as_number = frame[pct_column].str.rstrip('%').astype('float')
        frame[pct_column] = as_number / 100.0
    return frame.drop(columns=["Country", "League", "Season"])

In [220]:
def fitmodel(X, y, regressor, eon=5):
    """Build and fit a preprocessing + regression pipeline.

    Parameters
    ----------
    X : pandas.DataFrame
        Must contain the categorical and numeric columns named in the
        ``ColumnTransformer`` below; all other columns are dropped.
    y : array-like
        Regression target.
    regressor : str
        Name of the estimator to use. One of the keys of ``builders`` below.
    eon : number, default 5
        Overloaded tuning value: ``max_depth`` for ``DecisionTreeRegressor``,
        ``n_neighbors`` for ``KNeighborsRegressor`` and ``epsilon`` for ``SVR``.
        It is ignored by every other estimator (``RandomForestRegressor`` is
        fixed at 100 trees).

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline.

    Raises
    ------
    ValueError
        If ``regressor`` is not a recognised name. Previously any unknown name
        (for example a typo) silently trained an SVR, so results could be
        recorded under the wrong model label.
    """
    # Factories are lazy so only the requested estimator is instantiated.
    builders = {
        "LinearRegression": lambda: LinearRegression(),
        "RandomForestRegressor": lambda: RandomForestRegressor(n_estimators=100, random_state=1),
        "DecisionTreeRegressor": lambda: DecisionTreeRegressor(max_depth=eon),
        "RidgeCV": lambda: RidgeCV(),
        "GradientBoostingRegressor": lambda: GradientBoostingRegressor(random_state=1),
        "GaussianProcessRegressor": lambda: GaussianProcessRegressor(random_state=1),
        "BayesianRidge": lambda: linear_model.BayesianRidge(),
        "KNeighborsRegressor": lambda: KNeighborsRegressor(n_neighbors=eon),
        "RadiusNeighborsRegressor": lambda: RadiusNeighborsRegressor(),
        "SVR": lambda: SVR(gamma=.1, kernel='rbf', C=1.0, epsilon=eon),
    }
    try:
        build = builders[regressor]
    except KeyError:
        raise ValueError(
            f"Unknown regressor {regressor!r}; expected one of {sorted(builders)}"
        ) from None
    reg = build()
    print(regressor)

    # NOTE(review): "Teamscore" is listed as a numeric feature, and predictor()
    # in this notebook also passes the Teamscore column as y, so the target is
    # among the inputs. Left unchanged here; confirm this is intended.
    tree_preprocessor = ColumnTransformer(
        [
            ("categorical", OrdinalEncoder(),
                ["Homeoraway", "Favourite", "OpponentFormReceivedLast10Games"]),
            ("numeric", "passthrough",
                ["Teamodds", "Totalscorepred", "Teamwinpred", "Teamscorepred", "Opponentscorepred",
                 "Teamscore", "Totalscorereal", "Totalfrompred", "Scoredfrompred",
                 "OpponentAvgRealReceived10games", "OpponentAvgPredReceived10games",
                 "OpponentAvgDifReceivedRealvsPred10games", "OpponentReceivedOorULast10Games"
                 ]),
        ],
        remainder="drop")
    model = Pipeline([
        ("preprocessor", tree_preprocessor),
        ("regressor", reg),
    ])
    model = model.fit(X, y)
    return model

In [215]:
def _opponent_recent_stats(nba, opponent, predgamenumber, columnlist):
    """Summarise one opponent's recent games as a dict of feature values.

    The window is the opponent's first ``predgamenumber`` rows in ``nba``,
    reduced to the last 11 of those with the final row discarded.
    """
    recent = (
        nba.loc[nba["Team"] == opponent, columnlist]
        .head(predgamenumber)
        .tail(11)
        .iloc[:-1]
    )
    scored_vs_pred = recent["Scoredfrompred"].mean()
    received_vs_pred = recent["Receivedfrompred"].mean()
    # Number of games in which the points received reached or exceeded the prediction.
    received_over_count = (recent["Opponentscore"] >= recent["Opponentscorepred"]).sum()
    return {
        "OpponentAvgRealScored10games": recent["Teamscore"].mean(),
        "OpponentAvgPredScored10games": recent["Teamscorepred"].mean(),
        "OpponentAvgDifScoredRealvsPred10games": scored_vs_pred,
        "OpponentAvgRealReceived10games": recent["Opponentscore"].mean(),
        "OpponentAvgPredReceived10games": recent["Opponentscorepred"].mean(),
        "OpponentAvgDifReceivedRealvsPred10games": received_vs_pred,
        "OpponentFormScoredLast10Games":
            "FormIsScoringOver" if scored_vs_pred > 0 else "FormIsScoringUnder",
        "OpponentFormReceivedLast10Games":
            "FormIsReceivingOver" if received_vs_pred > 0 else "FormIsReceivingUnder",
        # Stored as float, matching the NaN-initialised column used previously.
        "OpponentReceivedOorULast10Games": float(received_over_count),
    }


def predictor(Team, predgamenumber, regressor="SVR", eon=5):
    """Predict a team's score for one game and grade the over/under call.

    Loads ``nbaclean.csv``, takes the team's first ``predgamenumber - 1`` games,
    keeps the last 10 of them, adds opponent form features to every row, fits
    the requested regressor on that window and predicts the window's last row.

    Parameters
    ----------
    Team : str
        Value matched against the ``Team`` column.
    predgamenumber : int
        The window ends at the team's row number ``predgamenumber - 1``.
    regressor : str, default "SVR"
        Estimator name forwarded to ``fitmodel``.
    eon : number, default 5
        Tuning value forwarded to ``fitmodel``; forced to 100 for
        ``"RandomForestRegressor"``.

    Returns
    -------
    list
        ``[date, predicted column name, Team, predgamenumber, regressor, eon,
        bookie score prediction, team scoring average, opponent points-received
        average, opponent over count, model prediction, "Over"/"Under",
        formula prediction, real score, "Correct"/"Incorrect"]``.

    Raises
    ------
    ValueError
        If fewer than two games are available for the window.
    """
    if regressor == "RandomForestRegressor":
        eon = 100
    # The hidden global ``Column2predict`` is not defined anywhere in this
    # notebook; the column actually used as the fit target is named explicitly.
    target_column = "Teamscore"
    columnlist = ["Date", "Homeoraway", "Opponent", "Teamodds", "Teamscore", "Opponentodds",
                  "Opponentscore", "Favourite", "Totalscorepred", "Totalscorereal",
                  "Totalfrompred", "Teamwinpred", "Teamscorepred", "Scoredfrompred",
                  "Opponentwinpred", "Opponentscorepred", "Receivedfrompred"]
    columnstoadd = ["OpponentAvgRealScored10games", "OpponentAvgPredScored10games",
                    "OpponentAvgDifScoredRealvsPred10games",
                    "OpponentAvgRealReceived10games", "OpponentAvgPredReceived10games",
                    "OpponentAvgDifReceivedRealvsPred10games",
                    "OpponentFormScoredLast10Games", "OpponentFormReceivedLast10Games",
                    "OpponentReceivedOorULast10Games"]

    nba = opennbacsv("nbaclean.csv")
    # Explicit copy: every write below goes straight to this frame, so no
    # chained assignment (a silent no-op under pandas Copy-on-Write) is needed.
    x = nba.loc[nba["Team"] == Team, columnlist].head(predgamenumber - 1).tail(10).copy()
    if len(x) < 2:
        raise ValueError(
            f"Need at least 2 games for {Team!r} before game {predgamenumber}, found {len(x)}"
        )

    # Keep the real result of the last row before it is overwritten.
    realscore = x["Teamscore"].iloc[-1]
    total_pred_change = x["Totalscorepred"].iloc[-1] - x["Totalscorepred"].iloc[-2]
    # The last row's result-dependent values are replaced before fitting, as the
    # actual values would not be available before the game.
    # NOTE(review): the Teamscore average is taken over the whole window, which
    # still includes the last row's real score — behaviour kept as-is; confirm.
    placeholders = {
        "Teamscore": x["Teamscore"].mean(),
        "Opponentscore": x["Opponentscorepred"].mean(),
        "Totalscorereal": x["Totalscorepred"].mean(),
        "Totalfrompred": total_pred_change,
        "Scoredfrompred": total_pred_change / 2,
        "Receivedfrompred": total_pred_change / 2,
    }
    # Cast first so writing a float into an integer column never needs an upcast.
    x = x.astype({column: "float64" for column in placeholders})
    last_row = len(x) - 1
    for column, value in placeholders.items():
        x.iloc[last_row, x.columns.get_loc(column)] = value

    # One stats dict per row, from that row's opponent; columns are built whole
    # instead of writing strings cell-by-cell into NaN float columns.
    opponent_stats = [
        _opponent_recent_stats(nba, opponent, predgamenumber, columnlist)
        for opponent in x["Opponent"]
    ]
    for column in columnstoadd:
        x[column] = [stats[column] for stats in opponent_stats]

    model = fitmodel(x, x[target_column], regressor, eon)
    Bookie = x["Teamscorepred"].iloc[-1]
    last10gamesscoring = float("{:.2f}".format(x["Teamscore"].mean()))
    opponentlast10games = float("{:.2f}".format(x["OpponentAvgRealReceived10games"].iloc[-1]))
    opponentform = x["OpponentReceivedOorULast10Games"].iloc[-1]
    systemprediction = float("{:.2f}".format(model.predict(x)[-1]))
    VariablePrediction = predictionmaker(Bookie, last10gamesscoring, opponentlast10games,
                                         opponentform, systemprediction)
    SystemStraightPrediction = systempredictionmaker(Bookie, systemprediction)

    # A real score exactly equal to the bookie line counts as "Incorrect".
    if SystemStraightPrediction == "Over" and realscore > Bookie:
        SystemPred = "Correct"
    elif SystemStraightPrediction == "Under" and realscore < Bookie:
        SystemPred = "Correct"
    else:
        SystemPred = "Incorrect"

    date = x["Date"].iloc[-1]
    # ``eon`` (not the notebook-level ``estimatororneigbor``) is the value that
    # was actually forwarded to fitmodel, so that is what gets recorded.
    funcreturn = [date, target_column, Team, predgamenumber, regressor, eon, Bookie,
                  last10gamesscoring, opponentlast10games, opponentform, systemprediction,
                  SystemStraightPrediction, VariablePrediction, realscore, SystemPred]
    return funcreturn


In [178]:
def systempredictionmaker(bookies, systempred):
    """Return "Under" when the bookie line is above the model prediction, else "Over".

    A prediction exactly equal to the bookie line yields "Over".
    """
    return "Under" if bookies > systempred else "Over"

In [179]:
def predictionmaker(bookies, last10gamesscoring, opponentlast10games, opponentform, systempred):
    """Combine three score benchmarks and the opponent form count into one call.

    Returns ``'Prediction is OVER <bookies>'`` when the bookie line is below all
    three benchmarks and ``opponentform`` is above 5,
    ``'Prediction is UNDER <bookies>'`` when it is above all three and
    ``opponentform`` is below 5, and ``"Not conclusive"`` otherwise.
    """
    benchmarks = (last10gamesscoring, opponentlast10games, systempred)
    line_below_all = all(bookies < mark for mark in benchmarks)
    line_above_all = all(bookies > mark for mark in benchmarks)
    if line_below_all and opponentform > 5:
        verdict = f'Prediction is OVER {bookies}'
    elif line_above_all and opponentform < 5:
        verdict = f'Prediction is UNDER {bookies}'
    else:
        verdict = "Not conclusive"
    return verdict

In [180]:
# Example arguments for a single manual predictor() call. They sit inside a
# string literal, so nothing is assigned; as the last expression of the cell the
# string is simply echoed back as the cell output.
'''
Team = "Los Angeles Clippers"
Gamenumber = 34
Regressor = "GaussianProcessRegressor"
estimatororneigbor = 5
'''

'\nTeam = "Los Angeles Clippers"\nGamenumber = 34\nRegressor = "GaussianProcessRegressor"\nestimatororneigbor = 5\n'

In [172]:
#predictor(Team, Gamenumber, Regressor, estimatororneigbor)

In [181]:
# Teams to evaluate, in alphabetical order; each name is matched against the
# "Team" column by predictor().
teamlist = [
    "Atlanta Hawks",
    "Boston Celtics",
    "Brooklyn Nets",
    "Charlotte Hornets",
    "Chicago Bulls",
    "Cleveland Cavaliers",
    "Dallas Mavericks",
    "Denver Nuggets",
    "Detroit Pistons",
    "Golden State Warriors",
    "Houston Rockets",
    "Indiana Pacers",
    "Los Angeles Clippers",
    "Los Angeles Lakers",
    "Memphis Grizzlies",
    "Miami Heat",
    "Milwaukee Bucks",
    "Minnesota Timberwolves",
    "New Orleans Pelicans",
    "New York Knicks",
    "Oklahoma City Thunder",
    "Orlando Magic",
    "Philadelphia 76ers",
    "Phoenix Suns",
    "Portland Trail Blazers",
    "Sacramento Kings",
    "San Antonio Spurs",
    "Toronto Raptors",
    "Utah Jazz",
    "Washington Wizards",
]

In [221]:
# Regressor names evaluated by the main loop; each one is passed to predictor()
# as its `regressor` argument ("SVR" selects the support-vector regressor in fitmodel).
regressorlist = ["SVR"]

In [183]:
# Column labels for the per-game prediction table; the order matches the list
# returned by predictor().
columnnames = [
    "Date",
    "MarketPredicted",
    "Team",
    "GameNumber",
    "Regressor",
    "Estimatororneigbor",
    "BookiePrediction",
    "TeamAvg10Games",
    "OpponentAvg10Games",
    "OpponentFormOver10Games",
    "ModelScorePrediction",
    "ModelResultPrediction",
    "FormulaPrediction",
    "RealScore",
    "SystemResult",
]
# Game numbers 10 through 72 inclusive, passed to predictor() as predgamenumber.
games = list(range(10, 73))

In [None]:
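# One-time initialisation of the accumulated results table. Presumably left
# commented out so that re-running the notebook does not wipe the summary rows
# already collected in modelresults; it needs columnnames2 to be defined first.
#modelresults = pd.DataFrame(columns = columnnames2) #DO NOT TOUCH!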

In [184]:
# Column labels for the per-regressor summary table; the order matches the list
# returned by appendresults().
columnnames2 = [
    "Regressor",
    "EstimatorsorNeigbor",
    "ContainsNotConclusivePred",
    "Totalgames",
    "GamesRemovedPred0",
    "TotalCorrect",
    "Correct",
    "BetReturn",
    "BetProfit",
    "TotalOversPred",
    "TotalOversCorrect",
    "OversCorrect",
    "OversBetReturn",
    "OversProfit",
    "TotalUndersPred",
    "TotalUndersCorrect",
    "UndersCorrect",
    "UndersBetReturn",
    "UndersProfit",
]


In [222]:
# Forwarded to predictor() as `eon`; for the SVR branch of fitmodel it is used as epsilon.
estimatororneigbor = 0.1

# modelresults accumulates summary rows across runs. Its only other definition
# in this notebook is commented out, so on a fresh kernel the loop below used to
# fail with a NameError after all predictions had already been computed.
# Create it only when the kernel does not hold one yet, so existing rows survive.
if "modelresults" not in globals():
    modelresults = pd.DataFrame(columns=columnnames2)

# NOTE(review): appendresults() and appendonlyfirmresults() are defined in later
# cells of this notebook; they must have been executed before this cell runs.
for regressor in regressorlist:
    print(regressor)
    # Collect every prediction row first and build the frame once, instead of
    # growing the DataFrame one .loc assignment at a time.
    prediction_rows = [
        predictor(team, game, regressor, estimatororneigbor)
        for team in teamlist
        for game in games
    ]
    predresultsdf = pd.DataFrame(prediction_rows, columns=columnnames)
    # Two summary rows per regressor: all games, then only the conclusive formula calls.
    for summarise in (appendresults, appendonlyfirmresults):
        modelresults.loc[len(modelresults)] = summarise(predresultsdf)
    # Written after every regressor so finished work is kept if a later one fails.
    modelresults.to_csv("modelresults.csv")



SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR
SVR


In [175]:
def _percent(numerator, denominator):
    """Format ``numerator / denominator`` as a percentage with two decimals.

    A zero denominator gives ``"nan%"`` explicitly rather than relying on numpy's
    divide-by-zero behaviour.
    """
    if denominator == 0:
        return "nan%"
    return "{:.2f}%".format((numerator / denominator) * 100)


def appendresults(df):
    """Summarise a table of per-game predictions into one results row.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``Date``, ``Regressor``, ``Estimatororneigbor``,
        ``BookiePrediction``, ``ModelResultPrediction`` and ``SystemResult``.
        The input frame is not modified.

    Returns
    -------
    list
        ``[Regressor, EstimatorsorNeigbor, ContainsNotConclusivePred, Totalgames,
        GamesRemovedPred0, TotalCorrect, Correct, BetReturn, BetProfit,
        TotalOversPred, TotalOversCorrect, OversCorrect, OversBetReturn,
        OversProfit, TotalUndersPred, TotalUndersCorrect, UndersCorrect,
        UndersBetReturn, UndersProfit]``. The ``Correct`` and ``Profit`` entries
        are strings such as ``"58.32%"``.

    Raises
    ------
    IndexError
        If no rows remain after removing games whose bookie prediction is 0.
    """
    totgame = len(df)
    ordered = df.sort_values(by="Date")
    # +0.83 per correct call, -1 per incorrect one, 0 for any other label.
    # NOTE(review): 0.83 is presumably the net win per unit stake — confirm the odds.
    ordered = ordered.assign(
        BetReturn=np.select(
            [ordered["SystemResult"] == "Correct", ordered["SystemResult"] == "Incorrect"],
            [0.83, -1],
        )
    )
    # Games with a bookie prediction of 0 are excluded from every statistic.
    kept = ordered[ordered["BookiePrediction"] != 0]

    Regressor = kept["Regressor"].iloc[-1]
    EstimatorsorNeigbor = kept["Estimatororneigbor"].iloc[-1]
    ContainsNotConclusivePred = kept.isin(["Not conclusive"]).any().any()
    Totalgames = len(kept)
    GamesRemovedPred0 = totgame - Totalgames

    # Boolean masks with .sum() replace the former ``df[mask].count()[-1]``,
    # which depended on positional Series indexing with ``[]``.
    is_correct = kept["SystemResult"] == "Correct"
    is_over = kept["ModelResultPrediction"] == "Over"
    is_under = kept["ModelResultPrediction"] == "Under"

    TotalCorrect = is_correct.sum()
    Correct = _percent(TotalCorrect, Totalgames)
    BetReturn = kept["BetReturn"].sum()
    BetProfit = _percent(BetReturn, Totalgames)

    TotalOversPred = is_over.sum()
    TotalOversCorrect = (is_over & is_correct).sum()
    OversCorrect = _percent(TotalOversCorrect, TotalOversPred)
    OversBetReturn = kept.loc[is_over, "BetReturn"].sum()
    OversProfit = _percent(OversBetReturn, TotalOversPred)

    TotalUndersPred = is_under.sum()
    TotalUndersCorrect = (is_under & is_correct).sum()
    UndersCorrect = _percent(TotalUndersCorrect, TotalUndersPred)
    UndersBetReturn = kept.loc[is_under, "BetReturn"].sum()
    UndersProfit = _percent(UndersBetReturn, TotalUndersPred)

    funcreturn = [Regressor, EstimatorsorNeigbor, ContainsNotConclusivePred, Totalgames,
                  GamesRemovedPred0, TotalCorrect, Correct, BetReturn, BetProfit,
                  TotalOversPred, TotalOversCorrect, OversCorrect, OversBetReturn,
                  OversProfit, TotalUndersPred, TotalUndersCorrect, UndersCorrect,
                  UndersBetReturn, UndersProfit]
    return funcreturn

In [176]:
def appendonlyfirmresults(df):
    """Summarise only the games whose formula prediction is conclusive.

    Rows with ``FormulaPrediction == "Not conclusive"`` are filtered out and the
    remainder is passed to ``appendresults``; its result row is returned as-is.
    """
    is_firm = df['FormulaPrediction'] != "Not conclusive"
    return appendresults(df.loc[is_firm])

In [161]:
# NOTE(review): these statements repeat the summary-append steps that the main
# regressor loop already performs for each regressor. Running this cell after
# that loop adds the same two rows to modelresults a second time — presumably a
# leftover from a manual run; confirm before executing.
to_append = appendresults(predresultsdf)
dflen = len(modelresults)
modelresults.loc[dflen] = to_append
to_append = appendonlyfirmresults(predresultsdf)
dflen = len(modelresults)
modelresults.loc[dflen] = to_append

In [197]:
modelresults

Unnamed: 0,Regressor,EstimatorsorNeigbor,ContainsNotConclusivePred,Totalgames,GamesRemovedPred0,TotalCorrect,Correct,BetReturn,BetProfit,TotalOversPred,TotalOversCorrect,OversCorrect,OversBetReturn,OversProfit,TotalUndersPred,TotalUndersCorrect,UndersCorrect,UndersBetReturn,UndersProfit
0,RandomForestRegressor,100,True,1864,26,1087,58.32%,125.21,6.72%,953,569,59.71%,88.27,9.26%,911,518,56.86%,36.94,4.05%
1,RandomForestRegressor,100,False,448,16,305,68.08%,110.15,24.59%,279,192,68.82%,72.36,25.94%,169,113,66.86%,37.79,22.36%
2,LinearRegression,5,True,1864,26,1088,58.37%,127.04,6.82%,977,579,59.26%,82.57,8.45%,887,509,57.38%,44.47,5.01%
3,LinearRegression,5,False,452,16,309,68.36%,113.47,25.10%,284,193,67.96%,69.19,24.36%,168,116,69.05%,44.28,26.36%
4,RidgeCV,5,True,1864,26,1088,58.37%,127.04,6.82%,975,578,59.28%,82.74,8.49%,889,510,57.37%,44.3,4.98%
5,RidgeCV,5,False,452,16,309,68.36%,113.47,25.10%,284,193,67.96%,69.19,24.36%,168,116,69.05%,44.28,26.36%
6,GradientBoostingRegressor,5,True,1864,26,1088,58.37%,127.04,6.82%,977,579,59.26%,82.57,8.45%,887,509,57.38%,44.47,5.01%
7,GradientBoostingRegressor,5,False,452,16,309,68.36%,113.47,25.10%,284,193,67.96%,69.19,24.36%,168,116,69.05%,44.28,26.36%
8,GaussianProcessRegressor,5,True,1864,26,1088,58.37%,127.04,6.82%,977,579,59.26%,82.57,8.45%,887,509,57.38%,44.47,5.01%
9,GaussianProcessRegressor,5,False,452,16,309,68.36%,113.47,25.10%,284,193,67.96%,69.19,24.36%,168,116,69.05%,44.28,26.36%
