In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_poisson_deviance
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.datasets import make_regression
from sklearn.linear_model import RidgeCV
from sklearn.ensemble import VotingRegressor
from sklearn.neighbors import RadiusNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor

In [2]:
def opennbacsv(filename):
    """Load a games CSV from the project's ``data`` folder and tidy it.

    The file is looked up in ``<parent of the current working directory>/data``.
    ``Teamwinpred`` and ``Opponentwinpred`` are read as strings, stripped of a
    trailing ``%`` and divided by 100; the ``Country``, ``League`` and
    ``Season`` columns are removed.

    Parameters
    ----------
    filename : str
        Name of the CSV file inside the ``data`` folder.

    Returns
    -------
    pandas.DataFrame
        The loaded table with the conversions above applied.
    """
    data_dir = os.sep.join([os.path.dirname(os.getcwd()), "data"])
    games = pd.read_csv(os.sep.join([data_dir, filename]))
    for pct_column in ("Teamwinpred", "Opponentwinpred"):
        games[pct_column] = games[pct_column].str.rstrip('%').astype('float') / 100.0
    games = games.drop(columns=["Country", "League", "Season"])
    return games

In [5]:
#nba.columns

In [3]:
# Load the cleaned games table into a notebook-level variable.
# predictor() reloads the same file itself on every call, so within the cells
# shown here this frame is not read by the functions below.
nba = opennbacsv("nbaclean.csv")

In [301]:
def fitmodel(X, y, regressor, eon=5):
    """Fit a preprocessing + regression pipeline and return it.

    The pipeline ordinal-encodes ``Homeoraway``, ``Favourite`` and
    ``OpponentFormReceivedLast10Games``, passes a fixed list of numeric
    columns through unchanged, drops every other column of ``X`` and then
    fits the selected regressor.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; must contain every column named in the preprocessor.
    y : array-like
        Regression target, one value per row of ``X``.
    regressor : str
        Name of the estimator to use. Recognised names are
        ``"LinearRegression"``, ``"RandomForestRegressor"``, ``"RidgeCV"``,
        ``"GradientBoostingRegressor"``, ``"GaussianProcessRegressor"`` and
        ``"KNeighborsRegressor"``. Any other value selects an RBF ``SVR``.
    eon : int, default 5
        "Estimators or neighbours": ``n_estimators`` for the random forest,
        ``n_neighbors`` for k-nearest-neighbours; ignored by the others.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline (steps ``"preprocessor"`` and ``"regressor"``).

    Notes
    -----
    NOTE(review): ``Teamscore`` is one of the passthrough features, so when
    ``y`` is that same column the target is also visible as an input.
    """
    # Factories are lazy so only the requested estimator is instantiated.
    regressor_factories = {
        "LinearRegression": lambda: LinearRegression(),
        "RandomForestRegressor": lambda: RandomForestRegressor(n_estimators=eon, random_state=1),
        "RidgeCV": lambda: RidgeCV(),
        "GradientBoostingRegressor": lambda: GradientBoostingRegressor(random_state=1),
        "GaussianProcessRegressor": lambda: GaussianProcessRegressor(random_state=1),
        "KNeighborsRegressor": lambda: KNeighborsRegressor(n_neighbors=eon),
    }

    def default_factory():
        # Unrecognised names fall back to SVR, as callers rely on with "SVR".
        return SVR(gamma=.1, kernel='rbf', C=1.0, epsilon=0.2)

    reg = regressor_factories.get(regressor, default_factory)()

    # The training window is small, so a row scored later can easily carry a
    # category never seen during fit. Encode such values as -1 rather than
    # letting predict() raise.
    try:
        encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
    except TypeError:
        # scikit-learn < 0.24 does not accept these arguments; keep the
        # default behaviour (unknown categories raise) on those versions.
        encoder = OrdinalEncoder()

    categorical_columns = ["Homeoraway", "Favourite", "OpponentFormReceivedLast10Games"]
    numeric_columns = [
        "Teamodds", "Totalscorepred", "Teamwinpred", "Teamscorepred", "Opponentscorepred",
        "Teamscore", "Totalscorereal", "Totalfrompred", "Scoredfrompred",
        "OpponentAvgRealReceived10games", "OpponentAvgPredReceived10games",
        "OpponentAvgDifReceivedRealvsPred10games", "OpponentReceivedOorULast10Games",
    ]
    tree_preprocessor = ColumnTransformer(
        [
            ("categorical", encoder, categorical_columns),
            ("numeric", "passthrough", numeric_columns),
        ],
        remainder="drop",
    )
    model = Pipeline([
        ("preprocessor", tree_preprocessor),
        ("regressor", reg),
    ])
    return model.fit(X, y)

In [300]:
def _add_opponent_features(frame, nba, gamesfinished, columnlist):
    """Return a copy of ``frame`` with form features for each row's opponent.

    For every row the opponent's games are taken from ``nba``, limited to the
    opponent's first ``gamesfinished`` games and then to the last 10 of those.
    Averages and counts over that window are stored in nine new columns.
    """
    feature_columns = {
        "OpponentAvgRealScored10games": [],
        "OpponentAvgPredScored10games": [],
        "OpponentAvgDifScoredRealvsPred10games": [],
        "OpponentAvgRealReceived10games": [],
        "OpponentAvgPredReceived10games": [],
        "OpponentAvgDifReceivedRealvsPred10games": [],
        "OpponentFormScoredLast10Games": [],
        "OpponentFormReceivedLast10Games": [],
        "OpponentReceivedOorULast10Games": [],
    }
    for opponentteam in frame["Opponent"]:
        opponentdf = nba.loc[nba["Team"] == opponentteam, columnlist].head(gamesfinished).tail(10)
        scored_diff = opponentdf["Scoredfrompred"].mean()
        received_diff = opponentdf["Receivedfrompred"].mean()
        # Number of games in which the points conceded met or exceeded the line.
        conceded_over = (opponentdf["Opponentscore"] >= opponentdf["Opponentscorepred"]).sum()

        feature_columns["OpponentAvgRealScored10games"].append(opponentdf["Teamscore"].mean())
        feature_columns["OpponentAvgPredScored10games"].append(opponentdf["Teamscorepred"].mean())
        feature_columns["OpponentAvgDifScoredRealvsPred10games"].append(scored_diff)
        feature_columns["OpponentAvgRealReceived10games"].append(opponentdf["Opponentscore"].mean())
        feature_columns["OpponentAvgPredReceived10games"].append(opponentdf["Opponentscorepred"].mean())
        feature_columns["OpponentAvgDifReceivedRealvsPred10games"].append(received_diff)
        feature_columns["OpponentFormScoredLast10Games"].append(
            "FormIsScoringOver" if scored_diff > 0 else "FormIsScoringUnder")
        feature_columns["OpponentFormReceivedLast10Games"].append(
            "FormIsReceivingOver" if received_diff > 0 else "FormIsReceivingUnder")
        # Stored as float so the value reported to callers keeps its previous type.
        feature_columns["OpponentReceivedOorULast10Games"].append(float(conceded_over))
    # Whole columns are assigned at once: no chained ``df[col].iloc[i] = ...``
    # writes and no strings written into float NaN columns.
    return frame.assign(**feature_columns)


def predictor(Column2predict, Team, predgamenumber, regressor="SVR", eon=5):
    """Predict one column for a team's game and compare it with the bookmaker line.

    A model is fitted on the team's last (up to) 10 finished games before game
    number ``predgamenumber`` and then applied to that game. Result columns
    that are unknown before the game is played are replaced with stand-ins
    built from earlier games and from the bookmaker's predictions.

    Parameters
    ----------
    Column2predict : str
        Column of the games table used as regression target.
    Team : str
        Value of the ``Team`` column selecting the team's games.
    predgamenumber : int
        1-based position of the game to predict within the team's rows.
    regressor : str, default "SVR"
        Estimator name forwarded to ``fitmodel``.
    eon : int, default 5
        Estimators-or-neighbours setting forwarded to ``fitmodel``.

    Returns
    -------
    list
        ``[date, Column2predict, Team, predgamenumber, regressor, eon,
        bookmaker score, team average over the training window, opponent
        average points conceded, opponent over count, model prediction,
        "Over"/"Under", formula verdict, real score, "Correct"/"Incorrect"]``.

    Raises
    ------
    ValueError
        If there is no finished game before ``predgamenumber`` or the team has
        fewer than ``predgamenumber`` rows in the data. Errors raised by the
        fitted pipeline (for example when it rejects an input row) propagate
        unchanged.
    """
    columnlist = ["Date", "Homeoraway", "Opponent", "Teamodds", "Teamscore", "Opponentodds",
                  "Opponentscore", "Favourite", "Totalscorepred", "Totalscorereal",
                  "Totalfrompred", "Teamwinpred", "Teamscorepred", "Scoredfrompred",
                  "Opponentwinpred", "Opponentscorepred", "Receivedfrompred"]
    nba = opennbacsv("nbaclean.csv")
    gamesfinished = predgamenumber - 1
    team_games = nba.loc[nba["Team"] == Team, columnlist]
    if gamesfinished < 1 or len(team_games) < predgamenumber:
        raise ValueError(
            f"Cannot predict game number {predgamenumber} for {Team!r}: "
            f"{len(team_games)} game(s) available and at least one earlier game is required."
        )

    # Training window: the last (up to) 10 games played before the target game.
    history = team_games.head(gamesfinished).tail(10)
    history = _add_opponent_features(history, nba, gamesfinished, columnlist)
    model = fitmodel(history, history[Column2predict], regressor, eon)

    # Prediction window: built from the team's own games so that its last row
    # is the target game itself (previously it was derived from the training
    # frame, which scored a game the model had already been fitted on).
    upcoming = team_games.head(predgamenumber).tail(10)
    date = upcoming["Date"].iloc[-1]
    Bookie = upcoming["Teamscorepred"].iloc[-1]
    rs = upcoming["Teamscore"].iloc[-1]  # real score, kept only for evaluation

    # The averages that stand in for real results must not include the target
    # game, so the team's scoring average comes from the finished games only.
    team_avg_score = history["Teamscore"].mean()
    total_pred_change = upcoming["Totalscorepred"].iloc[-1] - upcoming["Totalscorepred"].iloc[-2]
    # These columns are used in the fit but are not available when predicting,
    # so the target row gets placeholder values instead of the real results.
    placeholders = {
        "Teamscore": team_avg_score,
        "Opponentscore": upcoming["Opponentscorepred"].mean(),
        "Totalscorereal": upcoming["Totalscorepred"].mean(),
        "Totalfrompred": total_pred_change,
        "Scoredfrompred": total_pred_change / 2,
        "Receivedfrompred": total_pred_change / 2,
    }
    # Cast first so fractional placeholders fit integer score columns; astype
    # also returns an independent frame, so the writes below cannot alias
    # ``nba`` and need no global warning suppression.
    upcoming = upcoming.astype({column: float for column in placeholders})
    for column, value in placeholders.items():
        upcoming.iloc[-1, upcoming.columns.get_loc(column)] = value
    upcoming = _add_opponent_features(upcoming, nba, gamesfinished, columnlist)

    last10gamesscoring = float("{:.2f}".format(team_avg_score))
    opponentlast10games = float("{:.2f}".format(upcoming["OpponentAvgRealReceived10games"].iloc[-1]))
    opponentform = upcoming["OpponentReceivedOorULast10Games"].iloc[-1]
    systemprediction = float("{:.2f}".format(model.predict(upcoming)[-1]))

    VariablePrediction = predictionmaker(Bookie, last10gamesscoring, opponentlast10games,
                                         opponentform, systemprediction)
    SystemStraightPrediction = systempredictionmaker(Bookie, systemprediction)
    if SystemStraightPrediction == "Over" and rs > Bookie:
        SystemPred = "Correct"
    elif SystemStraightPrediction == "Under" and rs < Bookie:
        SystemPred = "Correct"
    else:
        SystemPred = "Incorrect"

    # Report the arguments this call actually received, not notebook globals.
    return [date, Column2predict, Team, predgamenumber, regressor, eon, Bookie,
            last10gamesscoring, opponentlast10games, opponentform, systemprediction,
            SystemStraightPrediction, VariablePrediction, rs, SystemPred]
    


In [289]:
def systempredictionmaker(bookies, systempred):
    """Classify the model's prediction against the bookmaker's figure.

    Returns ``"Under"`` when ``bookies`` is strictly greater than
    ``systempred``; otherwise (including a tie) returns ``"Over"``.
    """
    return "Under" if bookies > systempred else "Over"

In [288]:
def predictionmaker(bookies, last10gamesscoring, opponentlast10games, opponentform, systempred):
    """Combine three indicators and the opponent's over-count into one verdict.

    An OVER verdict needs ``bookies`` strictly below the team's average, the
    opponent's average and the model prediction, with ``opponentform`` above 5.
    An UNDER verdict needs ``bookies`` strictly above all three, with
    ``opponentform`` below 5. Anything else is ``"Not conclusive"``.
    """
    indicators = (last10gamesscoring, opponentlast10games, systempred)
    # Generators keep the comparisons lazy and in the original left-to-right order.
    if all(bookies < indicator for indicator in indicators) and opponentform > 5:
        return f'Prediction is OVER {bookies}'
    if all(bookies > indicator for indicator in indicators) and opponentform < 5:
        return f'Prediction is UNDER {bookies}'
    return "Not conclusive"

    

In [292]:
# Column layout of the results table; the order follows the list returned by predictor().
columnnames = [
    "Date",
    "MarketPredicted",
    "Team",
    "Game Number",
    "Regressor",
    "Estimatororneigbor",
    "Bookie Prediction",
    "TeamAvg10Games",
    "OpponentAvg10Games",
    "OpponentFormOver10Games",
    "Model Score Prediction",
    "Model Result Prediction",
    "FormulaPrediction",
    "Real Score",
    "System Result",
]
# Empty results table, filled by the batch run further down.
predresultsdf = pd.DataFrame(columns=columnnames)
# Game numbers to predict: 10 through 72 inclusive.
games = [*range(10, 73)]

In [225]:
# Inputs for the single-game predictor() call in the next cell.
Column2predict = "Teamscore"  # column of the games table used as the regression target
Team = "Los Angeles Clippers"
Gamenumber = 34  # passed to predictor() as predgamenumber
Regressor = "KNeighborsRegressor"  # estimator name matched inside fitmodel()
estimatororneigbor = 4  # passed as eon: n_neighbors for KNN, n_estimators for the random forest

In [282]:
# Single-game run; the returned list is shown as the cell output.
predictor(
    Column2predict=Column2predict,
    Team=Team,
    predgamenumber=Gamenumber,
    regressor=Regressor,
    eon=estimatororneigbor,
)

In [275]:
# Team names iterated by the batch run, one per line.
teamlist = [
    "Atlanta Hawks",
    "Boston Celtics",
    "Brooklyn Nets",
    "Charlotte Hornets",
    "Chicago Bulls",
    "Cleveland Cavaliers",
    "Dallas Mavericks",
    "Denver Nuggets",
    "Detroit Pistons",
    "Golden State Warriors",
    "Houston Rockets",
    "Indiana Pacers",
    "Los Angeles Clippers",
    "Los Angeles Lakers",
    "Memphis Grizzlies",
    "Miami Heat",
    "Milwaukee Bucks",
    "Minnesota Timberwolves",
    "New Orleans Pelicans",
    "New York Knicks",
    "Oklahoma City Thunder",
    "Orlando Magic",
    "Philadelphia 76ers",
    "Phoenix Suns",
    "Portland Trail Blazers",
    "Sacramento Kings",
    "San Antonio Spurs",
    "Toronto Raptors",
    "Utah Jazz",
    "Washington Wizards",
]

In [302]:
# Batch-run settings. ``Team`` and ``Gamenumber`` are not read by the loop
# below, which iterates over ``teamlist`` and ``games`` instead.
Column2predict = "Teamscore"
Team = "New York Knicks"
Gamenumber = 11
Regressor = "RandomForestRegressor"
estimatororneigbor = 5

# Collect every prediction first and build the table once. Growing a DataFrame
# with ``df.loc[len(df)] = row`` copies it on each insert, and rebuilding the
# frame here means re-running this cell does not append duplicate rows.
prediction_rows = []
for team in teamlist:
    print(team)  # progress marker, one line per team
    for game_number in games:
        prediction_rows.append(
            predictor(Column2predict, team, game_number, Regressor, estimatororneigbor)
        )
predresultsdf = pd.DataFrame(prediction_rows, columns=columnnames)



Atlanta Hawks
Boston Celtics
Brooklyn Nets
Charlotte Hornets
Chicago Bulls
Cleveland Cavaliers
Dallas Mavericks
Denver Nuggets
Detroit Pistons
Golden State Warriors
Houston Rockets
Indiana Pacers
Los Angeles Clippers
Los Angeles Lakers
Memphis Grizzlies
Miami Heat
Milwaukee Bucks
Minnesota Timberwolves
New Orleans Pelicans
New York Knicks
Oklahoma City Thunder
Orlando Magic
Philadelphia 76ers
Phoenix Suns
Portland Trail Blazers
Sacramento Kings
San Antonio Spurs
Toronto Raptors
Utah Jazz
Washington Wizards


In [299]:
# Write the batch results to the current working directory; with the default
# to_csv settings the DataFrame index is written as the first, unnamed column.
# NOTE(review): the file name is hard-coded and does not follow the ``Regressor`` setting.
predresultsdf.to_csv("allRandomForestRegressor.csv")