# 1. Data Preprocessing

There are two datasets in this project. The first one contains the relative score differential and the total score of each game of each team, which I scraped from ESPN. The other dataset is the "Advanced Team Statistics" of each team, again scraped by me, from Fox Sports, for each day of the 2018-2019 season. You can go over the databases; we have shared them. The statistics dataset has almost every predictor we want to train our model on, except the Home/Away information, which we will calculate using the schedule/score dataset and add to our training dataset.

Although we have data for the whole season, the model we want to build uses only earlier data to predict a game. For example, if we want to predict the 44th game of the season for the Timberwolves, the model is trained only on the previous games and then predicts the result of the 44th game. Moreover, the model does not use the statistics of the team whose game we want to predict, but those of its opponents. That is, we use the previous opponents' statistics as explanatory variables and the score of the game against each opponent as the response variable; to predict the 44th game of the season for the Timberwolves, we give the statistics of that game's opponent to the model to predict the result of that game.

In [1]:
import numpy as np
import sqlite3
import pandas as pd

In [2]:
# The 30 team abbreviations double (lower-cased) as table names in both databases.
teams = [
    "CHA","PHI","TOR","BOS","CLE","IND","WSH","MIL","MIA","DET",
    "NY","CHI","ORL","BKN","ATL","HOU","GS","POR","NO","MIN",
    "SA","OKC","DEN","LAC","UTA","LAL","SAC","DAL","PHX","MEM",
]

# Short team key -> city name, used to match the names used by ESPN and FoxSport.
city_dic = {
    "atl":"Atlanta", "bkn": "Brooklyn", "bos": "Boston", "cha":"Charlotte",
    "chi":"Chicago", "cle": "Cleveland", "dal": "Dallas", "den": "Denver",
    "det":"Detroit", "gs":"Golden State", "hou": "Houston", "ind":"Indiana",
    "lac":"LA", "lal": "Los Angeles", "mem": "Memphis", "mia":"Miami",
    "mil":"Milwaukee", "min":"Minnesota", "no":"New Orleans", "ny":"New York",
    "okc":"Oklahoma City", "orl":"Orlando", "phi":"Philadelphia", "phx":"Phoenix",
    "por":"Portland", "sa":"San Antonio", "sac":"Sacramento", "tor":"Toronto",
    "uta":"Utah", "wsh":"Washington",
}

# Open both SQLite databases and keep one cursor per database.
con_schedule = sqlite3.connect("schedule_scores.db")
con_stats = sqlite3.connect("team_stats.db")
cursor_schedule = con_schedule.cursor()
cursor_stats = con_stats.cursor()

# Read every team's table from each database in a single pass;
# both dictionaries are keyed by the lower-case abbreviation.
schedules = {}
stats = {}
for team in teams:
    table = team.lower()
    schedules[table] = cursor_schedule.execute("SELECT * FROM {}".format(table)).fetchall()
    stats[table] = cursor_stats.execute("SELECT * FROM {}".format(table)).fetchall()
    


In [39]:
# Now we will write a function that creates the x and y matrices used to predict a specific game.
# To do this we are going to add the statistics of the opponents to the schedule/score dataset.
 
def get_datasets(schedules,stats):
    """Build, for every team, a table of its games joined with opponent statistics.

    Relies on the module-level ``teams`` list and ``city_dic`` mapping.

    Parameters
    ----------
    schedules : dict
        Lower-case team key -> rows with the six fields Game, Date, Opponent,
        Home/Away(1/0), Score, TotalScore. Dates are strings parsed with the
        format ``'%b %d %Y'``.
    stats : dict
        Lower-case team key -> rows with the sixteen fields listed in
        ``stats_columns`` below. Rows are expected in ascending date order
        because they are looked up with ``searchsorted``.

    Returns
    -------
    dict
        Lower-case team key -> DataFrame with the columns, in this order:
        Game, Date, Opponent, Score, TotalScore, Date (of the statistics
        snapshot), GamesPlayed, Home/Away(1/0), OffRtg ... FtFgaAllow, WinLose.
        Home/Away(1/0) sits at position 7 so that ``iloc[:, 7:-1]`` selects
        the predictors and ``iloc[:, -1]`` the label.

    Raises
    ------
    KeyError
        If a schedule row names an opponent that is not a value of ``city_dic``.
    """
    schedule_columns = ['Game', 'Date', 'Opponent','Home/Away(1/0)','Score','TotalScore']
    stats_columns = ["Date","GamesPlayed","OffRtg","DefRtg","Pace","FtRate","ThreeFgTend",
                     "TrueS","Efg","TurnOver","OffReb","FtFga","EfgAllow","TurnOvAllow","DefRebAllow","FtFgaAllow"]

    # team_stats.db is keyed by short names while the schedule stores city
    # names, ex. "Atlanta" -> "atl". Build the reverse lookup once; setdefault
    # keeps the first key should two keys ever share a city name.
    short_name_of = {}
    for short_name, city in city_dic.items():
        short_name_of.setdefault(city, short_name)

    # Each opponent's statistics table is converted and date-parsed only once,
    # then reused for every game (and every team) that faces that opponent.
    parsed_stats = {}

    datasets = {}
    for team in teams:
        team = team.lower()

        schedule_df = pd.DataFrame(schedules[team], columns=schedule_columns)
        # Date column should be formatted from 'Oct 19 2019' to pandas date
        schedule_df['Date'] = pd.to_datetime(schedule_df['Date'],format='%b %d %Y')

        # One single-row frame of opponent statistics per scheduled game.
        stat_rows = []
        for _, row in schedule_df.iterrows():
            opponent = short_name_of[row["Opponent"]]

            if opponent not in parsed_stats:
                frame = pd.DataFrame(stats[opponent], columns=stats_columns)
                frame['Date'] = pd.to_datetime(frame['Date'],format='%b %d %Y')
                parsed_stats[opponent] = frame
            opponent_stats_df = parsed_stats[opponent]

            # Not every day's statistics were gathered, so take the first
            # snapshot dated on or after the day before the game.
            # NOTE(review): when no snapshot exists for that day the selected
            # row can be dated after the game (the notebook's sample output
            # pairs a 2018-10-17 game with a 2018-10-31 snapshot) - confirm
            # this look-ahead is acceptable.
            date = row["Date"]
            position = opponent_stats_df.Date.searchsorted(date-pd.DateOffset(days=1))
            # searchsorted returns len(frame) when the game is later than every
            # snapshot; fall back to the last snapshot instead of raising.
            position = min(position, len(opponent_stats_df) - 1)
            stat_rows.append(opponent_stats_df.iloc[position].to_frame().T)

        # A single concat instead of growing the frame row by row.
        if stat_rows:
            stats_df = pd.concat(stat_rows)
        else:
            stats_df = pd.DataFrame(columns=stats_columns)

        df = pd.concat([schedule_df.reset_index(drop=True),stats_df.reset_index(drop=True)], axis=1)
        # Changing the position of Home/Away column to make easier to chose x and y matrices
        homeAway_column = df.pop('Home/Away(1/0)')
        df.insert(7, 'Home/Away(1/0)', homeAway_column)

        # Label: 1 when the Score value is non-negative, otherwise 0.
        df['WinLose'] = np.where(df['Score'] >= 0, 1, 0)

        datasets[team] = df
    return datasets

# Build every team's frame once (keyed by lower-case abbreviation) and
# preview the first rows of the "atl" frame.
datasets = get_datasets(schedules,stats)
datasets["atl"].head()

Unnamed: 0,Game,Date,Opponent,Score,TotalScore,Date.1,GamesPlayed,Home/Away(1/0),OffRtg,DefRtg,...,TrueS,Efg,TurnOver,OffReb,FtFga,EfgAllow,TurnOvAllow,DefRebAllow,FtFgaAllow,WinLose
0,1,2018-10-17,New York,-19.0,233.0,2018-10-31 00:00:00,7,0.0,106.5,110.5,...,0.522,0.489,11.7,24.2,16.2,0.544,14.5,75.8,21.3,0
1,2,2018-10-19,Memphis,-14.0,248.0,2018-11-02 00:00:00,7,0.0,105.5,101.8,...,0.557,0.512,12.4,17.4,25.7,0.524,16.4,82.6,20.2,0
2,3,2018-10-21,Cleveland,22.0,244.0,2018-11-01 00:00:00,8,0.0,109.8,117.7,...,0.538,0.489,12.7,29.6,22.7,0.585,13.6,70.4,19.9,1
3,4,2018-10-24,Dallas,7.0,215.0,2018-10-31 00:00:00,7,1.0,107.8,113.5,...,0.542,0.507,13.2,23.9,19.4,0.568,14.2,76.1,23.4,1
4,5,2018-10-27,Chicago,-12.0,182.0,2018-10-31 00:00:00,7,1.0,108.7,118.2,...,0.567,0.535,12.9,17.5,20.1,0.539,10.9,82.5,22.3,0


## Train Test Split

The get_train_test function takes the last n games we specified as the training set and the row of the game that we want to predict as the test set.

In [None]:
def get_train_test(game_number,last_n_games,data=None):
    """Split a team's frame into training rows and the single row to predict.

    Parameters
    ----------
    game_number : int
        1-based number of the game to predict; its row is ``game_number-1``.
    last_n_games : int
        Maximum number of immediately preceding games used for training.
        Fewer are used when fewer than ``last_n_games`` games precede it.
    data : DataFrame, optional
        Frame to split. When omitted, the module-level ``df`` is used, which
        keeps existing two-argument calls working.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``: predictors are the columns
        ``7:-1`` and the label is the last column. ``x_test`` is a single row
        and ``y_test`` a single value.

    Raises
    ------
    ValueError
        If ``game_number`` is smaller than 1, which would otherwise wrap
        around to the end of the frame through negative indexing.
    """
    if game_number < 1:
        raise ValueError("game_number must be >= 1, got {}".format(game_number))

    frame = df if data is None else data

    # Row of the game to predict; every training row lies strictly before it.
    test_row = game_number - 1
    first_game = max(0, test_row - last_n_games)

    x_train = frame.iloc[first_game:test_row,7:-1]
    y_train = frame.iloc[first_game:test_row,-1]

    x_test = frame.iloc[test_row,7:-1]
    y_test = frame.iloc[test_row,-1]

    return x_train,x_test,y_train,y_test

# 2. Models

In this section several classification algorithms are wrapped in functions so that they can be called during the later prediction experiments and bet-simulation scenarios.

## Logistic Regression

In [41]:
# Walk-forward evaluation of logistic regression: for every team, games 30-49
# are each predicted by a model fitted on the games that precede them.
# Imports are done once here instead of on every loop iteration.
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

total_correct = 0
final_dev = 0  # not read in this cell
corrs1 = []    # per-team accuracy in percent, in the order of `teams`
last_n_games = 100

for team in teams:
    team = team.lower()
    correct_winner = 0
    total_deviation = 0  # not read in this cell
    df = datasets[team]
    count = 0
    for game_number in range(30,50):

        # At most `last_n_games` preceding games; with 100 and games 30-49
        # this is always every earlier game of the team.
        begin = max(0, game_number-last_n_games-1)
        count += 1

        # Columns 7:-1 are the predictors, the last column is the label.
        x_train = df.iloc[begin:game_number-1,7:-1]
        y_train = df.iloc[begin:game_number-1,-1]
        x_test = df.iloc[game_number-1,7:-1]
        y_test = df.iloc[game_number-1,-1]

        # The scaler is fitted on the training rows only and then applied
        # to the game being predicted.
        sc_X = StandardScaler()
        X = sc_X.fit_transform(x_train.values)

        classifier = LogisticRegression()
        classifier.fit(X, y_train)

        x_test_sca = sc_X.transform([x_test])
        y_pred = classifier.predict(x_test_sca)

        if y_pred[0] == y_test:
            correct_winner += 1

    corrs1.append(correct_winner/count*100)

    total_correct += correct_winner

    print("{0}: WinnerCorrect -> {1:.2f}%".format(team.upper(),correct_winner/count*100))
print("---------------------")
# Every team contributes `count` predictions, so divide by the real number of teams.
print("Overall: WinnerCorrect -> {0:.2f}%".format(total_correct/count/len(teams)*100))


CHA: WinnerCorrect -> 75.00%
PHI: WinnerCorrect -> 65.00%
TOR: WinnerCorrect -> 70.00%
BOS: WinnerCorrect -> 55.00%
CLE: WinnerCorrect -> 70.00%
IND: WinnerCorrect -> 70.00%
WSH: WinnerCorrect -> 60.00%
MIL: WinnerCorrect -> 65.00%
MIA: WinnerCorrect -> 60.00%
DET: WinnerCorrect -> 50.00%
NY: WinnerCorrect -> 75.00%
CHI: WinnerCorrect -> 65.00%
ORL: WinnerCorrect -> 50.00%
BKN: WinnerCorrect -> 55.00%
ATL: WinnerCorrect -> 60.00%
HOU: WinnerCorrect -> 70.00%
GS: WinnerCorrect -> 60.00%
POR: WinnerCorrect -> 55.00%
NO: WinnerCorrect -> 70.00%
MIN: WinnerCorrect -> 50.00%
SA: WinnerCorrect -> 50.00%
OKC: WinnerCorrect -> 35.00%
DEN: WinnerCorrect -> 65.00%
LAC: WinnerCorrect -> 55.00%
UTA: WinnerCorrect -> 65.00%
LAL: WinnerCorrect -> 50.00%
SAC: WinnerCorrect -> 70.00%
DAL: WinnerCorrect -> 65.00%
PHX: WinnerCorrect -> 65.00%
MEM: WinnerCorrect -> 60.00%
---------------------
Overall: WinnerCorrect -> 61.00%


In [None]:
def logistic_regression(x_train,x_test,y_train,y_test):
    """Fit a logistic regression on standardized predictors and predict one game.

    Parameters
    ----------
    x_train : object exposing ``.values`` (e.g. a DataFrame slice)
        Predictor rows used for fitting.
    x_test : sequence of predictor values
        The single row to predict; it is wrapped in a list to form a
        one-sample batch.
    y_train : array-like
        Labels matching ``x_train``.
    y_test : object
        True label of the predicted game; returned unchanged so the caller
        can compare it with the prediction.

    Returns
    -------
    tuple
        ``(y_pred, y_test)`` where ``y_pred`` is the result of
        ``classifier.predict`` for the one-sample batch.
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.linear_model import LogisticRegression

    # The scaler is fitted on the training rows only and reused for the
    # test row. The label is categorical, so no target scaler is needed.
    sc_X = StandardScaler()
    X = sc_X.fit_transform(x_train.values)

    classifier = LogisticRegression(random_state = 0)
    classifier.fit(X, y_train)

    x_test_sca = sc_X.transform([x_test])
    y_pred = classifier.predict(x_test_sca)

    return y_pred,y_test

## KNN

In [None]:
def knn(x_train,x_test,y_train,y_test):
    """Fit a 3-nearest-neighbour classifier on standardized predictors and predict one game.

    Parameters
    ----------
    x_train : object exposing ``.values`` (e.g. a DataFrame slice)
        Predictor rows used for fitting.
    x_test : sequence of predictor values
        The single row to predict; it is wrapped in a list to form a
        one-sample batch.
    y_train : array-like
        Labels matching ``x_train``.
    y_test : object
        True label of the predicted game; returned unchanged so the caller
        can compare it with the prediction.

    Returns
    -------
    tuple
        ``(y_pred, y_test)`` where ``y_pred`` is the result of
        ``classifier.predict`` for the one-sample batch.
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.neighbors import KNeighborsClassifier

    # The scaler is fitted on the training rows only and reused for the
    # test row. The label is categorical, so no target scaler is needed.
    sc_X = StandardScaler()
    X = sc_X.fit_transform(x_train.values)

    # Minkowski metric with p = 2.
    classifier = KNeighborsClassifier(n_neighbors = 3, metric = 'minkowski', p = 2)
    classifier.fit(X, y_train)

    x_test_sca = sc_X.transform([x_test])
    y_pred = classifier.predict(x_test_sca)

    return y_pred,y_test

## KNN (k = 6), SVM and Random Forest experiments

In [18]:
# Walk-forward evaluation of a 6-nearest-neighbour classifier: for every team,
# games 30-49 are each predicted by a model fitted on the preceding games.
# Imports are done once here instead of on every loop iteration.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

total_correct = 0
final_dev = 0  # not read in this cell
last_n_games = 100
corrs2 = []    # per-team accuracy in percent, in the order of `teams`
for team in teams:
    team = team.lower()
    correct_winner = 0
    total_deviation = 0  # not read in this cell
    # Take the frame from `datasets` (built by get_datasets); no get_dataset()
    # definition is visible in this notebook.
    df = datasets[team]
    count = 0
    for game_number in range(30,50):

        # At most `last_n_games` preceding games; with 100 and games 30-49
        # this is always every earlier game of the team.
        begin = max(0, game_number-last_n_games-1)
        count += 1

        # Columns 7:-1 are the predictors, the last column is the label.
        x_train = df.iloc[begin:game_number-1,7:-1]
        y_train = df.iloc[begin:game_number-1,-1]
        x_test = df.iloc[game_number-1,7:-1]
        y_test = df.iloc[game_number-1,-1]

        # The scaler is fitted on the training rows only and then applied
        # to the game being predicted.
        sc_X = StandardScaler()
        X = sc_X.fit_transform(x_train.values)

        classifier = KNeighborsClassifier(n_neighbors = 6, metric = 'minkowski', p = 2)
        classifier.fit(X, y_train)

        x_test_sca = sc_X.transform([x_test])
        y_pred = classifier.predict(x_test_sca)

        if y_pred[0] == y_test:
            correct_winner += 1

    corrs2.append(correct_winner/count*100)

    total_correct += correct_winner

    print("{0}: WinnerCorrect -> {1:.2f}%".format(team.upper(),correct_winner/count*100))
print("---------------------")
# Every team contributes `count` predictions, so divide by the real number of teams.
print("Overall: WinnerCorrect -> {0:.2f}%".format(total_correct/count/len(teams)*100))


CHA: WinnerCorrect -> 70.00%
PHI: WinnerCorrect -> 55.00%
TOR: WinnerCorrect -> 45.00%
BOS: WinnerCorrect -> 45.00%
CLE: WinnerCorrect -> 90.00%
IND: WinnerCorrect -> 75.00%
WSH: WinnerCorrect -> 65.00%
MIL: WinnerCorrect -> 80.00%
MIA: WinnerCorrect -> 45.00%
DET: WinnerCorrect -> 70.00%
NY: WinnerCorrect -> 80.00%
CHI: WinnerCorrect -> 70.00%
ORL: WinnerCorrect -> 45.00%
BKN: WinnerCorrect -> 45.00%
ATL: WinnerCorrect -> 50.00%
HOU: WinnerCorrect -> 70.00%
GS: WinnerCorrect -> 70.00%
POR: WinnerCorrect -> 55.00%
NO: WinnerCorrect -> 60.00%
MIN: WinnerCorrect -> 40.00%
SA: WinnerCorrect -> 60.00%
OKC: WinnerCorrect -> 45.00%
DEN: WinnerCorrect -> 70.00%
LAC: WinnerCorrect -> 50.00%
UTA: WinnerCorrect -> 50.00%
LAL: WinnerCorrect -> 30.00%
SAC: WinnerCorrect -> 50.00%
DAL: WinnerCorrect -> 65.00%
PHX: WinnerCorrect -> 70.00%
MEM: WinnerCorrect -> 60.00%
---------------------
Overall: WinnerCorrect -> 59.17%


In [22]:
# Walk-forward evaluation of a linear-kernel SVM classifier: for every team,
# games 30-49 are each predicted by a model fitted on the preceding games.
# Imports are done once here instead of on every loop iteration.
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

total_correct = 0
final_dev = 0  # not read in this cell
last_n_games = 100
corrs3 = []    # per-team accuracy in percent, in the order of `teams`

for team in teams:
    team = team.lower()
    correct_winner = 0
    total_deviation = 0  # not read in this cell
    # Take the frame from `datasets` (built by get_datasets); no get_dataset()
    # definition is visible in this notebook.
    df = datasets[team]
    count = 0

    for game_number in range(30,50):

        # At most `last_n_games` preceding games; with 100 and games 30-49
        # this is always every earlier game of the team.
        begin = max(0, game_number-last_n_games-1)
        count += 1

        # Columns 7:-1 are the predictors, the last column is the label.
        x_train = df.iloc[begin:game_number-1,7:-1]
        y_train = df.iloc[begin:game_number-1,-1]
        x_test = df.iloc[game_number-1,7:-1]
        y_test = df.iloc[game_number-1,-1]

        # The scaler is fitted on the training rows only and then applied
        # to the game being predicted.
        sc_X = StandardScaler()
        X = sc_X.fit_transform(x_train.values)

        # Training the SVM model on the Training set
        classifier = SVC(kernel = 'linear', random_state = 0)
        classifier.fit(X, y_train)

        x_test_sca = sc_X.transform([x_test])
        y_pred = classifier.predict(x_test_sca)

        if y_pred[0] == y_test:
            correct_winner += 1

    corrs3.append(correct_winner/count*100)

    total_correct += correct_winner

    print("{0}: WinnerCorrect -> {1:.2f}%".format(team.upper(),correct_winner/count*100))
print("---------------------")
# Every team contributes `count` predictions, so divide by the real number of teams.
print("Overall: WinnerCorrect -> {0:.2f}%".format(total_correct/count/len(teams)*100))

CHA: WinnerCorrect -> 90.00%
PHI: WinnerCorrect -> 70.00%
TOR: WinnerCorrect -> 70.00%
BOS: WinnerCorrect -> 55.00%
CLE: WinnerCorrect -> 80.00%
IND: WinnerCorrect -> 75.00%
WSH: WinnerCorrect -> 60.00%
MIL: WinnerCorrect -> 55.00%
MIA: WinnerCorrect -> 50.00%
DET: WinnerCorrect -> 60.00%
NY: WinnerCorrect -> 70.00%
CHI: WinnerCorrect -> 60.00%
ORL: WinnerCorrect -> 50.00%
BKN: WinnerCorrect -> 55.00%
ATL: WinnerCorrect -> 55.00%
HOU: WinnerCorrect -> 70.00%
GS: WinnerCorrect -> 60.00%
POR: WinnerCorrect -> 55.00%
NO: WinnerCorrect -> 65.00%
MIN: WinnerCorrect -> 50.00%
SA: WinnerCorrect -> 55.00%
OKC: WinnerCorrect -> 50.00%
DEN: WinnerCorrect -> 70.00%
LAC: WinnerCorrect -> 60.00%
UTA: WinnerCorrect -> 75.00%
LAL: WinnerCorrect -> 45.00%
SAC: WinnerCorrect -> 70.00%
DAL: WinnerCorrect -> 65.00%
PHX: WinnerCorrect -> 65.00%
MEM: WinnerCorrect -> 55.00%
---------------------
Overall: WinnerCorrect -> 62.17%


In [26]:
# Walk-forward evaluation of a random forest classifier: for every team,
# games 30-49 are each predicted by a model fitted on the preceding games.
# Imports are done once here instead of on every loop iteration.
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

total_correct = 0
final_dev = 0  # not read in this cell
last_n_games = 100
corrs4 = []    # per-team accuracy in percent, in the order of `teams`
for team in teams:
    team = team.lower()
    correct_winner = 0
    total_deviation = 0  # not read in this cell
    # Take the frame from `datasets` (built by get_datasets); no get_dataset()
    # definition is visible in this notebook.
    df = datasets[team]
    count = 0
    for game_number in range(30,50):

        # At most `last_n_games` preceding games; with 100 and games 30-49
        # this is always every earlier game of the team.
        begin = max(0, game_number-last_n_games-1)
        count += 1

        # Columns 7:-1 are the predictors, the last column is the label.
        x_train = df.iloc[begin:game_number-1,7:-1]
        y_train = df.iloc[begin:game_number-1,-1]
        x_test = df.iloc[game_number-1,7:-1]
        y_test = df.iloc[game_number-1,-1]

        # The scaler is fitted on the training rows only and then applied
        # to the game being predicted.
        sc_X = StandardScaler()
        X = sc_X.fit_transform(x_train.values)

        # Training the random forest model on the training set
        classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
        classifier.fit(X, y_train)

        x_test_sca = sc_X.transform([x_test])
        y_pred = classifier.predict(x_test_sca)

        if y_pred[0] == y_test:
            correct_winner += 1

    corrs4.append(correct_winner/count*100)

    total_correct += correct_winner

    print("{0}: WinnerCorrect -> {1:.2f}%".format(team.upper(),correct_winner/count*100))
print("---------------------")
# Every team contributes `count` predictions, so divide by the real number of teams.
print("Overall: WinnerCorrect -> {0:.2f}%".format(total_correct/count/len(teams)*100))

CHA: WinnerCorrect -> 60.00%
PHI: WinnerCorrect -> 60.00%
TOR: WinnerCorrect -> 65.00%
BOS: WinnerCorrect -> 35.00%
CLE: WinnerCorrect -> 80.00%
IND: WinnerCorrect -> 75.00%
WSH: WinnerCorrect -> 50.00%
MIL: WinnerCorrect -> 75.00%
MIA: WinnerCorrect -> 50.00%
DET: WinnerCorrect -> 70.00%
NY: WinnerCorrect -> 80.00%
CHI: WinnerCorrect -> 75.00%
ORL: WinnerCorrect -> 50.00%
BKN: WinnerCorrect -> 65.00%
ATL: WinnerCorrect -> 60.00%
HOU: WinnerCorrect -> 55.00%
GS: WinnerCorrect -> 50.00%
POR: WinnerCorrect -> 45.00%
NO: WinnerCorrect -> 45.00%
MIN: WinnerCorrect -> 55.00%
SA: WinnerCorrect -> 45.00%
OKC: WinnerCorrect -> 35.00%
DEN: WinnerCorrect -> 65.00%
LAC: WinnerCorrect -> 60.00%
UTA: WinnerCorrect -> 70.00%
LAL: WinnerCorrect -> 25.00%
SAC: WinnerCorrect -> 60.00%
DAL: WinnerCorrect -> 75.00%
PHX: WinnerCorrect -> 70.00%
MEM: WinnerCorrect -> 65.00%
---------------------
Overall: WinnerCorrect -> 59.00%


In [34]:
# Per-team model selection: keep, for every team, the model with the highest
# accuracy and flag the team as predictable when that accuracy is at least 70%.
#
# The labels are kept as they are because the evaluation cell matches on these
# exact strings, but they do not describe the models behind the score lists:
#   "linreg" -> corrs1, logistic regression
#   "svr"    -> corrs2, KNeighborsClassifier with 6 neighbours
#   "tree"   -> corrs3, SVC with a linear kernel
#   "forest" -> corrs4, random forest
# Order matters: on a tie the label listed first wins.
model_labels = ("linreg", "svr", "tree", "forest")

corrs = []
models = []
isValid_list = []
# zip() follows the actual number of evaluated teams instead of a fixed 30.
for team_scores in zip(corrs1, corrs2, corrs3, corrs4):
    best_score = max(team_scores)

    isValid_list.append("No" if best_score < 70 else "Yes")

    # First label whose score equals the best one.
    models.append(next(label for label, score in zip(model_labels, team_scores)
                       if score == best_score))

    corrs.append(best_score)
print(sum(corrs)/len(corrs))
print(models)

67.33333333333333
['tree', 'tree', 'linreg', 'linreg', 'svr', 'svr', 'svr', 'svr', 'linreg', 'svr', 'svr', 'forest', 'linreg', 'forest', 'linreg', 'linreg', 'svr', 'linreg', 'linreg', 'forest', 'svr', 'tree', 'svr', 'tree', 'tree', 'linreg', 'linreg', 'forest', 'svr', 'forest']


In [35]:
# Evaluation of the per-team selected model on games 50-82.
# Teams flagged "No" in isValid_list are skipped; every other team is predicted
# game by game with the model named in `models`, fitted on the preceding
# `last_n_games` games.
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Label -> factory for a fresh, unfitted classifier. The labels are the
# strings stored in `models`; they do not describe the classifiers:
# "linreg" is logistic regression, "svr" is KNN, "tree" is a linear SVM.
model_factories = {
    "linreg": lambda: LogisticRegression(random_state = 0),
    "svr": lambda: KNeighborsClassifier(n_neighbors = 6, metric = 'minkowski', p = 2),
    "tree": lambda: SVC(kernel = 'linear', random_state = 0),
    "forest": lambda: RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0),
}

# Positions of the predictors used here. Given the column order produced by
# get_datasets these are Home/Away(1/0), OffRtg, DefRtg, TrueS, TurnOver,
# OffReb and FtFgaAllow.
feature_columns = [7,8,9,13,15,16,21]

total_correct = 0
total_games = 0   # predictions made over all evaluated teams
final_dev = 0     # not read in this cell
last_n_games = 40
team_valid = 0

for team,model,isValid in zip(teams,models,isValid_list):
    team = team.lower()
    if isValid != "Yes":
        print("{0} cannot be predicted".format(team.upper()))
        continue

    correct_winner = 0
    # Take the frame from `datasets` (built by get_datasets); no get_dataset()
    # definition is visible in this notebook.
    df = datasets[team]
    count = 0

    for game_number in range(50,83):
        count += 1

        # Never reach back before the first game. The window is computed
        # locally so that `last_n_games` is not shrunk for later games/teams.
        window = min(last_n_games, game_number-1)
        begin = game_number-window-1

        x_train = df.iloc[begin:game_number-1,feature_columns]
        y_train = df.iloc[begin:game_number-1,-1]
        x_test = df.iloc[game_number-1,feature_columns]
        y_test = df.iloc[game_number-1,-1]

        # The scaler is fitted on the training rows only and then applied
        # to the game being predicted.
        sc_X = StandardScaler()
        X = sc_X.fit_transform(x_train.values)

        # An unknown label raises KeyError instead of silently reusing the
        # prediction of the previous game.
        classifier = model_factories[model]()
        classifier.fit(X, y_train)

        x_test_sca = sc_X.transform([x_test])
        y_pred = classifier.predict(x_test_sca)

        if y_pred[0] == y_test:
            correct_winner += 1

    total_correct += correct_winner
    total_games += count
    print("{0}: WinnerCorrect -> {1:.2f}%".format(team.upper(),correct_winner/count*100))
    team_valid += 1
print("---------------------")
if total_games:
    print("Overall: WinnerCorrect -> {0:.2f}%".format(total_correct/total_games*100))
else:
    # No team reached the validity threshold, so there is nothing to average.
    print("Overall: no team could be predicted")


CHA: WinnerCorrect -> 51.52%
PHI: WinnerCorrect -> 60.61%
TOR: WinnerCorrect -> 66.67%
BOS cannot be predicted
CLE: WinnerCorrect -> 72.73%
IND: WinnerCorrect -> 78.79%
WSH cannot be predicted
MIL: WinnerCorrect -> 60.61%
MIA cannot be predicted
DET: WinnerCorrect -> 57.58%
NY: WinnerCorrect -> 78.79%
CHI: WinnerCorrect -> 63.64%
ORL cannot be predicted
BKN cannot be predicted
ATL cannot be predicted
HOU: WinnerCorrect -> 78.79%
GS: WinnerCorrect -> 54.55%
POR cannot be predicted
NO: WinnerCorrect -> 51.52%
MIN cannot be predicted
SA cannot be predicted
OKC cannot be predicted
DEN: WinnerCorrect -> 60.61%
LAC cannot be predicted
UTA: WinnerCorrect -> 84.85%
LAL cannot be predicted
SAC: WinnerCorrect -> 72.73%
DAL: WinnerCorrect -> 66.67%
PHX: WinnerCorrect -> 75.76%
MEM cannot be predicted
---------------------
Overall: WinnerCorrect -> 66.84%
