In [1]:
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import GridSearchCV
from sqlalchemy import create_engine
import psycopg2
import json

In [2]:
password = 'Techbobcats=0618'
#your postgresql db
engine = create_engine(f'postgresql://postgres:{password}@localhost/NBA')

In [3]:
# Read DF
df = pd.read_sql_query('Select * from public."Games"', con = engine)
df = df.dropna()

# Divide East and West
EastDF = df.loc[(df['CONFERENCE']=='EAST')]
WestDF = df.loc[(df['CONFERENCE']=='WEST')]

EastTeam = EastDF['TEAM_ABBREVIATION'].unique()
WestTeam = WestDF['TEAM_ABBREVIATION'].unique()

# Final Versions of East and West DF
EastDF = EastDF.drop(['TEAM_ID', 'TEAM_ABBREVIATION', 'SEASON_ID', 'TEAM_NAME', 'GAME_ID', 'MIN'\
                      , 'GAME_DATE', 'PLUS_MINUS', 'AST', 'TOV', 'CONFERENCE'], axis = 1)
WestDF = WestDF.drop(['TEAM_ID', 'TEAM_ABBREVIATION', 'SEASON_ID', 'TEAM_NAME', 'GAME_ID', 'MIN'\
                      , 'GAME_DATE', 'PLUS_MINUS', 'AST', 'TOV', 'CONFERENCE'], axis = 1)

WestDF = WestDF.reset_index(drop=True)
EastDF = EastDF.reset_index(drop=True)

In [4]:
EastTeams_DF = {}
WestTeams_DF = {}

for item in EastTeam:
    for index, row in EastDF.iterrows():
        if index < 15 and item!=EastTeam[index]:
            EastTeams_DF[f'{item}{index}']=EastDF.loc[(EastDF["MATCHUP"]== f'{item} @ {EastTeam[index]}')| \
                                                  (EastDF["MATCHUP"]==f"{item} vs. {EastTeam[index]}")]
            
for item in WestTeam:
    for index, row in WestDF.iterrows():
        if index < 15 and item!=WestTeam[index]:
            WestTeams_DF[f'{item}{index}']=WestDF.loc[(WestDF["MATCHUP"]== f'{item} @ {WestTeam[index]}')| \
                                                  (WestDF["MATCHUP"]==f"{item} vs. {WestTeam[index]}")]
            


In [5]:
EastDctDum = []
WestDctDum = []

for item in EastTeam:
    for index, row in EastDF.iterrows():
        try:
            if index < 15 and item!=EastTeam[index]:    
                EDDUM1 = pd.get_dummies(EastTeams_DF[f'{item}{index}'])
                EastDctDum.append(EDDUM1)
        except:
            pass
        
for item in WestTeam:
    for index, row in WestDF.iterrows():
        try:
            if index < 15 and item!=WestTeam[index]:    
                WDDUM1 = pd.get_dummies(WestTeams_DF[f'{item}{index}'])
                WestDctDum.append(WDDUM1)
        except:
            pass

WestDctDum[0]

Unnamed: 0,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,STL,BLK,PF,MATCHUP_UTA @ GSW,MATCHUP_UTA vs. GSW,WL_L,WL_W
476,107,38,89,0.427,15,46,0.326,16,22,0.727,12,35,47,8,2,17,1,0,1,0
690,111,41,89,0.461,17,51,0.333,12,17,0.706,11,41,52,3,9,16,0,1,0,1
758,92,31,81,0.383,11,35,0.314,19,23,0.826,17,34,51,7,2,13,1,0,1,0
848,116,40,92,0.435,14,44,0.318,22,28,0.786,15,30,45,7,0,21,0,1,1,0
1464,116,41,91,0.451,13,44,0.295,21,24,0.875,13,35,48,7,1,20,1,0,1,0
1721,119,41,93,0.441,15,44,0.341,22,29,0.759,18,33,51,6,6,16,1,0,1,0
1900,127,45,94,0.479,20,50,0.4,17,22,0.773,14,43,57,6,4,20,0,1,0,1
2489,129,50,93,0.538,17,40,0.425,12,23,0.522,15,41,56,5,3,19,1,0,0,1
2666,114,37,79,0.468,16,38,0.421,24,31,0.774,7,36,43,5,3,20,0,1,0,1
2761,113,43,85,0.506,14,32,0.438,13,16,0.813,8,40,48,4,10,14,0,1,0,1


In [6]:
count = list(range(0, len(EastDctDum), 1))
# for i in count:
#     Home = DctDum[i].columns[16][8:11]
#     print(Home, i)
EastDctDum[0].columns[16][8:11]

'ORL'

In [7]:
#import time
def east_loop_scaler(EastDctDum):
    count =  list(range(0, len(EastDctDum), 1))
    
    TrainingScore = []
    TestingScore = []
    ScaledTrainingScore = []
    ScaledTestingScore = []
    PercentageWin = []
    PercentageLoss = []
    For = []
    Against = []
    Versus = []

    for i in count:
        steps = [
            ('pca', PCA(n_components=.99)),
            ('rf', RandomForestClassifier())]#,
            #('clf', LogisticRegression())]


        Home = EastDctDum[i].columns[16][8:11]
        Away = EastDctDum[i].columns[16][14:]
        y = EastDctDum[i]['WL_W'].values
        x = EastDctDum[i].drop(['WL_W', 'WL_L'], axis = 1)
        V = f'{Home} V {Away}'


        xTrain, xTest, yTrain, yTest = train_test_split(x, y, random_state=31, shuffle=False)
        yTDF = pd.DataFrame(yTrain)
        yTDF1 = yTDF.loc[yTDF[0]==1].count()[0]
        yTDF2 = yTDF.loc[yTDF[0]==0].count()[0]

        pipe = Pipeline(steps)
        pipe.fit(xTrain, yTrain)

        scaler = StandardScaler().fit(xTrain)
        xTrainScaled = scaler.transform(xTrain)
        xTestScaled = scaler.transform(xTest)

        TrainScore = pipe.score(xTrain, yTrain)
        TestScore = pipe.score(xTest, yTest)
        ScaledTrainScore = pipe.score(xTrainScaled, yTrain)
        ScaledTestScore = pipe.score(xTestScaled, yTest)

        Logpredict = pipe.predict(xTrain)
        Logpddf = pd.DataFrame(Logpredict)
        Logloss = Logpddf.loc[Logpddf[0]==0].count()[0]
        Logwin = Logpddf.loc[Logpddf[0]==1].count()[0]
        percentWin = (Logwin/(Logwin+Logloss))
        percentLoss = (1-percentWin)

        TrainingScore.append(TrainScore)
        TestingScore.append(TestScore)
        ScaledTrainingScore.append(ScaledTrainScore)
        ScaledTestingScore.append(ScaledTestScore)
        PercentageWin.append(percentWin)
        PercentageLoss.append(percentLoss)
        For.append(Home)
        Against.append(Away)
        Versus.append(V)

    return TrainingScore, TestingScore, ScaledTrainingScore, ScaledTestingScore, PercentageWin, PercentageLoss, For, Against, Versus

In [8]:
#import time
def west_loop_scaler(WestDctDum):
    count =  list(range(0, len(WestDctDum), 1))
    
    TrainingScore = []
    TestingScore = []
    ScaledTrainingScore = []
    ScaledTestingScore = []
    PercentageWin = []
    PercentageLoss = []
    For = []
    Against = []
    Versus = []

    for i in count:
        steps = [
            ('pca', PCA(n_components=.99)),
            ('rf', RandomForestClassifier())]#,
            #('clf', LogisticRegression())]


        Home = WestDctDum[i].columns[16][8:11]
        Away = WestDctDum[i].columns[16][14:]
        y = WestDctDum[i]['WL_W'].values
        x = WestDctDum[i].drop(['WL_W', 'WL_L'], axis = 1)
        V = f'{Home} V {Away}'


        xTrain, xTest, yTrain, yTest = train_test_split(x, y, random_state=31, shuffle=False)
        yTDF = pd.DataFrame(yTrain)
        yTDF1 = yTDF.loc[yTDF[0]==1].count()[0]
        yTDF2 = yTDF.loc[yTDF[0]==0].count()[0]

        pipe = Pipeline(steps)
        pipe.fit(xTrain, yTrain)

        scaler = StandardScaler().fit(xTrain)
        xTrainScaled = scaler.transform(xTrain)
        xTestScaled = scaler.transform(xTest)

        TrainScore = pipe.score(xTrain, yTrain)
        TestScore = pipe.score(xTest, yTest)
        ScaledTrainScore = pipe.score(xTrainScaled, yTrain)
        ScaledTestScore = pipe.score(xTestScaled, yTest)

        Logpredict = pipe.predict(xTrain)
        Logpddf = pd.DataFrame(Logpredict)
        Logloss = Logpddf.loc[Logpddf[0]==0].count()[0]
        Logwin = Logpddf.loc[Logpddf[0]==1].count()[0]
        percentWin = (Logwin/(Logwin+Logloss))
        percentLoss = (1-percentWin)

        TrainingScore.append(TrainScore)
        TestingScore.append(TestScore)
        ScaledTrainingScore.append(ScaledTrainScore)
        ScaledTestingScore.append(ScaledTestScore)
        PercentageWin.append(percentWin)
        PercentageLoss.append(percentLoss)
        For.append(Home)
        Against.append(Away)
        Versus.append(V)

    return TrainingScore, TestingScore, ScaledTrainingScore, ScaledTestingScore, PercentageWin, PercentageLoss, For, Against, Versus

In [9]:
EastScore = east_loop_scaler(EastDctDum)
EastScoreDF = pd.DataFrame(EastScore)
EastScoreDF = EastScoreDF.rename(columns=EastScoreDF.iloc[8]).drop(EastScoreDF.index[8])
EastScoreDF

Unnamed: 0,ORL V CLE,ORL V ATL,ORL V IND,ORL V BOS,ORL V CHI,ORL V MIA,ORL V CHA,ORL V WAS,ORL V TOR,ORL V PHI,...,BKN V BOS,BKN V CHI,BKN V MIA,BKN V CHA,BKN V WAS,BKN V TOR,BKN V PHI,BKN V MIL,BKN V DET,BKN V NYK
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,0.166667,0.666667,0.8,0.666667,0.6,0.428571,0.833333,0.6,1.0,0.666667,...,0.5,1.0,0.666667,0.4,0.166667,1.0,0.625,0.625,0.333333,0.714286
2,0.5,0.555556,0.733333,0.8125,0.533333,0.631579,0.555556,0.533333,0.823529,0.733333,...,0.541667,0.357143,0.6,0.384615,0.5625,0.75,0.636364,0.545455,0.8,0.421053
3,0.833333,0.333333,1.0,0.666667,0.2,0.285714,1.0,0.8,0.833333,0.833333,...,0.125,0.2,0.333333,0.6,0.5,0.857143,0.625,0.875,0.333333,0.571429
4,0.6875,0.444444,0.333333,0.1875,0.533333,0.368421,0.444444,0.466667,0.176471,0.266667,...,0.541667,0.642857,0.4,0.615385,0.5625,0.25,0.363636,0.454545,0.8,0.736842
5,0.3125,0.555556,0.666667,0.8125,0.466667,0.631579,0.555556,0.533333,0.823529,0.733333,...,0.458333,0.357143,0.6,0.384615,0.4375,0.75,0.636364,0.545455,0.2,0.263158
6,ORL,ORL,ORL,ORL,ORL,ORL,ORL,ORL,ORL,ORL,...,BKN,BKN,BKN,BKN,BKN,BKN,BKN,BKN,BKN,BKN
7,CLE,ATL,IND,BOS,CHI,MIA,CHA,WAS,TOR,PHI,...,BOS,CHI,MIA,CHA,WAS,TOR,PHI,MIL,DET,NYK


In [10]:
WestScore = west_loop_scaler(WestDctDum)
WestScoreDF = pd.DataFrame(WestScore)
WestScoreDF = WestScoreDF.rename(columns=WestScoreDF.iloc[8]).drop(WestScoreDF.index[8])
WestScoreDF

Unnamed: 0,UTA V GSW,UTA V LAC,UTA V NOP,UTA V HOU,UTA V SAS,UTA V DEN,UTA V LAL,UTA V DAL,UTA V POR,UTA V OKC,...,MIN V HOU,MIN V SAS,MIN V DEN,MIN V LAL,MIN V DAL,MIN V POR,MIN V OKC,MIN V MEM,MIN V PHX,MIN V SAC
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,0.833333,0.5,0.833333,1.0,0.333333,0.571429,0.6,0.75,0.857143,0.428571,...,0.428571,0.833333,0.285714,0.5,0.7,0.6,1.0,0.571429,0.6,0.6
2,0.588235,0.535714,0.333333,0.541667,0.5,0.571429,0.5,0.52381,0.666667,0.380952,...,0.5,0.6,0.333333,0.352941,0.481481,0.466667,0.375,0.809524,0.769231,0.538462
3,0.166667,0.5,0.166667,0.75,0.166667,0.571429,0.2,0.25,0.571429,0.857143,...,1.0,0.166667,0.714286,0.333333,0.0,0.6,0.5,0.571429,0.6,0.8
4,0.588235,0.535714,0.666667,0.458333,0.5,0.571429,0.5,0.47619,0.666667,0.619048,...,0.5,0.6,0.333333,0.647059,0.518519,0.466667,0.625,0.190476,0.230769,0.538462
5,0.411765,0.464286,0.333333,0.541667,0.5,0.428571,0.5,0.52381,0.333333,0.380952,...,0.5,0.4,0.666667,0.352941,0.481481,0.533333,0.375,0.809524,0.769231,0.461538
6,UTA,UTA,UTA,UTA,UTA,UTA,UTA,UTA,UTA,UTA,...,MIN,MIN,MIN,MIN,MIN,MIN,MIN,MIN,MIN,MIN
7,GSW,LAC,NOP,HOU,SAS,DEN,LAL,DAL,POR,OKC,...,HOU,SAS,DEN,LAL,DAL,POR,OKC,MEM,PHX,SAC


In [11]:
EastScoreDF = EastScoreDF.rename(index={0: 'Training Score', 1: 'Testing Score', 2: 'Scaled Training Score', 3: 'Scaled Testing Score',
                               4: 'Chance of Winning', 5: 'Chance of Losing', 6: 'Team', 7: 'Versus'})
EastScoreDF = EastScoreDF.transpose()
EastScoreDF

Unnamed: 0,Training Score,Testing Score,Scaled Training Score,Scaled Testing Score,Chance of Winning,Chance of Losing,Team,Versus
ORL V CLE,1.0,0.166667,0.5,0.833333,0.6875,0.3125,ORL,CLE
ORL V ATL,1.0,0.666667,0.555556,0.333333,0.444444,0.555556,ORL,ATL
ORL V IND,1.0,0.8,0.733333,1.0,0.333333,0.666667,ORL,IND
ORL V BOS,1.0,0.666667,0.8125,0.666667,0.1875,0.8125,ORL,BOS
ORL V CHI,1.0,0.6,0.533333,0.2,0.533333,0.466667,ORL,CHI
...,...,...,...,...,...,...,...,...
BKN V TOR,1.0,1.0,0.75,0.857143,0.25,0.75,BKN,TOR
BKN V PHI,1.0,0.625,0.636364,0.625,0.363636,0.636364,BKN,PHI
BKN V MIL,1.0,0.625,0.545455,0.875,0.454545,0.545455,BKN,MIL
BKN V DET,1.0,0.333333,0.8,0.333333,0.8,0.2,BKN,DET


In [12]:
WestScoreDF = WestScoreDF.rename(index={0: 'Training Score', 1: 'Testing Score', 2: 'Scaled Training Score', 3: 'Scaled Testing Score',
                               4: 'Chance of Winning', 5: 'Chance of Losing', 6: 'Team', 7: 'Versus'})
WestScoreDF = WestScoreDF.transpose()
WestScoreDF

Unnamed: 0,Training Score,Testing Score,Scaled Training Score,Scaled Testing Score,Chance of Winning,Chance of Losing,Team,Versus
UTA V GSW,1.0,0.833333,0.588235,0.166667,0.588235,0.411765,UTA,GSW
UTA V LAC,1.0,0.5,0.535714,0.5,0.535714,0.464286,UTA,LAC
UTA V NOP,1.0,0.833333,0.333333,0.166667,0.666667,0.333333,UTA,NOP
UTA V HOU,1.0,1.0,0.541667,0.75,0.458333,0.541667,UTA,HOU
UTA V SAS,1.0,0.333333,0.5,0.166667,0.5,0.5,UTA,SAS
...,...,...,...,...,...,...,...,...
MIN V POR,1.0,0.6,0.466667,0.6,0.466667,0.533333,MIN,POR
MIN V OKC,1.0,1.0,0.375,0.5,0.625,0.375,MIN,OKC
MIN V MEM,1.0,0.571429,0.809524,0.571429,0.190476,0.809524,MIN,MEM
MIN V PHX,1.0,0.6,0.769231,0.6,0.230769,0.769231,MIN,PHX


In [13]:
EastScoreDF.to_sql(name='East_Predictions', con=engine, if_exists='replace', index=False)
WestScoreDF.to_sql(name='West_Predictions', con=engine, if_exists='replace', index=False)
EastScoreDF.to_csv('East_Predictions.csv')
WestScoreDF.to_csv('West_Predictions.csv')