<h1>Player Prediction Machine Learning Models - Batting</h1>

<h4>Import Dependencies</h4>

In [5]:
%matplotlib inline

In [61]:
import warnings
warnings.simplefilter('ignore')

import os
import csv
import pandas as pd

import sqlite3
import csv
from sqlalchemy import create_engine

import matplotlib.pyplot as plt
import numpy as np

from config import pgPassword

<h4>Create a connection to SQL database</h4>

In [2]:
pg_user = 'postgres'
pg_password = pgPassword
db_name = 'baseball_db'

connection_string = f"{pg_user}:{pg_password}@localhost:5432/{db_name}"
engine = create_engine(f'postgresql://{connection_string}')

<h4>Read in databases tables as DatFrames</h4>

In [3]:
teamsStatsDF = pd.read_sql_table("Team-Stats", con = engine)
battingDF = pd.read_sql_table("Batting", con = engine)
pitchingDF = pd.read_sql_table("Pitching", con = engine)
playersDF = pd.read_sql_table("Players", con = engine)
franchisesDF = pd.read_sql_table("Franchises", con = engine)
salariesDF = pd.read_sql_table("Salaries", con = engine)
teamsDF = pd.read_sql_table("Teams", con = engine)
fPlayersDF = pd.read_sql_table("FranchisePlayers", con = engine)

In [4]:
battingDF.head()

Unnamed: 0,yearID,stint,G,R,H,HR,BB,IBB,SO,SB,fpID
0,1954,1,35,2,10,0,3,0.0,15.0,0.0,336907024
1,1955,1,46,1,2,0,1,0.0,0.0,0.0,336907024
2,1956,1,49,4,11,0,2,0.0,28.0,0.0,336907024
3,1957,1,49,4,14,0,6,0.0,26.0,0.0,336907024
4,1958,1,46,1,6,0,5,0.0,13.0,1.0,336907024


In [7]:
# Function to do the dataManipulation steps in PlayerBattingML
def manipulateDF(dfBatting, dfPlayers, dfFranchPlayers, latestDataYear, stat):
    #get only data from latest year forward
    dfBatting = dfBatting[dfBatting["yearID"] >= latestDataYear]
    #combine stats of players with multiple stints in a year
    dfBatting = dfBatting.groupby(["yearID", "fpID"]).sum().reset_index()
    
    # Merge FranchisePlayer and PLayer Tables
    dfFP = dfFranchPlayers[["playerID", "fpID"]]
    playerMergedDF = pd.merge(dfPlayers, dfFP, on = ["playerID"])
    # Get years out of columns
    playerMergedDF["debutYear"] = playerMergedDF["debut"].dt.year
    playerMergedDF["finalYear"] = playerMergedDF["finalGame"].dt.year
    playerMergedDF["totalYears"] = playerMergedDF.finalYear + 1 - playerMergedDF.debutYear
    playerMergedDF = playerMergedDF[["fpID", "playerID", "birthYear", "debutYear", "finalYear", "totalYears"]]
    
    # Merga Player and Batting Data
    mergedBatting = pd.merge(dfBatting, playerMergedDF, on = ["fpID"])
    
    # Normalize statistics to be per game

    # Divide statistics by games played
    mergedBatting["RpG"] = mergedBatting.R / mergedBatting.G
    mergedBatting["HpG"] = mergedBatting.H / mergedBatting.G
    mergedBatting["HRpG"] = mergedBatting.HR / mergedBatting.G
    mergedBatting["BBpG"] = (mergedBatting.BB + mergedBatting.IBB) / mergedBatting.G
    mergedBatting["SOpG"] = mergedBatting.SO / mergedBatting.G
    mergedBatting["SBpG"] = mergedBatting.SB / mergedBatting.G

    #Limit to only columns of interest
    desiredCols = ["yearID", "fpID", "G","RpG", "HpG", "HRpG", "BBpG", "SOpG", "SBpG",
                   "birthYear", "debutYear", "finalYear", "totalYears"]
    mergedBatting = mergedBatting[desiredCols]
    
    # Add age and careerYears columns
    mergedBatting["careerYear"] = mergedBatting.yearID + 1 - mergedBatting.debutYear
    mergedBatting["age"] = mergedBatting.yearID - mergedBatting.birthYear
    
    # Add a column to indicate rows that should be skipped
    #  years < latestDataYear + 2 (we can't get 2 year previous data for these years)
    #  careerYear < 3 (we can't get 2 year previous data for these years)
    #  year = finalYear (we can't use next year to check model)
    #  year = 2019 (last year of our data so )

    mergedBatting["skip"] = 0
    for index, row in mergedBatting.iterrows():
        if row["yearID"] < (latestDataYear + 2):
            mergedBatting.at[index, "skip"] = 1
        elif row["careerYear"] < 3:
            mergedBatting.at[index, "skip"] = 1
        elif row["yearID"] == row["finalYear"]:
            mergedBatting.at[index, "skip"] = 1
        elif row["yearID"] == 2019:
            mergedBatting.at[index, "skip"] = 1
            
    #  Sort by player and year - to get career all together
    sortedBatting = mergedBatting.sort_values(by = ["fpID", "yearID"])
    sortedBatting = sortedBatting.reset_index(drop=True)
    
    
    mlDF = sortedBatting.copy()
# playersMLBatting = playersMLBatting.drop(columns=["birthYear", "debuYear"])

    # Iterate through the sorted batting and grab previous stats
    # Make Columns labels based on stat
    p2Label = "p2-" + stat
    p1Label = "p1-" + stat
    f1Label = "f1-" + stat
    # Add those columns to DF
    mlDF[p2Label] = ""
    mlDF[p1Label] = ""
    mlDF[f1Label] = ""

    # Itterate through DF and populate those columns
    for index, row in mlDF.iterrows():
        if row["skip"] == 1:
            continue
        p2Stat = mlDF.iloc[index - 2][stat]
        p1Stat = mlDF.iloc[index - 1][stat]
        f1Stat = mlDF.iloc[index + 1][stat]

        mlDF.at[index, p2Label] = p2Stat
        mlDF.at[index, p1Label] = p1Stat
        mlDF.at[index, f1Label] = f1Stat
        
    # Get rid of the skipped rows, then all complete data
    mlData = mlDF.loc[mlDF['skip'] == 0]
    
    return mlData


In [8]:
rpgDF = manipulateDF(battingDF, playersDF, fPlayersDF, 1980, 'RpG')
rpgDF.tail(20)

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age,skip,p2-RpG,p1-RpG,f1-RpG
285170,2009,698387328,159,0.616352,0.962264,0.226415,0.622642,0.981132,0.125786,1979.0,2002,2017,16,8,30.0,0,0.457447,0.544776,0.679487
285171,2010,698387328,156,0.679487,1.051282,0.173077,0.564103,0.942308,0.083333,1979.0,2002,2017,16,9,31.0,0,0.544776,0.616352,0.46
285172,2011,698387328,150,0.46,0.866667,0.133333,0.526667,1.066667,0.126667,1979.0,2002,2017,16,10,32.0,0,0.616352,0.679487,0.518519
285173,2012,698387328,81,0.518519,1.111111,0.061728,0.54321,0.703704,0.098765,1979.0,2002,2017,16,11,33.0,0,0.679487,0.46,0.651163
285174,2013,698387328,129,0.651163,1.139535,0.193798,0.488372,0.782946,0.077519,1979.0,2002,2017,16,12,34.0,0,0.46,0.518519,0.578231
285175,2014,698387328,147,0.578231,1.061224,0.108844,0.585034,0.768707,0.061224,1979.0,2002,2017,16,13,35.0,0,0.518519,0.651163,0.579545
285176,2015,698387328,88,0.579545,0.829545,0.136364,0.431818,0.954545,0.0,1979.0,2002,2017,16,14,36.0,0,0.651163,0.578231,0.587413
285177,2016,698387328,143,0.587413,0.895105,0.146853,0.496503,0.972028,0.034965,1979.0,2002,2017,16,15,37.0,0,0.578231,0.579545,0.5
285181,2007,698440183,162,0.611111,1.074074,0.148148,0.395062,0.771605,0.024691,1984.0,2005,2019,15,3,23.0,0,0.3,0.535032,0.481132
285182,2008,698440183,106,0.481132,1.141509,0.132075,0.301887,0.669811,0.009434,1984.0,2005,2019,15,4,24.0,0,0.535032,0.611111,0.700637


In [168]:
# Function to Split data and run model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

def run_ML_Model(mlDF, stat):
    mlData = mlDF.loc[mlDF['skip'] == 0]
    p2Label = "p2-" + stat
    p1Label = "p1-" + stat
    f1Label = "f1-" + stat
    
#     print("== All Inputs ==")
    inputFactors = [p2Label, p1Label, "RpG", "HpG", "HRpG", "BBpG", "age"]
    X = mlData[inputFactors]
    y = mlData[[f1Label]]
    
    # Split Data
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=5)
    
    #LinearModel===============
    model_lin = LinearRegression()
    model_lin.fit(X_train, y_train)
    # Calculate the R2 scores
    training_score_lin = model_lin.score(X_train, y_train)
    testing_score_lin = model_lin.score(X_test, y_test)
    # Make Predictions and get MSE
    preds_lin = model_lin.predict(X_test)
    MSE_lin = mean_squared_error(y_test, preds_lin)
    # Get coeffs and Y-int
    coeffs_lin = model_lin.coef_.tolist()[0]
    y_int_lin = model_lin.intercept_.tolist()[0]
    print("===================")
    print("=== Linear ===")
    print(f"{stat}: Training Score: {training_score_lin}")
    print(f"{stat}:Testing Score: {testing_score_lin}")
    print(f"{stat}:MSE: {MSE_lin}")
    print('Weight coefficients: ', coeffs_lin)
    print('y-axis intercept: ', y_int_lin)
    print("===================")
    
    
    #===============================RidgeModel===============
    model_ridge = Ridge(alpha=0.01).fit(X_train, y_train)
    # Calculate the R2 scores
    training_score_ridge= model_ridge.score(X_train, y_train)
    testing_score_ridge = model_ridge.score(X_test, y_test)
    # Make Predictions and get MSE
    preds_ridge = model_ridge.predict(X_test)
    MSE_ridge = mean_squared_error(y_test, preds_ridge)
    # Get coeffs and Y-int
    coeffs_ridge= model_ridge.coef_.tolist()[0]
    y_int_ridge = model_ridge.intercept_.tolist()[0]
    print("===================")
    print("=== Ridge ===")
    print(f"{stat}: Training Score: {training_score_ridge}")
    print(f"{stat}:Testing Score: {testing_score_ridge}")
    print(f"{stat}:MSE: {MSE_ridge}")
    print('Weight coefficients: ', coeffs_ridge)
    print('y-axis intercept: ', y_int_ridge)
    print("===================")
    
    #=================================LassoModel===============
    model_lasso = Lasso(alpha=0.01).fit(X_train, y_train)
    # Calculate the R2 scores
    training_score_lasso= model_lasso.score(X_train, y_train)
    testing_score_lasso = model_lasso.score(X_test, y_test)
    # Make Predictions and get MSE
    preds_lasso = model_lasso.predict(X_test)
    MSE_lasso = mean_squared_error(y_test, preds_lasso)
    # Get coeffs and Y-int
    coeffs_lasso= model_lasso.coef_.tolist()[0]
    y_int_lasso = model_lasso.intercept_.tolist()[0]
    print("===================")
    print("=== Lasso ===")
    print(f"{stat}: Training Score: {training_score_lasso}")
    print(f"{stat}:Testing Score: {testing_score_lasso}")
    print(f"{stat}:MSE: {MSE_lasso}")
    print('Weight coefficients: ', coeffs_lasso)
    print('y-axis intercept: ', y_int_lasso)
    print("===================")
    
    #================================ElasticNetModel===============
    model_eNet = ElasticNet(alpha=.01, l1_ratio=0.001).fit(X_train, y_train)
    # Calculate the R2 scores
    training_score_eNet= model_eNet.score(X_train, y_train)
    testing_score_eNet = model_eNet.score(X_test, y_test)
    # Make Predictions and get MSE
    preds_eNet = model_eNet.predict(X_test)
    MSE_eNet = mean_squared_error(y_test, preds_eNet)
    # Get coeffs and Y-int
    coeffs_eNet= model_eNet.coef_.tolist()[0]
    y_int_eNet = model_eNet.intercept_.tolist()[0]
    print("===================")
    print("=== Elastic Net ===")
    print(f"{stat}: Training Score: {training_score_eNet}")
    print(f"{stat}:Testing Score: {testing_score_eNet}")
    print(f"{stat}:MSE: {MSE_eNet}")
    print('Weight coefficients: ', coeffs_eNet)
    print('y-axis intercept: ', y_int_eNet)
    print("===================")
    
    
#     inputFactors2 = [p2Label, p1Label, stat]
#     X2 = mlData[inputFactors2]
#     y2 = mlData[[f1Label]]
    
#     # Split Data
#     X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, random_state=5)
    
#     #LinearModel===============
#     model_lin2 = LinearRegression()
#     model_lin2.fit(X_train2, y_train2)
#     # Calculate the R2 scores
#     training_score_lin2 = model_lin2.score(X_train2, y_train2)
#     testing_score_lin2 = model_lin2.score(X_test2, y_test2)
#     # Get coeffs and Y-int
#     coeffs_lin2 = model_lin2.coef_.tolist()[0]
#     y_int_lin2 = model_lin2.intercept_.tolist()[0]
# #     print("===================")
# #     print("=== Linear ===")
# #     print(f"{stat}: Training Score: {training_score_lin2}")
# #     print(f"{stat}:Testing Score: {testing_score_lin2}")
# #     print('Weight coefficients: ', coeffs_lin2)
# #     print('y-axis intercept: ', y_int_lin2)
# #     print("===================")

    
    
    
#     inputFactors.append("Y-Int")
    
#     coeffs_lin.append(y_int_lin)
#     mlFormulaLin = {'Stat': stat,
#                  'Input': inputFactors,
#                  'Coeff': coeffs_lin,
#                 'TrainingScore': training_score_lin,
#                 'TestingScore': testing_score_lin}
    
#     coeffs_log.append(y_int_log)
#     mlFormulaLog = {'Stat': stat,
#                  'Input': inputFactors,
#                  'Coeff': coeffs_log,
#                 'TrainingScore': training_score_log,
#                 'TestingScore': testing_score_log}
    
#     mlFormulaDF = pd.DataFrame(rpgFormula)
    
    return (model_lin, model_ridge, model_lasso, model_eNet)

    
    
    

In [103]:
# Comment after done - Takes a long time to execute

# rpgDF = manipulateDF(battingDF, playersDF, fPlayersDF, 1980, 'RpG')

In [175]:
rpgModel = run_ML_Model(rpgDF, "RpG")

=== Linear ===
RpG: Training Score: 0.84337092332774
RpG:Testing Score: 0.841729462035492
RpG:MSE: 0.009396517864563088
Weight coefficients:  [0.0935087652710313, 0.19032005936393018, 0.34681492893746074, 0.11438742553923785, 0.08044735246888794, 0.09275494575797472, -0.004843151106728904]
y-axis intercept:  0.1512060441742148
=== Ridge ===
RpG: Training Score: 0.8433709233265649
RpG:Testing Score: 0.8417294634809667
RpG:MSE: 0.009396517778745285
Weight coefficients:  [0.09350917929964525, 0.1903197498927312, 0.3468110328418721, 0.11438900259028731, 0.08044748903479784, 0.09275586512667051, -0.004843157409594514]
y-axis intercept:  0.15120622211541263
=== Lasso ===
RpG: Training Score: 0.7804685053932657
RpG:Testing Score: 0.7789055329363639
RpG:MSE: 0.013126372957583503
Weight coefficients:  0.0
y-axis intercept:  0.13560995488749977
=== Elastic Net ===
RpG: Training Score: 0.8404590502960463
RpG:Testing Score: 0.8388427666144901
RpG:MSE: 0.009567900899219094
Weight coefficients:  0.1

In [105]:
# Comment after done - Takes a long time to execute

# hpgDF = manipulateDF(battingDF, playersDF, fPlayersDF, 1980, 'HpG')

In [176]:
hpgModel = run_ML_Model(hpgDF, "HpG")

=== Linear ===
HpG: Training Score: 0.8715164888744652
HpG:Testing Score: 0.8687481175957179
HpG:MSE: 0.02565439673135623
Weight coefficients:  [0.10314856376993395, 0.19415132905121496, 0.18162821828050904, 0.5194927746888385, 0.0012229294781802417, 0.06165500712347907, -0.0074503876487092]
y-axis intercept:  0.24246860780617735
=== Ridge ===
HpG: Training Score: 0.8715164888743224
HpG:Testing Score: 0.8687481188972124
HpG:MSE: 0.02565439647696708
Weight coefficients:  [0.10314890874553008, 0.19415171220892843, 0.18162862375727654, 0.5194915679911042, 0.0012240432000838162, 0.061655073194748874, -0.0074503936538856015]
y-axis intercept:  0.24246885228885728
=== Lasso ===
HpG: Training Score: 0.8669048161755222
HpG:Testing Score: 0.8642846198656439
HpG:MSE: 0.026526828726077065
Weight coefficients:  0.08841199888777326
y-axis intercept:  0.250803843946657
=== Elastic Net ===
HpG: Training Score: 0.8702148622421428
HpG:Testing Score: 0.8674960914663666
HpG:MSE: 0.02589911684090457
Weigh

In [107]:
# Comment after done - Takes a long time to execute

# HRpgDF = manipulateDF(battingDF, playersDF, fPlayersDF, 1980, 'HRpG')

In [177]:
hrpgModel = run_ML_Model(HRpgDF, "HRpG")

=== Linear ===
HRpG: Training Score: 0.7506382109274283
HRpG:Testing Score: 0.7477265889288973
HRpG:MSE: 0.0013933551862862139
Weight coefficients:  [0.12387728987114553, 0.23604852946665117, 0.001395048142083439, 0.0076065087334711455, 0.43672490880096254, 0.02728005527867255, -0.0014774201660191725]
y-axis intercept:  0.04530755224799877
=== Ridge ===
HRpG: Training Score: 0.750638210844588
HRpG:Testing Score: 0.7477265227513188
HRpG:MSE: 0.0013933555517978693
Weight coefficients:  [0.1238826895884823, 0.23604856501037633, 0.0013989195786012314, 0.007605867207654665, 0.43670694731752135, 0.027280640714389983, -0.0014774244537442602]
y-axis intercept:  0.04530760605840496
=== Lasso ===
HRpG: Training Score: 0.38573070161455425
HRpG:Testing Score: 0.38257813928367046
HRpG:MSE: 0.003410141196026052
Weight coefficients:  0.0
y-axis intercept:  0.03312891783074852
=== Elastic Net ===
HRpG: Training Score: 0.6672037483757984
HRpG:Testing Score: 0.6617371578328056
HRpG:MSE: 0.00186829156295

In [126]:
# Comment after done - Takes a long time to execute

# BBpGDF = manipulateDF(battingDF, playersDF, fPlayersDF, 1980, 'BBpG')

In [178]:
bbpgModel = run_ML_Model(BBpgDF, "BBpG")

=== Linear ===
BBpG: Training Score: 0.8283702767374077
BBpG:Testing Score: 0.8242480220140065
BBpG:MSE: 0.008510382634834838
Weight coefficients:  [0.12563436656202243, 0.23236127243766133, 0.0789105498461265, 0.020226277782279756, 0.23356937432531713, 0.40644771112956873, -0.002947620874419216]
y-axis intercept:  0.09164923392698321
=== Ridge ===
BBpG: Training Score: 0.8283702767348293
BBpG:Testing Score: 0.8242480171238805
BBpG:MSE: 0.008510382871627864
Weight coefficients:  [0.12563529861293135, 0.23236146687914758, 0.07891268769891609, 0.020226300617198472, 0.2335630882286293, 0.40644573301601844, -0.0029476196780486865]
y-axis intercept:  0.09164917033425199
=== Lasso ===
BBpG: Training Score: 0.7586227275914913
BBpG:Testing Score: 0.7526779278052605
BBpG:MSE: 0.011975998748561574
Weight coefficients:  0.0
y-axis intercept:  0.04981672619277844
=== Elastic Net ===
BBpG: Training Score: 0.8199061858736553
BBpG:Testing Score: 0.8146418821133914
BBpG:MSE: 0.008975537719487737
Weigh

In [179]:
def makePredsTabel(df, stat):
    newDF = df.copy()
    # Create a column to hold prediction values
    newDF["model"] = ""
    
    #Creat columns lists - for Dicts that will then be converted to DFs
    linearYears = []
    linearPlayers = []
    linearActual = []
    linearModel = []
    linearType = []
    
    ridgeYears = []
    ridgePlayers = []
    ridgeActual = []
    ridgeModel = []
    ridgeType = []
    
    lassoYears = []
    lassoPlayers = []
    lassoActual = []
    lassoModel = []
    lassoType = []
    
    eNetYears = []
    eNetPlayers = []
    eNetActual = []
    eNetModel = []
    eNetType = []
    
    p2Label = "p2-" + stat
    p1Label = "p1-" + stat
    inputFactors = [p2Label, p1Label, "RpG", "HpG", "HRpG", "BBpG", "age"]
    
    if stat == "RpG":
        model = rpgModel
    elif stat == "HpG":
        model = hpgModel
    elif stat == "HRpG":
        model = hrpgModel
    elif stat == "BBpG":
        model = bbpgModel
    
    for index, row in newDF.iterrows():
        yr = row["yearID"]
        player = row["fpID"]
        actual = row[stat]
        
        rowSelected = newDF.loc[[index]]

        lin_pred = model[0].predict(rowSelected[inputFactors])[0][0]
        linearYears.append(yr)
        linearPlayers.append(player)
        linearActual.append(actual)
        linearModel.append(lin_pred)
        linearType.append("ML-LN-T1")
        
        ridge_pred = model[1].predict(rowSelected[inputFactors])[0][0]
        ridgeYears.append(yr)
        ridgePlayers.append(player)
        ridgeActual.append(actual)
        ridgeModel.append(ridge_pred)
        ridgeType.append("ML-RD-T1")
        
        lasso_pred = model[2].predict(rowSelected[inputFactors])[0]
        lassoYears.append(yr)
        lassoPlayers.append(player)
        lassoActual.append(actual)
        lassoModel.append(lasso_pred)
        lassoType.append("ML-LS-T1")
        lasso_type = "ML-LS-T1"
        
        eNet_pred = model[3].predict(rowSelected[inputFactors])[0]    
        eNetYears.append(yr)
        eNetPlayers.append(player)
        eNetActual.append(actual)
        eNetModel.append(eNet_pred)
        eNetType.append("ML-EN-T1")
        
    # Make dictionaries with the columns created
    linDict = {"yearID": linearYears,
              "fpID": linearPlayers,
              "actual": linearActual,
              "model": linearModel,
              "model_type": linearType}
    linDF = pd.DataFrame.from_dict(linDict)
    
    ridgeDict = {"yearID": ridgeYears,
              "fpID": ridgePlayers,
              "actual": ridgeActual,
              "model": ridgeModel,
              "model_type": ridgeType}
    ridgeDF = pd.DataFrame.from_dict(ridgeDict)
    
    lassoDict = {"yearID": lassoYears,
              "fpID": lassoPlayers,
              "actual": lassoActual,
              "model": lassoModel,
              "model_type": lassoType}
    lassoDF = pd.DataFrame.from_dict(lassoDict)
    
    eNetDict = {"yearID": eNetYears,
              "fpID": eNetPlayers,
              "actual": eNetActual,
              "model": eNetModel,
              "model_type": eNetType}
    eNetDF = pd.DataFrame.from_dict(eNetDict)
    
    fullDF = linDF.append(ridgeDF)
    fullDF = fullDF.append(lassoDF)
    fullDF = fullDF.append(eNetDF)
    
    return fullDF

In [180]:
fullRpgDF = makePredsTabel(rpgDF, "RpG")
fullRpgDF.head()

Unnamed: 0,yearID,fpID,actual,model,model_type
0,1982,1000,0.0,0.020441,ML-LN-T1
1,1983,1000,0.033333,0.030971,ML-LN-T1
2,1984,1000,0.095238,0.087524,ML-LN-T1
3,1985,1000,0.0,0.027154,ML-LN-T1
4,1983,1003,0.0,0.028552,ML-LN-T1


In [184]:
rpgPath = os.path.join("..", "..", "data", "csv", "outputData", "rpgPredictions.csv")
fullRpgDF.to_csv(rpgPath)

In [181]:
fullHpgDF = makePredsTabel(hpgDF, "HpG")
fullHpgDF.head()

Unnamed: 0,yearID,fpID,actual,model,model_type
0,1982,1000,0.0,0.041308,ML-LN-T1
1,1983,1000,0.033333,0.057228,ML-LN-T1
2,1984,1000,0.095238,0.117268,ML-LN-T1
3,1985,1000,0.0,0.040886,ML-LN-T1
4,1983,1003,0.028571,0.066635,ML-LN-T1


In [185]:
hpgPath = os.path.join("..", "..", "data", "csv", "outputData", "hpgPredictions.csv")
fullHpgDF.to_csv(hpgPath)

In [182]:
fullHRpgDF = makePredsTabel(HRpgDF, "HRpG")
fullHRpgDF.head()

Unnamed: 0,yearID,fpID,actual,model,model_type
0,1982,1000,0.0,0.005417,ML-LN-T1
1,1983,1000,0.0,0.00424,ML-LN-T1
2,1984,1000,0.0,0.011114,ML-LN-T1
3,1985,1000,0.0,0.000985,ML-LN-T1
4,1983,1003,0.0,0.007112,ML-LN-T1


In [None]:
HRpgPath = os.path.join("..", "..", "data", "csv", "outputData", "rpgPredictions.csv")
fullRpgDF.to_csv(rpgPath)

In [183]:
fullBBpgDF = makePredsTabel(BBpgDF, "BBpG")
fullBBpgDF.head()

Unnamed: 0,yearID,fpID,actual,model,model_type
0,1982,1000,0.0,0.012063,ML-LN-T1
1,1983,1000,0.0,0.01242,ML-LN-T1
2,1984,1000,0.285714,0.131738,ML-LN-T1
3,1985,1000,0.0,0.06961,ML-LN-T1
4,1983,1003,0.0,0.01922,ML-LN-T1


In [149]:
BBpGDF.head(1)

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age,skip,p2-BBpG,p1-BBpG,f1-BBpG
2,1982,1000,33,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,7,27.0,0,0,0,0


In [150]:
rowSel = BBpGDF.iloc[[0]]
rowSel

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age,skip,p2-BBpG,p1-BBpG,f1-BBpG
2,1982,1000,33,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,7,27.0,0,0,0,0


In [164]:
model1 = BBpgFormulaDF[0]
row1 = BBpgDF.head(1)
rowSel = BBpGDF.loc[[2]]
rowSel
predVal = model1.predict(row1[["p2-BBpG", "p1-BBpG", "RpG", "HpG", "HRpG", "BBpG", "age"]])
predVal2 = model1.predict(rowSel[["p2-BBpG", "p1-BBpG", "RpG", "HpG", "HRpG", "BBpG", "age"]])
print(predVal.tolist()[0][0])
print(predVal2)

0.012063470317664374
[[0.01206347]]


In [128]:
model0R = run_ML_Model(rpgDF, "RpG")[0]
model1R = run_ML_Model(rpgDF, "RpG")[1]
model2R = run_ML_Model(rpgDF, "RpG")[2]
model3R = run_ML_Model(rpgDF, "RpG")[3]

model0H = run_ML_Model(hpgDF, "HpG")[0]
model1H = run_ML_Model(hpgDF, "HpG")[1]
model2H = run_ML_Model(hpgDF, "HpG")[2]
model3H = run_ML_Model(hpgDF, "HpG")[3]

model0HR = run_ML_Model(HRpgDF, "HRpG")[0]
model1HR = run_ML_Model(HRpgDF, "HRpG")[1]
model2HR = run_ML_Model(HRpgDF, "HRpG")[2]
model3HR = run_ML_Model(HRpgDF, "HRpG")[3]

model0BB = run_ML_Model(BBpGDF, "BBpG")[0]
model1BB = run_ML_Model(BBpGDF, "BBpG")[1]
model2BB = run_ML_Model(BBpGDF, "BBpG")[2]
model3BB = run_ML_Model(BBpGDF, "BBpG")[3]

In [159]:
newBBpGDF = BBpGDF.copy()
# statsList = ["RpG", "HpG", "HRpG", "BBpG"]
# for s in statsList:
#     new_label = "pred" + s
#     newBBpGDF[new_label] = ""
newBBpGDF[predBBpG] = ""
newBBpGDF.head()
for index, row in newBBpGDF.iterrows():
    rowSelected = newBBpGDF.loc[[index]]
    
#     predR = model0R.predict(rowSelected[["p2-RpG", "p1-RpG", "RpG", "HpG", "HRpG", "BBpG", "age"]])
#     predH = model0H.predict(rowSelected[["p2-HpG", "p1-HpG", "RpG", "HpG", "HRpG", "BBpG", "age"]])
#     predHR = model0HR.predict(rowSelected[["p2-HRpG", "p1-HRpG", "RpG", "HpG", "HRpG", "BBpG", "age"]])
    predBB = model0BB.predict(rowSelected[["p2-BBpG", "p1-BBpG", "RpG", "HpG", "HRpG", "BBpG", "age"]])
    
#     newBBpGDF.at[index, "predRpG"] = predR
#     newBBpGDF.at[index, "predHpG"] = predH
#     newBBpGDF.at[index, "predHRpG"] = predHR
    newBBpGDF.at[index, "predBBpG"] = predBB[0][0]
    
newBBpGDF.head()
    

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,...,careerYear,age,skip,p2-BBpG,p1-BBpG,f1-BBpG,predRpG,predHpG,predHRpG,predBBpG
2,1982,1000,33,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,...,7,27.0,0,0,0.0,0.0,,,,[[0.012063470317664374]]
3,1983,1000,30,0.033333,0.033333,0.0,0.0,0.166667,0.0,1955.0,...,8,28.0,0,0,0.0,0.285714,,,,[[0.01242041036419203]]
4,1984,1000,21,0.095238,0.095238,0.0,0.285714,0.904762,0.0,1955.0,...,9,29.0,0,0,0.0,0.0,,,,[[0.1317377486656938]]
5,1985,1000,20,0.0,0.0,0.0,0.0,0.35,0.0,1955.0,...,10,30.0,0,0,0.285714,0.0,,,,[[0.06960954267659568]]
9,1983,1003,70,0.0,0.028571,0.0,0.0,0.1,0.0,1957.0,...,3,26.0,0,0,0.015625,0.0,,,,[[0.019219629724844325]]
