<h1>Player Prediction Machine Learning Models - Batting</h1>

<h4>Import Dependencies</h4>

In [1]:
%matplotlib inline

In [2]:
import os
import csv
import pandas as pd

import sqlite3
import csv
from sqlalchemy import create_engine

import matplotlib.pyplot as plt
import numpy as np

from config import pgPassword

<h4>Create a connection to SQL database</h4>

In [3]:
pg_user = 'postgres'
pg_password = pgPassword
db_name = 'baseball_db'

connection_string = f"{pg_user}:{pg_password}@localhost:5432/{db_name}"
engine = create_engine(f'postgresql://{connection_string}')

<h4>Read in databases tables as DatFrames</h4>

In [4]:
teamsStatsDF = pd.read_sql_table("Team-Stats", con = engine)
battingDF = pd.read_sql_table("Batting", con = engine)
pitchingDF = pd.read_sql_table("Pitching", con = engine)
playersDF = pd.read_sql_table("Players", con = engine)
franchisesDF = pd.read_sql_table("Franchises", con = engine)
salariesDF = pd.read_sql_table("Salaries", con = engine)
teamsDF = pd.read_sql_table("Teams", con = engine)
fPlayersDF = pd.read_sql_table("FranchisePlayers", con = engine)

In [5]:
battingDF.head()

Unnamed: 0,yearID,stint,G,R,H,HR,BB,IBB,SO,SB,fpID
0,1954,1,35,2,10,0,3,0.0,15.0,0.0,336907024
1,1955,1,46,1,2,0,1,0.0,0.0,0.0,336907024
2,1956,1,49,4,11,0,2,0.0,28.0,0.0,336907024
3,1957,1,49,4,14,0,6,0.0,26.0,0.0,336907024
4,1958,1,46,1,6,0,5,0.0,13.0,1.0,336907024


<h4>Manipulate Data</h4>

In [6]:
# Get data going back to latest year of interest
latestDataYear = 1980
targetBattingDF = battingDF[battingDF["yearID"] >=latestDataYear]
targetBattingDF.head()

Unnamed: 0,yearID,stint,G,R,H,HR,BB,IBB,SO,SB,fpID
87,1980,1,36,0,0,0,0,0.0,0.0,0.0,143883
88,1980,1,36,0,0,0,0,0.0,0.0,0.0,424800
89,1980,1,36,0,0,0,0,0.0,0.0,0.0,989035
90,1980,1,36,0,0,0,0,0.0,0.0,0.0,2809975
91,1980,1,36,0,0,0,0,0.0,0.0,0.0,5323248


In [7]:
# Combine the stats of players with multiple stints in a year
combinedStints = targetBattingDF.groupby(["yearID", "fpID"]).sum().reset_index()
combinedStints.head()

Unnamed: 0,yearID,fpID,stint,G,R,H,HR,BB,IBB,SO,SB
0,1980,1000,1,36,0,0,0,0,0.0,0.0,0.0
1,1980,1008,1,120,18,91,2,28,8.0,36.0,3.0
2,1980,1015,1,77,0,1,0,0,0.0,6.0,0.0
3,1980,1024,1,147,70,144,14,32,2.0,56.0,1.0
4,1980,1035,1,158,83,170,18,49,6.0,73.0,7.0


In [8]:
# Check to make sure combining Stints worked properly
totalRecords = targetBattingDF.shape[0]
combinedRecords = combinedStints.shape[0]

multipleStints = targetBattingDF.loc[targetBattingDF["stint"] > 1]
multipleStintRecords = multipleStints.shape[0]

if totalRecords - multipleStintRecords == combinedRecords:
    print("Success")
else:
    print("Failure")

Success


In [9]:
# Get Player Info

#just the player ID and fpID
fPlayersDF1 = fPlayersDF[["playerID", "fpID"]]
# Merge players with fplayers1
playerMergedDF = pd.merge(playersDF, fPlayersDF1, on = ["playerID"])
playerMergedDF.head()

Unnamed: 0,playerID,birthYear,nameFirst,nameLast,debut,finalGame,fpID
0,aardsda01,1981.0,David,Aardsma,2004-04-06,2015-08-23,266506624
1,aardsda01,1981.0,David,Aardsma,2004-04-06,2015-08-23,332151624
2,aardsda01,1981.0,David,Aardsma,2004-04-06,2015-08-23,362751115
3,aardsda01,1981.0,David,Aardsma,2004-04-06,2015-08-23,417304183
4,aardsda01,1981.0,David,Aardsma,2004-04-06,2015-08-23,451903563


In [10]:
# Create DF with  Years fields

playerYearsDF = playerMergedDF
playerYearsDF["debutYear"] = playerYearsDF["debut"].dt.year
playerYearsDF["finalYear"] = playerYearsDF["finalGame"].dt.year
playerYearsDF["totalYears"] = playerYearsDF.finalYear + 1 - playerYearsDF.debutYear
playerYearsDF = playerYearsDF[["fpID", "playerID", "birthYear", "debutYear", "finalYear", "totalYears"]]
playerYearsDF.head()

Unnamed: 0,fpID,playerID,birthYear,debutYear,finalYear,totalYears
0,266506624,aardsda01,1981.0,2004,2015,12
1,332151624,aardsda01,1981.0,2004,2015,12
2,362751115,aardsda01,1981.0,2004,2015,12
3,417304183,aardsda01,1981.0,2004,2015,12
4,451903563,aardsda01,1981.0,2004,2015,12


In [11]:
# Merge Player Years data with batting data

mergedBatting = pd.merge(combinedStints, playerYearsDF, on = ["fpID"])
mergedBatting.head()

Unnamed: 0,yearID,fpID,stint,G,R,H,HR,BB,IBB,SO,SB,playerID,birthYear,debutYear,finalYear,totalYears
0,1980,1000,1,36,0,0,0,0,0.0,0.0,0.0,barkele01,1955.0,1976,1987,12
1,1981,1000,1,22,0,0,0,0,0.0,0.0,0.0,barkele01,1955.0,1976,1987,12
2,1982,1000,1,33,0,0,0,0,0.0,0.0,0.0,barkele01,1955.0,1976,1987,12
3,1983,1000,3,30,1,1,0,0,0.0,5.0,0.0,barkele01,1955.0,1976,1987,12
4,1984,1000,1,21,2,2,0,6,0.0,19.0,0.0,barkele01,1955.0,1976,1987,12


In [12]:
# Normalize statistics to be per game

# Divide statistics by games played
mergedBatting["RpG"] = mergedBatting.R / mergedBatting.G
mergedBatting["HpG"] = mergedBatting.H / mergedBatting.G
mergedBatting["HRpG"] = mergedBatting.HR / mergedBatting.G
mergedBatting["BBpG"] = (mergedBatting.BB + mergedBatting.IBB) / mergedBatting.G
mergedBatting["SOpG"] = mergedBatting.SO / mergedBatting.G
mergedBatting["SBpG"] = mergedBatting.SB / mergedBatting.G

#Limit to only columns of interest
desiredCols = ["yearID", "fpID", "G","RpG", "HpG", "HRpG", "BBpG", "SOpG", "SBpG",
               "birthYear", "debutYear", "finalYear", "totalYears"]
mergedBatting = mergedBatting[desiredCols]
mergedBatting.head()

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears
0,1980,1000,36,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12
1,1981,1000,22,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12
2,1982,1000,33,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12
3,1983,1000,30,0.033333,0.033333,0.0,0.0,0.166667,0.0,1955.0,1976,1987,12
4,1984,1000,21,0.095238,0.095238,0.0,0.285714,0.904762,0.0,1955.0,1976,1987,12


In [31]:
# Add age and careerYears columns

mergedBatting["careerYear"] = mergedBatting.yearID + 1 - mergedBatting.debutYear
mergedBatting["age"] = mergedBatting.yearID - mergedBatting.birthYear
mergedBatting.head()

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age
0,1980,1000,36,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,5,25.0
1,1981,1000,22,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,6,26.0
2,1982,1000,33,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,7,27.0
3,1983,1000,30,0.033333,0.033333,0.0,0.0,0.166667,0.0,1955.0,1976,1987,12,8,28.0
4,1984,1000,21,0.095238,0.095238,0.0,0.285714,0.904762,0.0,1955.0,1976,1987,12,9,29.0


In [46]:
# NOTE - This cell take a while to execute

# Add a column to indicate rows that should be skipped
#  years < latestDataYear + 2 (we can't get 2 year previous data for these years)
#  careerYear < 3 (we can't get 2 year previous data for these years)
#  year = finalYear (we can't use next year to check model)
#  year = 2019 (last year of our data so )

mergedBatting["skip"] = 0
for index, row in mergedBatting.iterrows():
    if row["yearID"] < (latestDataYear + 2):
        mergedBatting.at[index, "skip"] = 1
    elif row["careerYear"] < 3:
        mergedBatting.at[index, "skip"] = 1
    elif row["yearID"] == row["finalYear"]:
        mergedBatting.at[index, "skip"] = 1
    elif row["yearID"] == 2019:
        mergedBatting.at[index, "skip"] = 1
        
mergedBatting.head(20)


Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age,skip
0,1980,1000,36,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,5,25.0,1
1,1981,1000,22,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,6,26.0,1
2,1982,1000,33,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,7,27.0,0
3,1983,1000,30,0.033333,0.033333,0.0,0.0,0.166667,0.0,1955.0,1976,1987,12,8,28.0,0
4,1984,1000,21,0.095238,0.095238,0.0,0.285714,0.904762,0.0,1955.0,1976,1987,12,9,29.0,0
5,1985,1000,20,0.0,0.0,0.0,0.0,0.35,0.0,1955.0,1976,1987,12,10,30.0,0
6,1987,1000,11,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,12,32.0,1
7,1980,1008,120,0.15,0.758333,0.016667,0.3,0.3,0.025,1955.0,1978,1989,12,3,25.0,1
8,1981,1008,90,0.288889,0.866667,0.055556,0.411111,0.233333,0.011111,1955.0,1978,1989,12,4,26.0,1
9,1982,1008,118,0.288136,0.805085,0.025424,0.389831,0.338983,0.033898,1955.0,1978,1989,12,5,27.0,0


In [55]:
#  Sort by player and year - to get career all together
sortedBatting = mergedBatting.sort_values(by = ["fpID", "yearID"])
sortedBatting = sortedBatting.reset_index(drop=True)
sortedBatting.head(20)

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age,skip
0,1980,1000,36,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,5,25.0,1
1,1981,1000,22,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,6,26.0,1
2,1982,1000,33,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,7,27.0,0
3,1983,1000,30,0.033333,0.033333,0.0,0.0,0.166667,0.0,1955.0,1976,1987,12,8,28.0,0
4,1984,1000,21,0.095238,0.095238,0.0,0.285714,0.904762,0.0,1955.0,1976,1987,12,9,29.0,0
5,1985,1000,20,0.0,0.0,0.0,0.0,0.35,0.0,1955.0,1976,1987,12,10,30.0,0
6,1987,1000,11,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,12,32.0,1
7,1981,1003,15,0.0,0.0,0.0,0.0,0.066667,0.0,1957.0,1981,1995,15,1,24.0,1
8,1982,1003,64,0.0,0.015625,0.0,0.015625,0.09375,0.0,1957.0,1981,1995,15,2,25.0,1
9,1983,1003,70,0.0,0.028571,0.0,0.0,0.1,0.0,1957.0,1981,1995,15,3,26.0,0


In [56]:
var = sortedBatting.iloc[3]["RpG"]
print(var)

0.03333333333333333


In [57]:
#NOTE - This cell takes a while to execute

# Iterate through the sorted batting and grab previous stats
mlDF = sortedBatting.copy()
# playersMLBatting = playersMLBatting.drop(columns=["birthYear", "debuYear"])

# Designate the stat of interes
stat = "RpG"
# Make Columns labels based on stat
p2Label = "p2-" + stat
p1Label = "p1-" + stat
f1Label = "f1-" + stat
# Add those columns to DF
mlDF[p2Label] = ""
mlDF[p1Label] = ""
mlDF[f1Label] = ""

# Itterate through DF and populate those columns
i = 0
for index, row in mlDF.iterrows():
    if row["skip"] == 1:
        continue
    p2Stat = mlDF.iloc[index - 2][stat]
    p1Stat = mlDF.iloc[index - 1][stat]
    f1Stat = mlDF.iloc[index + 1][stat]
    
    mlDF.at[index, p2Label] = p2Stat
    mlDF.at[index, p1Label] = p1Stat
    mlDF.at[index, f1Label] = f1Stat
    i +=1
    
    if (i % 100000 ==0):
        print(i)

    
    
mlDF.head(10)

100000
200000


Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age,skip,p2-RpG,p1-RpG,f1-RpG
0,1980,1000,36,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,5,25.0,1,,,
1,1981,1000,22,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,6,26.0,1,,,
2,1982,1000,33,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,7,27.0,0,0.0,0.0,0.0333333
3,1983,1000,30,0.033333,0.033333,0.0,0.0,0.166667,0.0,1955.0,1976,1987,12,8,28.0,0,0.0,0.0,0.0952381
4,1984,1000,21,0.095238,0.095238,0.0,0.285714,0.904762,0.0,1955.0,1976,1987,12,9,29.0,0,0.0,0.0333333,0.0
5,1985,1000,20,0.0,0.0,0.0,0.0,0.35,0.0,1955.0,1976,1987,12,10,30.0,0,0.0333333,0.0952381,0.0
6,1987,1000,11,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,12,32.0,1,,,
7,1981,1003,15,0.0,0.0,0.0,0.0,0.066667,0.0,1957.0,1981,1995,15,1,24.0,1,,,
8,1982,1003,64,0.0,0.015625,0.0,0.015625,0.09375,0.0,1957.0,1981,1995,15,2,25.0,1,,,
9,1983,1003,70,0.0,0.028571,0.0,0.0,0.1,0.0,1957.0,1981,1995,15,3,26.0,0,0.0,0.0,0.0


In [61]:
mlDF.tail(10)

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age,skip,p2-RpG,p1-RpG,f1-RpG
285184,2010,698440183,142,0.598592,1.133803,0.176056,0.528169,0.690141,0.028169,1984.0,2005,2019,15,6,26.0,0,0.481132,0.700637,0.514851
285185,2011,698440183,101,0.514851,1.128713,0.118812,0.445545,0.722772,0.029703,1984.0,2005,2019,15,7,27.0,0,0.700637,0.598592,0.641379
285186,2012,698440183,145,0.641379,1.124138,0.172414,0.448276,0.8,0.034483,1984.0,2005,2019,15,8,28.0,0,0.598592,0.514851,0.571429
285187,2013,698440183,147,0.571429,1.061224,0.176871,0.421769,0.904762,0.040816,1984.0,2005,2019,15,9,29.0,0,0.514851,0.641379,0.42623
285188,2014,698440183,61,0.42623,0.983607,0.081967,0.360656,0.606557,0.0,1984.0,2005,2019,15,10,30.0,0,0.641379,0.571429,0.452632
285189,2015,698440183,95,0.452632,0.905263,0.168421,0.347368,0.831579,0.010526,1984.0,2005,2019,15,11,31.0,0,0.571429,0.42623,0.521739
285190,2016,698440183,115,0.521739,0.808696,0.130435,0.26087,0.904348,0.034783,1984.0,2005,2019,15,12,32.0,0,0.42623,0.452632,0.625
285191,2017,698440183,144,0.625,1.104167,0.25,0.3125,0.875,0.006944,1984.0,2005,2019,15,13,33.0,0,0.452632,0.521739,0.388235
285192,2018,698440183,85,0.388235,0.894118,0.152941,0.364706,0.647059,0.011765,1984.0,2005,2019,15,14,34.0,0,0.521739,0.625,0.384615
285193,2019,698440183,52,0.384615,0.846154,0.115385,0.326923,0.75,0.0,1984.0,2005,2019,15,15,35.0,1,,,


<h2>=======================================================================================================</h2>

In [98]:
# Get rid of the skipped rows, then all complete data
mlData = mlDF.loc[mlDF['skip'] == 0]
mlData.tail(20)

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age,skip,p2-RpG,p1-RpG,f1-RpG
285170,2009,698387328,159,0.616352,0.962264,0.226415,0.622642,0.981132,0.125786,1979.0,2002,2017,16,8,30.0,0,0.457447,0.544776,0.679487
285171,2010,698387328,156,0.679487,1.051282,0.173077,0.564103,0.942308,0.083333,1979.0,2002,2017,16,9,31.0,0,0.544776,0.616352,0.46
285172,2011,698387328,150,0.46,0.866667,0.133333,0.526667,1.066667,0.126667,1979.0,2002,2017,16,10,32.0,0,0.616352,0.679487,0.518519
285173,2012,698387328,81,0.518519,1.111111,0.061728,0.54321,0.703704,0.098765,1979.0,2002,2017,16,11,33.0,0,0.679487,0.46,0.651163
285174,2013,698387328,129,0.651163,1.139535,0.193798,0.488372,0.782946,0.077519,1979.0,2002,2017,16,12,34.0,0,0.46,0.518519,0.578231
285175,2014,698387328,147,0.578231,1.061224,0.108844,0.585034,0.768707,0.061224,1979.0,2002,2017,16,13,35.0,0,0.518519,0.651163,0.579545
285176,2015,698387328,88,0.579545,0.829545,0.136364,0.431818,0.954545,0.0,1979.0,2002,2017,16,14,36.0,0,0.651163,0.578231,0.587413
285177,2016,698387328,143,0.587413,0.895105,0.146853,0.496503,0.972028,0.034965,1979.0,2002,2017,16,15,37.0,0,0.578231,0.579545,0.5
285181,2007,698440183,162,0.611111,1.074074,0.148148,0.395062,0.771605,0.024691,1984.0,2005,2019,15,3,23.0,0,0.3,0.535032,0.481132
285182,2008,698440183,106,0.481132,1.141509,0.132075,0.301887,0.669811,0.009434,1984.0,2005,2019,15,4,24.0,0,0.535032,0.611111,0.700637


In [93]:
# Now Working on a model
inputFactors = ["p2-RpG", "p1-RpG", "RpG", "HpG", "HRpG", "BBpG", "age"]
X = mlData[inputFactors]
y = mlData[["f1-RpG"]]
print(X.shape, y.shape)

(208831, 7) (208831, 1)


In [67]:
# Split data into training and test data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [68]:
# Create a model Using LinearRegression
from sklearn.linear_model import LinearRegression
model = LinearRegression()

In [69]:
# Fit the model to the Training Set
model.fit(X_train, y_train)

# Calculate the R2 scores
training_score = model.score(X_train, y_train)
testing_score = model.score(X_test, y_test)

print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")

Training Score: 0.8436377545331359
Testing Score: 0.8409208299944239


In [92]:
# Print the Coefficients of the Model
coeffs = model.coef_.tolist()[0]
y_int = model.intercept_.tolist()[0]
print('Weight coefficients: ', coeffs)
print('y-axis intercept: ', y_int) 

Weight coefficients:  [0.09534488492191104, 0.19043564596119467, 0.34311491370334507, 0.11438078046194051, 0.08404500352451712, 0.0942251060894592, -0.004887338421816933]
y-axis intercept:  0.152483218749756


In [100]:
# Put the inputs and their coeffs in a df
# inputFactors.append("Y-Int")
# coeffs.append(y_int)
rpgFormula = {'Stat': stat,
              'Input': inputFactors,
             'Coeff': coeffs}

rpgFormulaDF = pd.DataFrame(rpgFormula)
rpgFormulaDF.head(8)

Unnamed: 0,Stat,Input,Coeff
0,RpG,p2-RpG,0.095345
1,RpG,p1-RpG,0.190436
2,RpG,RpG,0.343115
3,RpG,HpG,0.114381
4,RpG,HRpG,0.084045
5,RpG,BBpG,0.094225
6,RpG,age,-0.004887
7,RpG,Y-Int,0.152483


============================================    OLD STUFF  ============================

In [27]:
# get a dataframe that will only hold valid inputs 
#  to get historical data for

# get only data from latestDataYear + 2
#  that way we can grab 2 years previous data
validHistoric = mergedBatting.loc[mergedBatting["yearID"] >= latestDataYear + 2]

# get only players who are in at least their 3rd career year and who played at least 4
#  we are using 3 years data to predict a 4th year value
validHistoric = validHistoric.loc[(validHistoric["careerYear"] >= 3) & (validHistoric["totalYears"] >= 4)]

# Get only data prior to 2019 season 
#  we can't supervise a model based of  2019 data becaus ewe don't have 2020 data to check it against
validHistoric = validHistoric.loc[validHistoric["yearID"] < 2019]
validHistoric.sort_values(by = ["yearID"], ascending = False).head()

# Remove players final years
#  we can't use 'next year' results to supervise when they don't play a next year
validHistoric2 = validHistoric.loc[validHistoric["yearID"] != validHistoric["finalYear"]]
validHistoric2.head()

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age
2,1982,1000,33,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,7,27.0
3,1983,1000,30,0.033333,0.033333,0.0,0.0,0.166667,0.0,1955.0,1976,1987,12,8,28.0
4,1984,1000,21,0.095238,0.095238,0.0,0.285714,0.904762,0.0,1955.0,1976,1987,12,9,29.0
5,1985,1000,20,0.0,0.0,0.0,0.0,0.35,0.0,1955.0,1976,1987,12,10,30.0
9,1982,1008,118,0.288136,0.805085,0.025424,0.389831,0.338983,0.033898,1955.0,1978,1989,12,5,27.0


In [26]:
# run the above funtion on the entire 

# Designate Desired Stat
stat = "RpG"
p2Label = "p2-" + stat
p1Label = "p1-" + stat
f1Label = "f1-" + stat
# Set up 
validHistoricRpG = validHistoric2
validHistoricRpG.head(15)
# Set up columns for the new data to be entered into
validHistoricRpG[p2Label] = ""
validHistoricRpG[p1Label] = ""
validHistoricRpG[f1Label] = ""


# # print("=================")
# gap = 0
# gapTimes = 0
# old_player = 0
# rows_done = 0
# for index, row in validHistoricRpG.iterrows():
#     rows_done += 1
    
#     player = row["fpID"]
#     if player != old_player:
# #         print("NEW PLAYER")
#         gap = 0
#         gapTimes = 0
#         old_player = player
# #     print(f"playerID: {player}")
#     yr = row["yearID"]
# #     print(f"===== {yr} =====")
#     finalYear = row["finalYear"]
#     if yr >= finalYear:
#         continue
#     if gapTimes == 2:
#         print("Executing 2")
#         prevTwoYear = yr - 2 - gap
#         prevOneYear = yr - 1 - gap
#         nextYear = yr + 1
#         gapTimes = 1
#     elif gapTimes == 1:
#         print("Executing 1")
#         prevTwoYear = yr - 2 - gap
#         prevOneYear = yr - 1
#         nextYear = yr + 1
#         gapTimes = 0
#     else:
#         prevTwoYear = yr - 2
#         prevOneYear = yr - 1
#         nextYear = yr + 1

    

#     prevTwoRow = mergedBatting.loc[(mergedBatting["yearID"] == prevTwoYear)
#                                      & (mergedBatting["fpID"] == player)]
#     prevOneRow = mergedBatting.loc[(mergedBatting["yearID"] == prevOneYear)
#                                              & (mergedBatting["fpID"] == player)]
#     nextYearRow = mergedBatting.loc[(mergedBatting["yearID"] == nextYear)
#                                              & (mergedBatting["fpID"] == player)]
#     while nextYearRow.empty:
# #         print(f"{nextYear} Missing")
#         nextYear += 1
#         nextYearRow = mergedBatting.loc[(mergedBatting["yearID"] == nextYear)
#                                              & (mergedBatting["fpID"] == player)]
#         gapTimes = 2
#         gap = nextYear - yr - 1
    
#     prevTwoStat = prevTwoRow[stat].values[0]
#     prevOneStat = prevOneRow[stat].values[0]    
#     nextYearStat = nextYearRow[stat].values[0]    
    
#     validHistoricRpG.at[index, "prevTwo"] = prevTwoStat
#     validHistoricRpG.at[index, "prevOne"] = prevOneStat
#     validHistoricRpG.at[index, "nextOne"] = nextYearStat
    
#     print(rows_done)
    
# #     print(f"{prevTwoYear}: {prevTwoStat}")
# #     print(f"{prevOneYear}: {prevOneStat}")
# #     print(f"{yr}: {validHistoricRpG.loc[index, stat]}")
# #     print(f"{nextYear}: {nextYearStat}")
          
# #     print("=================")

        
# validHistoricRpG.iloc[225:235].head(10)

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age
2,1982,1000,33,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,7,27.0
3,1983,1000,30,0.033333,0.033333,0.0,0.0,0.166667,0.0,1955.0,1976,1987,12,8,28.0
4,1984,1000,21,0.095238,0.095238,0.0,0.285714,0.904762,0.0,1955.0,1976,1987,12,9,29.0
5,1985,1000,20,0.0,0.0,0.0,0.0,0.35,0.0,1955.0,1976,1987,12,10,30.0
9,1982,1008,118,0.288136,0.805085,0.025424,0.389831,0.338983,0.033898,1955.0,1978,1989,12,5,27.0
10,1983,1008,134,0.320896,0.940299,0.014925,0.574627,0.179104,0.007463,1955.0,1978,1989,12,6,28.0
11,1984,1008,95,0.273684,0.705263,0.042105,0.389474,0.263158,0.010526,1955.0,1978,1989,12,7,29.0
12,1985,1008,70,0.171429,0.6,0.0,0.328571,0.171429,0.0,1955.0,1978,1989,12,8,30.0
13,1986,1008,64,0.171875,0.5625,0.0,0.25,0.15625,0.015625,1955.0,1978,1989,12,9,31.0
14,1987,1008,37,0.108108,0.378378,0.027027,0.459459,0.405405,0.0,1955.0,1978,1989,12,10,32.0


In [16]:
validHistoric.head(15)

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age
2,1982,1000,33,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,7,27.0
3,1983,1000,30,0.033333,0.033333,0.0,0.0,0.166667,0.0,1955.0,1976,1987,12,8,28.0
4,1984,1000,21,0.095238,0.095238,0.0,0.285714,0.904762,0.0,1955.0,1976,1987,12,9,29.0
5,1985,1000,20,0.0,0.0,0.0,0.0,0.35,0.0,1955.0,1976,1987,12,10,30.0
6,1987,1000,11,0.0,0.0,0.0,0.0,0.0,0.0,1955.0,1976,1987,12,12,32.0
9,1982,1008,118,0.288136,0.805085,0.025424,0.389831,0.338983,0.033898,1955.0,1978,1989,12,5,27.0
10,1983,1008,134,0.320896,0.940299,0.014925,0.574627,0.179104,0.007463,1955.0,1978,1989,12,6,28.0
11,1984,1008,95,0.273684,0.705263,0.042105,0.389474,0.263158,0.010526,1955.0,1978,1989,12,7,29.0
12,1985,1008,70,0.171429,0.6,0.0,0.328571,0.171429,0.0,1955.0,1978,1989,12,8,30.0
13,1986,1008,64,0.171875,0.5625,0.0,0.25,0.15625,0.015625,1955.0,1978,1989,12,9,31.0


In [101]:
nums = [83,84, 85,86,87, 88, 89, 90, 91, 92, 95, 96, 97]
lastYear = nums[-1]
gap = 0
gapTimes = 0
for num in nums:
    print(f"===== {num} =====")
    if num >= lastYear:
        continue
    if gapTimes == 2:
        print("Executing 2")
        prev2 = num - 2 - gap
        prev1 = num - 1 - gap
        next1 = num + 1
        gapTimes = 1
    elif gapTimes == 1:
        print("Executing 1")
        prev2 = num - 2 - gap
        prev1 = num - 1
        next1 = num + 1
        gapTimes = 0
    else:
        prev2 = num - 2
        prev1 = num - 1
        next1 = num + 1
    
    while next1 not in nums and next1 <lastYear:
        print(f"Missing {next1}")
        next1 += 1
        gapTimes = 2
        gap = next1 - num - 1
    
    print(prev2, prev1, num, next1)


===== 83 =====
81 82 83 84
===== 84 =====
82 83 84 85
===== 85 =====
83 84 85 86
===== 86 =====
84 85 86 87
===== 87 =====
85 86 87 88
===== 88 =====
86 87 88 89
===== 89 =====
87 88 89 90
===== 90 =====
88 89 90 91
===== 91 =====
89 90 91 92
===== 92 =====
Missing 93
Missing 94
90 91 92 95
===== 95 =====
Executing 2
91 92 95 96
===== 96 =====
Executing 1
92 95 96 97
===== 97 =====


In [None]:
# # Practice looping though dataframes
# sampHistoric = validHistoric.head(10)

# # Set up columns for the new data to be entered into
# sampHistoric["prevTwo"] = ""
# sampHistoric["prevOne"] = ""
# sampHistoric["nextOne"] = ""
# stat = "HpG"
# print("=================")
# gap = 0
# gapTimes = 0
# old_player = 0
# for index, row in sampHistoric.iterrows():
#     player = row["fpID"]
#     if player != old_player:
#         print("NEW PLAYER")
#         gap = 0
#         gapTimes = 0
#         old_player = player
#     print(f"playerID: {player}")
#     yr = row["yearID"]
#     print(f"===== {yr} =====")
#     finalYear = row["finalYear"]
#     if yr >= finalYear:
#         continue
#     if gapTimes == 2:
#         print("Executing 2")
#         prevTwoYear = yr - 2 - gap
#         prevOneYear = yr - 1 - gap
#         nextYear = yr + 1
#         gapTimes = 1
#     elif gapTimes == 1:
#         print("Executing 1")
#         prevTwoYear = yr - 2 - gap
#         prevOneYear = yr - 1
#         nextYear = yr + 1
#         gapTimes = 0
#     else:
#         prevTwoYear = yr - 2
#         prevOneYear = yr - 1
#         nextYear = yr + 1

    

#     prevTwoRow = mergedBatting.loc[(mergedBatting["yearID"] == prevTwoYear)
#                                      & (mergedBatting["fpID"] == player)]
#     prevOneRow = mergedBatting.loc[(mergedBatting["yearID"] == prevOneYear)
#                                              & (mergedBatting["fpID"] == player)]
#     nextYearRow = mergedBatting.loc[(mergedBatting["yearID"] == nextYear)
#                                              & (mergedBatting["fpID"] == player)]
#     while nextYearRow.empty:
#         print(f"{nextYear} Missing")
#         nextYear += 1
#         nextYearRow = mergedBatting.loc[(mergedBatting["yearID"] == nextYear)
#                                              & (mergedBatting["fpID"] == player)]
#         gapTimes = 2
#         gap = nextYear - yr - 1
    
#     prevTwoStat = prevTwoRow[stat].values[0]
#     prevOneStat = prevOneRow[stat].values[0]    
#     nextYearStat = nextYearRow[stat].values[0]    
    
#     sampHistoric.at[index, "prevTwo"] = prevTwoStat
#     sampHistoric.at[index, "prevOne"] = prevOneStat
#     sampHistoric.at[index, "nextOne"] = nextYearStat
    
#     print(f"{prevTwoYear}: {prevTwoStat}")
#     print(f"{prevOneYear}: {prevOneStat}")
#     print(f"{yr}: {sampHistoric.loc[index, stat]}")
#     print(f"{nextYear}: {nextYearStat}")
          
#     print("=================")

        
# sampHistoric.head()


In [21]:
# run the above funtion on the entire 

# Designate Desired Stat
stat = "RpG"
# Set up 
validHistoricRpG = validHistoric2

# Set up columns for the new data to be entered into
validHistoricRpG["prevTwo"] = ""
validHistoricRpG["prevOne"] = ""
validHistoricRpG["nextOne"] = ""
stat = "RpG"
# print("=================")
gap = 0
gapTimes = 0
old_player = 0
rows_done = 0
# for index, row in validHistoricRpG.iterrows():
#     rows_done += 1
    
#     player = row["fpID"]
#     if player != old_player:
# #         print("NEW PLAYER")
#         gap = 0
#         gapTimes = 0
#         old_player = player
# #     print(f"playerID: {player}")
#     yr = row["yearID"]
# #     print(f"===== {yr} =====")
#     finalYear = row["finalYear"]
#     if yr >= finalYear:
#         continue
#     if gapTimes == 2:
#         print("Executing 2")
#         prevTwoYear = yr - 2 - gap
#         prevOneYear = yr - 1 - gap
#         nextYear = yr + 1
#         gapTimes = 1
#     elif gapTimes == 1:
#         print("Executing 1")
#         prevTwoYear = yr - 2 - gap
#         prevOneYear = yr - 1
#         nextYear = yr + 1
#         gapTimes = 0
#     else:
#         prevTwoYear = yr - 2
#         prevOneYear = yr - 1
#         nextYear = yr + 1

    

#     prevTwoRow = mergedBatting.loc[(mergedBatting["yearID"] == prevTwoYear)
#                                      & (mergedBatting["fpID"] == player)]
#     prevOneRow = mergedBatting.loc[(mergedBatting["yearID"] == prevOneYear)
#                                              & (mergedBatting["fpID"] == player)]
#     nextYearRow = mergedBatting.loc[(mergedBatting["yearID"] == nextYear)
#                                              & (mergedBatting["fpID"] == player)]
#     while nextYearRow.empty:
# #         print(f"{nextYear} Missing")
#         nextYear += 1
#         nextYearRow = mergedBatting.loc[(mergedBatting["yearID"] == nextYear)
#                                              & (mergedBatting["fpID"] == player)]
#         gapTimes = 2
#         gap = nextYear - yr - 1
    
#     prevTwoStat = prevTwoRow[stat].values[0]
#     prevOneStat = prevOneRow[stat].values[0]    
#     nextYearStat = nextYearRow[stat].values[0]    
    
#     validHistoricRpG.at[index, "prevTwo"] = prevTwoStat
#     validHistoricRpG.at[index, "prevOne"] = prevOneStat
#     validHistoricRpG.at[index, "nextOne"] = nextYearStat
    
#     print(rows_done)
    
# #     print(f"{prevTwoYear}: {prevTwoStat}")
# #     print(f"{prevOneYear}: {prevOneStat}")
# #     print(f"{yr}: {validHistoricRpG.loc[index, stat]}")
# #     print(f"{nextYear}: {nextYearStat}")
          
# #     print("=================")

        
validHistoricRpG.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age,prevTwo,prevOne,nextOne
317,1995,2295,113,0.60177,1.247788,0.185841,0.389381,0.575221,0.044248,1956.0,1977,1997,21,19,39.0,,,
318,1996,2295,152,0.453947,0.967105,0.144737,0.440789,0.572368,0.026316,1956.0,1977,1997,21,20,40.0,,,
322,1982,2368,77,0.311688,0.662338,0.077922,0.220779,0.454545,0.012987,1951.0,1972,1985,14,11,31.0,,,
323,1983,2368,73,0.342466,0.69863,0.068493,0.232877,0.424658,0.0,1951.0,1972,1985,14,12,32.0,,,
324,1984,2368,35,0.057143,0.514286,0.028571,0.457143,0.285714,0.0,1951.0,1972,1985,14,13,33.0,,,
327,1982,2443,34,0.205882,0.205882,0.088235,0.176471,0.411765,0.0,1957.0,1980,1987,8,3,25.0,,,
328,1983,2443,56,0.089286,0.392857,0.053571,0.196429,0.482143,0.017857,1957.0,1980,1987,8,4,26.0,,,
329,1984,2443,86,0.27907,0.744186,0.046512,0.139535,0.593023,0.0,1957.0,1980,1987,8,5,27.0,,,
330,1985,2443,105,0.52381,1.047619,0.171429,0.095238,0.657143,0.028571,1957.0,1980,1987,8,6,28.0,,,
331,1986,2443,81,0.185185,0.45679,0.098765,0.185185,0.617284,0.0,1957.0,1980,1987,8,7,29.0,,,


In [68]:
validHistoricRpG.iloc[[230, 235]].head()

Unnamed: 0,yearID,fpID,G,RpG,HpG,HRpG,BBpG,SOpG,SBpG,birthYear,debutYear,finalYear,totalYears,careerYear,age,prevTwo,prevOne,nextOne
327,1982,2443,34,0.205882,0.205882,0.088235,0.176471,0.411765,0.0,1957.0,1980,1987,8,3,25.0,,,
335,1982,2599,137,0.423358,0.773723,0.153285,0.525547,0.532847,0.043796,1954.0,1976,1988,13,7,28.0,,,
