In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import pandas as pd
from pathlib import Path

In [None]:
import psycopg2
from config import db_password

# Open a connection to the "baseball_data" PostgreSQL database on localhost:5432.
# The PostgreSQL server must be accessed through the PostgreSQL app or a terminal shell.
# The password is read from the local config module rather than written in the notebook.

conn = psycopg2.connect(
    host="localhost",
    port=5432,
    database="baseball_data",
    user="postgres",
    password=db_password,
)


In [None]:
# Create a cursor object on the open connection; it is used to run the sample query below
cur = conn.cursor()

In [None]:
# Sample query: fetch the first five rows of the "career_batter2" table in the "baseball_data" database
sample_sql = """SELECT * FROM career_batter2 LIMIT 5"""
cur.execute(sample_sql)
query_results = cur.fetchall()
print(query_results)

In [None]:
# Load the whole "career_batter2" table from Postgres into a DataFrame and display it
career_batter = pd.read_sql(sql='SELECT * FROM career_batter2', con=conn)
career_batter

In [None]:
# Load the whole "hall_batter" table from Postgres into a DataFrame and display it
hall_batter = pd.read_sql(sql='SELECT * FROM hall_batter', con=conn)
hall_batter

In [None]:
# Release the database resources: close the cursor first, then the connection
cur.close()
conn.close()

In [2]:
# Read the batter statistics (one row per playerID, with a Y/N "inducted" flag) from CSV
file_path = Path("hall_batter.csv")
hall_batter_df = pd.read_csv(filepath_or_buffer=file_path)
hall_batter_df

Unnamed: 0,playerID,G,AB,R,H,2B,3B,HR,RBI,SB,BB,SO,HBP,SH,SF,AVG,OBP,SLG,inducted
0,aardsda01,331.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.000000,0.000000,0.000000,N
1,aaronha01,3298.0,12364.0,2174.0,3771.0,624.0,98.0,755.0,2297.0,240.0,1402.0,1383.0,32.0,21.0,121.0,0.304998,0.373949,0.554513,Y
2,aaronto01,437.0,944.0,102.0,216.0,42.0,6.0,13.0,94.0,9.0,86.0,145.0,0.0,9.0,6.0,0.228814,0.291506,0.327331,N
3,aasedo01,448.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,N
4,abadan01,15.0,21.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,4.0,5.0,0.0,0.0,0.0,0.095238,0.240000,0.095238,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17517,zupcibo01,319.0,795.0,99.0,199.0,47.0,4.0,7.0,80.0,7.0,57.0,137.0,6.0,20.0,8.0,0.250314,0.302540,0.345912,N
17518,zupofr01,16.0,18.0,3.0,3.0,1.0,0.0,0.0,0.0,0.0,2.0,6.0,0.0,0.0,0.0,0.166667,0.250000,0.222222,N
17519,zuvelpa01,209.0,491.0,41.0,109.0,17.0,2.0,2.0,20.0,2.0,34.0,50.0,2.0,18.0,0.0,0.221996,0.275142,0.276986,N
17520,zuverge01,266.0,142.0,5.0,21.0,2.0,1.0,0.0,7.0,0.0,9.0,39.0,0.0,16.0,0.0,0.147887,0.198675,0.176056,N


In [4]:
# RFC model data preprocessing
# Encode the "inducted" flag numerically: Y -> 1, N -> 0.
# Any value outside the mapping is kept as it was (map() yields NaN there and fillna restores it).

b = {'Y': 1, 'N': 0}
inducted_raw = hall_batter_df['inducted']
hall_batter_df['inducted'] = inducted_raw.map(b).fillna(inducted_raw)
hall_batter_df

Unnamed: 0,playerID,G,AB,R,H,2B,3B,HR,RBI,SB,BB,SO,HBP,SH,SF,AVG,OBP,SLG,inducted
0,aardsda01,331.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.000000,0.000000,0.000000,0
1,aaronha01,3298.0,12364.0,2174.0,3771.0,624.0,98.0,755.0,2297.0,240.0,1402.0,1383.0,32.0,21.0,121.0,0.304998,0.373949,0.554513,1
2,aaronto01,437.0,944.0,102.0,216.0,42.0,6.0,13.0,94.0,9.0,86.0,145.0,0.0,9.0,6.0,0.228814,0.291506,0.327331,0
3,aasedo01,448.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0
4,abadan01,15.0,21.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,4.0,5.0,0.0,0.0,0.0,0.095238,0.240000,0.095238,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17517,zupcibo01,319.0,795.0,99.0,199.0,47.0,4.0,7.0,80.0,7.0,57.0,137.0,6.0,20.0,8.0,0.250314,0.302540,0.345912,0
17518,zupofr01,16.0,18.0,3.0,3.0,1.0,0.0,0.0,0.0,0.0,2.0,6.0,0.0,0.0,0.0,0.166667,0.250000,0.222222,0
17519,zuvelpa01,209.0,491.0,41.0,109.0,17.0,2.0,2.0,20.0,2.0,34.0,50.0,2.0,18.0,0.0,0.221996,0.275142,0.276986,0
17520,zuverge01,266.0,142.0,5.0,21.0,2.0,1.0,0.0,7.0,0.0,9.0,39.0,0.0,16.0,0.0,0.147887,0.198675,0.176056,0


In [5]:
# Set target and features variables
y = hall_batter_df.inducted

# Later cells append model output to hall_batter_df as extra columns. Those columns must
# never become features: if this cell is re-run after they exist, dropping only
# "playerID"/"inducted" would leak the model's own predictions into X.
derived_columns = [
    col for col in ("HOF Prediction", "Yes HOF Probability")
    if col in hall_batter_df.columns
]
X = hall_batter_df.drop(columns=["playerID", "inducted", *derived_columns])

# Split training/test datasets (fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Scale the data
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler on the training rows only, so no test-set statistics are used
X_scaler = scaler.fit(X_train)

# Apply the same fitted transformation to both splits
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [6]:
# Build a random forest of 128 trees (seeded for reproducibility) and train it
# on the scaled training data; fit() returns the fitted estimator itself
rf_model = RandomForestClassifier(n_estimators=128, random_state=42).fit(X_train_scaled, y_train)

In [7]:
# Make predictions using the testing data
predictions = rf_model.predict(X_test_scaled)

# Display the predicted class labels (0 or 1) for the test rows
predictions

array([0, 0, 0, ..., 0, 0, 0])

In [8]:
# Evaluate the model on the held-out test set

# Calculate the confusion matrix. The labels are passed explicitly so the result is always
# 2x2 (rows = actual, columns = predicted) and lines up with the names used below, even if
# one class happens to be absent from y_test or from the predictions.
cm = confusion_matrix(y_test, predictions, labels=[0, 1])

# Create a DataFrame from the confusion matrix.
cm_df = pd.DataFrame(cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"])

# Calculate the accuracy score.
# NOTE: the classes are heavily imbalanced (very few inducted players), so accuracy is
# dominated by class 0; read the class-1 precision/recall in the report as well.
acc_score = accuracy_score(y_test, predictions)

# Display results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confustion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,4332,10
Actual 1,22,17


Accuracy Score : 0.9926957315681352
Classification Report
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      4342
           1       0.63      0.44      0.52        39

    accuracy                           0.99      4381
   macro avg       0.81      0.72      0.76      4381
weighted avg       0.99      0.99      0.99      4381



In [9]:
# Calculate feature importance in the RFC model
# One value per feature column, in the same order as the columns of X
importances = rf_model.feature_importances_
importances

array([0.07256709, 0.08837472, 0.16197959, 0.1209019 , 0.0470188 ,
       0.06909053, 0.03418336, 0.06915078, 0.03020688, 0.03703316,
       0.04598768, 0.03204744, 0.03563615, 0.02702189, 0.0633273 ,
       0.02766067, 0.03781207])

In [10]:
# Rank the features from most to least important as (importance, column name) pairs
feature_ranking = list(zip(rf_model.feature_importances_, X.columns))
feature_ranking.sort(reverse=True)
feature_ranking

[(0.161979591399114, 'R'),
 (0.12090189663761962, 'H'),
 (0.08837472135858362, 'AB'),
 (0.07256709364819917, 'G'),
 (0.06915078140990419, 'RBI'),
 (0.06909052712887079, '3B'),
 (0.06332730461073102, 'AVG'),
 (0.047018795400202484, '2B'),
 (0.04598768316231021, 'SO'),
 (0.037812066413747736, 'SLG'),
 (0.03703316118747247, 'BB'),
 (0.03563614679512566, 'SH'),
 (0.03418335570554486, 'HR'),
 (0.03204744433219931, 'HBP'),
 (0.030206875205156428, 'SB'),
 (0.027660665760771214, 'OBP'),
 (0.027021889844447408, 'SF')]

In [11]:
# Save the Model
import joblib

# Persist the fitted random forest to disk; joblib.dump returns the list of file names written
filename = 'finalized_batter_RFCModel.sav'
joblib.dump(rf_model, filename)

['finalized_batter_RFCModel.sav']

In [12]:
# Load the saved Model
# NOTE(review): joblib.load unpickles the file, so only load model files from a trusted source

loaded_model = joblib.load(filename)
# Score the reloaded model on the scaled test set as a sanity check on the save/load round trip
result = loaded_model.score(X_test_scaled, y_test)
print(result)

0.9926957315681352


In [13]:
# Whole DF: score every player in the dataset (training and test rows alike).
# Select exactly the feature columns the scaler and model were fitted on (X.columns) rather
# than dropping columns from hall_batter_df: later cells add "HOF Prediction" and
# "Yes HOF Probability" to that frame, and a drop-based selection would then hand extra
# columns to the scaler when this cell is re-run.
X_2 = hall_batter_df[X.columns]
X_2_scaled = X_scaler.transform(X_2)

# Predict a Hall of Fame label for every row.
# NOTE: this rebinds `predictions`, which until now held the test-set predictions.
predictions = loaded_model.predict(X_2_scaled)

predictions

array([0, 1, 0, ..., 0, 0, 0])

In [14]:
# Convert the prediction array to a plain Python list.
# The guard keeps the cell re-runnable: a list has no .tolist(), so converting twice would raise.
if not isinstance(predictions, list):
    predictions = predictions.tolist()

In [16]:
# Attach the model's predicted label (1 or 0) to every player row, then display the frame
hall_batter_df["HOF Prediction"] = predictions
hall_batter_df

Unnamed: 0,playerID,G,AB,R,H,2B,3B,HR,RBI,SB,BB,SO,HBP,SH,SF,AVG,OBP,SLG,inducted,HOF Prediction
0,aardsda01,331.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.000000,0.000000,0.000000,0,0
1,aaronha01,3298.0,12364.0,2174.0,3771.0,624.0,98.0,755.0,2297.0,240.0,1402.0,1383.0,32.0,21.0,121.0,0.304998,0.373949,0.554513,1,1
2,aaronto01,437.0,944.0,102.0,216.0,42.0,6.0,13.0,94.0,9.0,86.0,145.0,0.0,9.0,6.0,0.228814,0.291506,0.327331,0,0
3,aasedo01,448.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0,0
4,abadan01,15.0,21.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,4.0,5.0,0.0,0.0,0.0,0.095238,0.240000,0.095238,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17517,zupcibo01,319.0,795.0,99.0,199.0,47.0,4.0,7.0,80.0,7.0,57.0,137.0,6.0,20.0,8.0,0.250314,0.302540,0.345912,0,0
17518,zupofr01,16.0,18.0,3.0,3.0,1.0,0.0,0.0,0.0,0.0,2.0,6.0,0.0,0.0,0.0,0.166667,0.250000,0.222222,0,0
17519,zuvelpa01,209.0,491.0,41.0,109.0,17.0,2.0,2.0,20.0,2.0,34.0,50.0,2.0,18.0,0.0,0.221996,0.275142,0.276986,0,0
17520,zuverge01,266.0,142.0,5.0,21.0,2.0,1.0,0.0,7.0,0.0,9.0,39.0,0.0,16.0,0.0,0.147887,0.198675,0.176056,0,0


In [18]:
# Inducted players that the model also predicts as inducted
is_inducted = hall_batter_df['inducted'] == 1
is_predicted = hall_batter_df['HOF Prediction'] == 1
hall_batter_df.loc[is_inducted & is_predicted]

Unnamed: 0,playerID,G,AB,R,H,2B,3B,HR,RBI,SB,BB,SO,HBP,SH,SF,AVG,OBP,SLG,inducted,HOF Prediction
1,aaronha01,3298.0,12364.0,2174.0,3771.0,624.0,98.0,755.0,2297.0,240.0,1402.0,1383.0,32.0,21.0,121.0,0.304998,0.373949,0.554513,1,1
239,alomaro01,2379.0,9073.0,1508.0,2724.0,504.0,80.0,210.0,1134.0,474.0,1032.0,1140.0,50.0,148.0,97.0,0.300231,0.371245,0.442852,1,1
378,ansonca01,2524.0,10281.0,1999.0,3435.0,582.0,142.0,97.0,2075.0,277.0,984.0,330.0,32.0,34.0,0.0,0.334111,0.393998,0.446649,1,1
386,aparilu01,2601.0,10230.0,1335.0,2677.0,394.0,92.0,83.0,791.0,506.0,736.0,742.0,27.0,161.0,76.0,0.261681,0.310778,0.342522,1,1
525,averiea01,1668.0,6353.0,1224.0,2019.0,401.0,128.0,238.0,1164.0,70.0,774.0,518.0,33.0,55.0,0.0,0.317803,0.394693,0.533606,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17020,willite01,2292.0,7706.0,1798.0,2654.0,525.0,71.0,521.0,1839.0,24.0,2021.0,709.0,39.0,5.0,20.0,0.344407,0.481709,0.633792,1,1
17120,winfida01,2973.0,11003.0,1669.0,3110.0,540.0,88.0,465.0,1833.0,223.0,1216.0,1686.0,25.0,19.0,95.0,0.282650,0.352622,0.474507,1,1
17351,yastrca01,3308.0,11988.0,1816.0,3419.0,646.0,59.0,452.0,1844.0,168.0,1845.0,1393.0,40.0,13.0,105.0,0.285202,0.379453,0.462045,1,1
17431,youngro01,1211.0,4627.0,812.0,1491.0,236.0,93.0,42.0,592.0,153.0,550.0,390.0,37.0,119.0,0.0,0.322239,0.398542,0.440674,1,1


In [19]:
# All players who were actually inducted, regardless of what the model predicted
inducted_mask = hall_batter_df['inducted'].eq(1)
hall_batter_df.loc[inducted_mask]

Unnamed: 0,playerID,G,AB,R,H,2B,3B,HR,RBI,SB,BB,SO,HBP,SH,SF,AVG,OBP,SLG,inducted,HOF Prediction
1,aaronha01,3298.0,12364.0,2174.0,3771.0,624.0,98.0,755.0,2297.0,240.0,1402.0,1383.0,32.0,21.0,121.0,0.304998,0.373949,0.554513,1,1
239,alomaro01,2379.0,9073.0,1508.0,2724.0,504.0,80.0,210.0,1134.0,474.0,1032.0,1140.0,50.0,148.0,97.0,0.300231,0.371245,0.442852,1,1
378,ansonca01,2524.0,10281.0,1999.0,3435.0,582.0,142.0,97.0,2075.0,277.0,984.0,330.0,32.0,34.0,0.0,0.334111,0.393998,0.446649,1,1
386,aparilu01,2601.0,10230.0,1335.0,2677.0,394.0,92.0,83.0,791.0,506.0,736.0,742.0,27.0,161.0,76.0,0.261681,0.310778,0.342522,1,1
466,ashburi01,2189.0,8365.0,1322.0,2574.0,317.0,109.0,29.0,586.0,234.0,1198.0,571.0,43.0,112.0,18.0,0.307711,0.396405,0.382068,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17067,wilsoha01,1348.0,4760.0,884.0,1461.0,266.0,67.0,244.0,1063.0,52.0,674.0,713.0,20.0,102.0,0.0,0.306933,0.395123,0.544748,1,0
17120,winfida01,2973.0,11003.0,1669.0,3110.0,540.0,88.0,465.0,1833.0,223.0,1216.0,1686.0,25.0,19.0,95.0,0.282650,0.352622,0.474507,1,1
17351,yastrca01,3308.0,11988.0,1816.0,3419.0,646.0,59.0,452.0,1844.0,168.0,1845.0,1393.0,40.0,13.0,105.0,0.285202,0.379453,0.462045,1,1
17431,youngro01,1211.0,4627.0,812.0,1491.0,236.0,93.0,42.0,592.0,153.0,550.0,390.0,37.0,119.0,0.0,0.322239,0.398542,0.440674,1,1


In [20]:
# Measure probability of predictions
# One row per player from the scaled full dataset; each row holds the probability of
# class 0 followed by the probability of class 1
prediction_proba = rf_model.predict_proba(X_2_scaled)

prediction_proba

array([[1.   , 0.   ],
       [0.125, 0.875],
       [1.   , 0.   ],
       ...,
       [1.   , 0.   ],
       [1.   , 0.   ],
       [1.   , 0.   ]])

In [21]:
# Convert the probability array to a list of [P(0), P(1)] pairs.
# The guard keeps the cell re-runnable: a list has no .tolist(), so converting twice would raise.
if not isinstance(prediction_proba, list):
    prediction_proba = prediction_proba.tolist()

In [22]:
# Transpose the per-player [no, yes] pairs into two parallel lists:
# N_proba holds the class-0 probabilities, Y_proba the class-1 probabilities
N_proba, Y_proba = (list(column) for column in zip(*prediction_proba))

In [24]:
# Create a column with the probability for a Yes
# (Y_proba is in the same row order as hall_batter_df, so it is assigned positionally)
hall_batter_df["Yes HOF Probability"] = Y_proba
hall_batter_df

Unnamed: 0,playerID,G,AB,R,H,2B,3B,HR,RBI,SB,...,SO,HBP,SH,SF,AVG,OBP,SLG,inducted,HOF Prediction,Yes HOF Probability
0,aardsda01,331.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,1.0,0.0,0.000000,0.000000,0.000000,0,0,0.000
1,aaronha01,3298.0,12364.0,2174.0,3771.0,624.0,98.0,755.0,2297.0,240.0,...,1383.0,32.0,21.0,121.0,0.304998,0.373949,0.554513,1,1,0.875
2,aaronto01,437.0,944.0,102.0,216.0,42.0,6.0,13.0,94.0,9.0,...,145.0,0.0,9.0,6.0,0.228814,0.291506,0.327331,0,0,0.000
3,aasedo01,448.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0,0,0.000
4,abadan01,15.0,21.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.095238,0.240000,0.095238,0,0,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17517,zupcibo01,319.0,795.0,99.0,199.0,47.0,4.0,7.0,80.0,7.0,...,137.0,6.0,20.0,8.0,0.250314,0.302540,0.345912,0,0,0.000
17518,zupofr01,16.0,18.0,3.0,3.0,1.0,0.0,0.0,0.0,0.0,...,6.0,0.0,0.0,0.0,0.166667,0.250000,0.222222,0,0,0.000
17519,zuvelpa01,209.0,491.0,41.0,109.0,17.0,2.0,2.0,20.0,2.0,...,50.0,2.0,18.0,0.0,0.221996,0.275142,0.276986,0,0,0.000
17520,zuverge01,266.0,142.0,5.0,21.0,2.0,1.0,0.0,7.0,0.0,...,39.0,0.0,16.0,0.0,0.147887,0.198675,0.176056,0,0,0.000


In [26]:
# Convert "Yes HOF Probability" column from object to float64
# NOTE(review): the column is built from a list of Python floats, so it is presumably
# already float64 and this conversion is a no-op; confirm before relying on it
hall_batter_df["Yes HOF Probability"] = pd.to_numeric(hall_batter_df["Yes HOF Probability"])

In [27]:
# Express the "Yes" probability as a percentage (0-100).
# The value is recomputed from Y_proba instead of multiplying the column in place, so
# re-running this cell cannot scale the column by 100 a second time.
hall_batter_df["Yes HOF Probability"] = 100 * pd.Series(Y_proba, index=hall_batter_df.index, dtype="float64")

In [29]:
# Display the frame with "Yes HOF Probability" now expressed on a 0-100 scale
hall_batter_df

Unnamed: 0,playerID,G,AB,R,H,2B,3B,HR,RBI,SB,...,SO,HBP,SH,SF,AVG,OBP,SLG,inducted,HOF Prediction,Yes HOF Probability
0,aardsda01,331.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,1.0,0.0,0.000000,0.000000,0.000000,0,0,0.0
1,aaronha01,3298.0,12364.0,2174.0,3771.0,624.0,98.0,755.0,2297.0,240.0,...,1383.0,32.0,21.0,121.0,0.304998,0.373949,0.554513,1,1,87.5
2,aaronto01,437.0,944.0,102.0,216.0,42.0,6.0,13.0,94.0,9.0,...,145.0,0.0,9.0,6.0,0.228814,0.291506,0.327331,0,0,0.0
3,aasedo01,448.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0,0,0.0
4,abadan01,15.0,21.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.095238,0.240000,0.095238,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17517,zupcibo01,319.0,795.0,99.0,199.0,47.0,4.0,7.0,80.0,7.0,...,137.0,6.0,20.0,8.0,0.250314,0.302540,0.345912,0,0,0.0
17518,zupofr01,16.0,18.0,3.0,3.0,1.0,0.0,0.0,0.0,0.0,...,6.0,0.0,0.0,0.0,0.166667,0.250000,0.222222,0,0,0.0
17519,zuvelpa01,209.0,491.0,41.0,109.0,17.0,2.0,2.0,20.0,2.0,...,50.0,2.0,18.0,0.0,0.221996,0.275142,0.276986,0,0,0.0
17520,zuverge01,266.0,142.0,5.0,21.0,2.0,1.0,0.0,7.0,0.0,...,39.0,0.0,16.0,0.0,0.147887,0.198675,0.176056,0,0,0.0


In [30]:
# Read the playerID -> first/last name lookup table from CSV
file_path = Path("Player_Names.csv")
Player_Names_df = pd.read_csv(filepath_or_buffer=file_path)
Player_Names_df

Unnamed: 0.1,Unnamed: 0,playerID,First Name,Last Name
0,0,aardsda01,David,Aardsma
1,1,aaronha01,Hank,Aaron
2,2,aaronto01,Tommie,Aaron
3,3,aasedo01,Don,Aase
4,4,abadan01,Andy,Abad
...,...,...,...,...
20088,20088,zupofr01,Frank,Zupo
20089,20089,zuvelpa01,Paul,Zuvella
20090,20090,zuverge01,George,Zuverink
20091,20091,zwilldu01,Dutch,Zwilling


In [31]:
# Preview the names table without the "Unnamed: 0" column.
# NOTE: drop() returns a new frame and the result is not assigned, so Player_Names_df itself
# still contains "Unnamed: 0"; that column is removed later, after the merge.
Player_Names_df.drop(columns = ['Unnamed: 0'])

Unnamed: 0,playerID,First Name,Last Name
0,aardsda01,David,Aardsma
1,aaronha01,Hank,Aaron
2,aaronto01,Tommie,Aaron
3,aasedo01,Don,Aase
4,abadan01,Andy,Abad
...,...,...,...
20088,zupofr01,Frank,Zupo
20089,zuvelpa01,Paul,Zuvella
20090,zuverge01,George,Zuverink
20091,zwilldu01,Dutch,Zwilling


In [32]:
# Attach first/last names to the batter stats: inner join on the shared playerID key,
# so only players present in both tables are kept
Names_Batter_df = pd.merge(Player_Names_df, hall_batter_df, on='playerID')

In [33]:
# Display the merged names + stats frame
Names_Batter_df

Unnamed: 0.1,Unnamed: 0,playerID,First Name,Last Name,G,AB,R,H,2B,3B,...,SO,HBP,SH,SF,AVG,OBP,SLG,inducted,HOF Prediction,Yes HOF Probability
0,0,aardsda01,David,Aardsma,331.0,4.0,0.0,0.0,0.0,0.0,...,2.0,0.0,1.0,0.0,0.000000,0.000000,0.000000,0,0,0.0
1,1,aaronha01,Hank,Aaron,3298.0,12364.0,2174.0,3771.0,624.0,98.0,...,1383.0,32.0,21.0,121.0,0.304998,0.373949,0.554513,1,1,87.5
2,2,aaronto01,Tommie,Aaron,437.0,944.0,102.0,216.0,42.0,6.0,...,145.0,0.0,9.0,6.0,0.228814,0.291506,0.327331,0,0,0.0
3,3,aasedo01,Don,Aase,448.0,5.0,0.0,0.0,0.0,0.0,...,3.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0,0,0.0
4,4,abadan01,Andy,Abad,15.0,21.0,1.0,2.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.095238,0.240000,0.095238,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17517,20087,zupcibo01,Bob,Zupcic,319.0,795.0,99.0,199.0,47.0,4.0,...,137.0,6.0,20.0,8.0,0.250314,0.302540,0.345912,0,0,0.0
17518,20088,zupofr01,Frank,Zupo,16.0,18.0,3.0,3.0,1.0,0.0,...,6.0,0.0,0.0,0.0,0.166667,0.250000,0.222222,0,0,0.0
17519,20089,zuvelpa01,Paul,Zuvella,209.0,491.0,41.0,109.0,17.0,2.0,...,50.0,2.0,18.0,0.0,0.221996,0.275142,0.276986,0,0,0.0
17520,20090,zuverge01,George,Zuverink,266.0,142.0,5.0,21.0,2.0,1.0,...,39.0,0.0,16.0,0.0,0.147887,0.198675,0.176056,0,0,0.0


In [34]:
# Remove the leftover "Unnamed: 0" column and the playerID key, keeping names plus stats
Names_Batter_df = Names_Batter_df.drop(["Unnamed: 0", "playerID"], axis=1)

In [35]:
# Display the final frame that is exported below
Names_Batter_df

Unnamed: 0,First Name,Last Name,G,AB,R,H,2B,3B,HR,RBI,...,SO,HBP,SH,SF,AVG,OBP,SLG,inducted,HOF Prediction,Yes HOF Probability
0,David,Aardsma,331.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,1.0,0.0,0.000000,0.000000,0.000000,0,0,0.0
1,Hank,Aaron,3298.0,12364.0,2174.0,3771.0,624.0,98.0,755.0,2297.0,...,1383.0,32.0,21.0,121.0,0.304998,0.373949,0.554513,1,1,87.5
2,Tommie,Aaron,437.0,944.0,102.0,216.0,42.0,6.0,13.0,94.0,...,145.0,0.0,9.0,6.0,0.228814,0.291506,0.327331,0,0,0.0
3,Don,Aase,448.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0,0,0.0
4,Andy,Abad,15.0,21.0,1.0,2.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.095238,0.240000,0.095238,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17517,Bob,Zupcic,319.0,795.0,99.0,199.0,47.0,4.0,7.0,80.0,...,137.0,6.0,20.0,8.0,0.250314,0.302540,0.345912,0,0,0.0
17518,Frank,Zupo,16.0,18.0,3.0,3.0,1.0,0.0,0.0,0.0,...,6.0,0.0,0.0,0.0,0.166667,0.250000,0.222222,0,0,0.0
17519,Paul,Zuvella,209.0,491.0,41.0,109.0,17.0,2.0,2.0,20.0,...,50.0,2.0,18.0,0.0,0.221996,0.275142,0.276986,0,0,0.0
17520,George,Zuverink,266.0,142.0,5.0,21.0,2.0,1.0,0.0,7.0,...,39.0,0.0,16.0,0.0,0.147887,0.198675,0.176056,0,0,0.0


In [36]:
# Export the named batter table to Names_Batter.json as a JSON array with one object per row
Names_Batter_df.to_json('Names_Batter.json', orient = 'records')

In [None]:
# Players to whom the model assigns at least a 90% Hall of Fame probability.
# Filter hall_batter_df itself: the boolean mask is built from hall_batter_df, whereas
# hall_batter (the Postgres copy) exists only if the optional database cells were run
# and does not carry the probability column.
hall_batter_df.loc[hall_batter_df['Yes HOF Probability'] >= 90]

In [None]:
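# Optional: format the "Yes HOF Probability" column as a percentage string (the column no longer keeps its float64 type).
# The values are already scaled to 0-100, so "{:.2f}%" is used; "{:.2%}" would multiply them by 100 again.
# hall_batter_df["Yes HOF Probability"] = hall_batter_df["Yes HOF Probability"].map("{:.2f}%".format)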