In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import pandas as pd
from pathlib import Path

In [None]:
import psycopg2
from config import db_password

# Establish a connection to the database by creating a cursor object
# The PostgreSQL server must be accessed through the PostgreSQL APP or Terminal Shell

conn = psycopg2.connect(host="localhost", port = 5432, database="baseball_data", user="postgres", password=db_password)


In [None]:
# Create a cursor object
cur = conn.cursor()

In [None]:
# A sample query of all data from the "career_batter" table in the "baseball_data" database
cur.execute("""SELECT * FROM career_batter2 LIMIT 5""")
query_results = cur.fetchall()
print(query_results)

In [None]:
# import entire hall_pitching table from postgres to dataframe
hall_pitching = pd.read_sql('SELECT * FROM hall_pitching', conn)
hall_pitching

In [None]:
# import entire hall_pitching table from postgres to dataframe
career_pitching = pd.read_sql('SELECT * FROM career_pitching', conn)
career_pitching

In [None]:
cur.close()
conn.close()

In [5]:
# Import CSV
file_path = Path('hall_pitching.csv')
hall_pitching = pd.read_csv(file_path)
hall_pitching

Unnamed: 0,playerID,W,L,G,GS,CG,SHO,SV,H,ER,...,SO,WP,HBP,R,IP,Win Percentage,Strikeout to Walk,WHIP,ERA_corrected,inducted
0,aardsda01,16.0,18.0,331.0,0.0,0.0,0.0,69.0,296.0,160.0,...,340.0,12.0,16.0,169.0,337.000000,0.470588,1.857923,1.421365,4.272997,N
1,aasedo01,66.0,60.0,448.0,91.0,22.0,5.0,82.0,1085.0,468.0,...,641.0,22.0,7.0,503.0,1109.333333,0.523810,1.402626,1.390024,3.796875,N
2,abadfe01,8.0,29.0,384.0,6.0,0.0,0.0,2.0,309.0,135.0,...,280.0,10.0,12.0,143.0,330.666667,0.216216,2.413793,1.285282,3.674395,N
3,abbeybe01,22.0,40.0,79.0,65.0,52.0,0.0,1.0,686.0,285.0,...,161.0,18.0,26.0,442.0,568.000000,0.354839,0.838542,1.545775,4.515845,N
4,abbotda01,0.0,2.0,3.0,1.0,1.0,0.0,1.0,19.0,9.0,...,1.0,3.0,1.0,14.0,13.000000,0.000000,0.125000,2.076923,6.230769,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9130,zuberbi01,43.0,42.0,224.0,65.0,23.0,3.0,6.0,767.0,374.0,...,383.0,28.0,4.0,418.0,786.000000,0.505882,0.818376,1.571247,4.282443,N
9131,zuberty01,1.0,2.0,23.0,0.0,0.0,0.0,0.0,15.0,10.0,...,30.0,1.0,1.0,11.0,22.000000,0.333333,1.500000,1.590909,4.090909,N
9132,zumayjo01,13.0,12.0,171.0,0.0,0.0,0.0,5.0,169.0,71.0,...,210.0,16.0,4.0,80.0,209.666667,0.520000,1.842105,1.349762,3.047695,N
9133,zuverge01,32.0,36.0,265.0,31.0,9.0,2.0,40.0,660.0,253.0,...,223.0,10.0,27.0,296.0,642.333333,0.470588,1.098522,1.343539,3.544888,N


In [6]:
# RFC model data preprocessing
# Convert inducted column to Y = 1, N = 0

b = {'Y': 1, 'N': 0}
hall_pitching['inducted'] = hall_pitching['inducted'].map(b).fillna(hall_pitching['inducted'])
hall_pitching

Unnamed: 0,playerID,W,L,G,GS,CG,SHO,SV,H,ER,...,SO,WP,HBP,R,IP,Win Percentage,Strikeout to Walk,WHIP,ERA_corrected,inducted
0,aardsda01,16.0,18.0,331.0,0.0,0.0,0.0,69.0,296.0,160.0,...,340.0,12.0,16.0,169.0,337.000000,0.470588,1.857923,1.421365,4.272997,0
1,aasedo01,66.0,60.0,448.0,91.0,22.0,5.0,82.0,1085.0,468.0,...,641.0,22.0,7.0,503.0,1109.333333,0.523810,1.402626,1.390024,3.796875,0
2,abadfe01,8.0,29.0,384.0,6.0,0.0,0.0,2.0,309.0,135.0,...,280.0,10.0,12.0,143.0,330.666667,0.216216,2.413793,1.285282,3.674395,0
3,abbeybe01,22.0,40.0,79.0,65.0,52.0,0.0,1.0,686.0,285.0,...,161.0,18.0,26.0,442.0,568.000000,0.354839,0.838542,1.545775,4.515845,0
4,abbotda01,0.0,2.0,3.0,1.0,1.0,0.0,1.0,19.0,9.0,...,1.0,3.0,1.0,14.0,13.000000,0.000000,0.125000,2.076923,6.230769,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9130,zuberbi01,43.0,42.0,224.0,65.0,23.0,3.0,6.0,767.0,374.0,...,383.0,28.0,4.0,418.0,786.000000,0.505882,0.818376,1.571247,4.282443,0
9131,zuberty01,1.0,2.0,23.0,0.0,0.0,0.0,0.0,15.0,10.0,...,30.0,1.0,1.0,11.0,22.000000,0.333333,1.500000,1.590909,4.090909,0
9132,zumayjo01,13.0,12.0,171.0,0.0,0.0,0.0,5.0,169.0,71.0,...,210.0,16.0,4.0,80.0,209.666667,0.520000,1.842105,1.349762,3.047695,0
9133,zuverge01,32.0,36.0,265.0,31.0,9.0,2.0,40.0,660.0,253.0,...,223.0,10.0,27.0,296.0,642.333333,0.470588,1.098522,1.343539,3.544888,0


In [7]:
# Was cleaned in ETL process

import numpy as np
hall_pitching.loc[hall_pitching["Strikeout to Walk"] == np.inf]

Unnamed: 0,playerID,W,L,G,GS,CG,SHO,SV,H,ER,...,SO,WP,HBP,R,IP,Win Percentage,Strikeout to Walk,WHIP,ERA_corrected,inducted


In [None]:
# indexNames = hall_pitching[(hall_pitching["Strikeout to Walk"] == np.inf)].index
# hall_pitching.drop(indexNames, inplace = True)
# hall_pitching

In [8]:
# Set target and features variables
y = hall_pitching.inducted
X = hall_pitching.drop(columns=["playerID", "inducted"])

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42)

#Scale the data
#Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [9]:
# Create a RFC model
rf_model = RandomForestClassifier(n_estimators=128, random_state = 42)

# Fit the model
rf_model = rf_model.fit(X_train_scaled, y_train)

In [10]:
# Make predictions using the testing data
predictions = rf_model.predict(X_test_scaled)

predictions

array([1, 0, 0, ..., 0, 0, 0])

In [11]:
# Evaluate the model
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)

# Create a DataFrame from the confusion matrix.
cm_df = pd.DataFrame(cm, index= ["Actual 0", "Actual 1"], columns = ["Predicted 0", "Predicted 1"])

# Calculate the accuracy score
acc_score = accuracy_score(y_test, predictions)

# Display results
print("Confustion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confustion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,2261,2
Actual 1,10,11


Accuracy Score : 0.9947460595446584
Classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2263
           1       0.85      0.52      0.65        21

    accuracy                           0.99      2284
   macro avg       0.92      0.76      0.82      2284
weighted avg       0.99      0.99      0.99      2284



In [12]:
# Calculate feature importance in the RFC model
importances = rf_model.feature_importances_
importances

array([0.13541956, 0.0278705 , 0.03611699, 0.05002233, 0.04094502,
       0.07378295, 0.03795438, 0.05247399, 0.04098186, 0.02454295,
       0.03623011, 0.04963384, 0.02306703, 0.02803257, 0.04770745,
       0.0907966 , 0.03902184, 0.03740402, 0.05420219, 0.07379383])

In [13]:
# Sort the features by their importance
sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)

[(0.13541955652708237, 'W'),
 (0.0907965951520382, 'IP'),
 (0.07379383058825004, 'ERA_corrected'),
 (0.07378294558226511, 'SHO'),
 (0.054202193358142935, 'WHIP'),
 (0.05247399304311412, 'H'),
 (0.05002232658925565, 'GS'),
 (0.04963383671452981, 'SO'),
 (0.04770744606372581, 'R'),
 (0.04098185562829685, 'ER'),
 (0.04094502200155065, 'CG'),
 (0.03902184222530875, 'Win Percentage'),
 (0.03795438163530598, 'SV'),
 (0.03740401660091011, 'Strikeout to Walk'),
 (0.03623011335496199, 'BB'),
 (0.0361169885923789, 'G'),
 (0.028032572704116812, 'HBP'),
 (0.02787050113599, 'L'),
 (0.024542953107736395, 'HR'),
 (0.023067029395039636, 'WP')]

In [14]:
# Save the Model
import joblib

filename = 'finalized_pitching_RFCModel.sav'
joblib.dump(rf_model, filename)

['finalized_pitching_RFCModel.sav']

In [15]:
# Load the saved Model

loaded_model = joblib.load(filename)
result = loaded_model.score(X_test_scaled, y_test)
print(result)

0.9947460595446584


In [16]:
# Whole DF
X_2 = hall_pitching.drop(columns=["playerID", "inducted"])
X_2_scaled = X_scaler.transform(X_2)

# Make predictions using the testing data
predictions = loaded_model.predict(X_2_scaled)

predictions

array([0, 0, 0, ..., 0, 0, 0])

In [17]:
predictions = predictions.tolist()

In [18]:
hall_pitching["HOF Prediction"] = predictions
hall_pitching

Unnamed: 0,playerID,W,L,G,GS,CG,SHO,SV,H,ER,...,WP,HBP,R,IP,Win Percentage,Strikeout to Walk,WHIP,ERA_corrected,inducted,HOF Prediction
0,aardsda01,16.0,18.0,331.0,0.0,0.0,0.0,69.0,296.0,160.0,...,12.0,16.0,169.0,337.000000,0.470588,1.857923,1.421365,4.272997,0,0
1,aasedo01,66.0,60.0,448.0,91.0,22.0,5.0,82.0,1085.0,468.0,...,22.0,7.0,503.0,1109.333333,0.523810,1.402626,1.390024,3.796875,0,0
2,abadfe01,8.0,29.0,384.0,6.0,0.0,0.0,2.0,309.0,135.0,...,10.0,12.0,143.0,330.666667,0.216216,2.413793,1.285282,3.674395,0,0
3,abbeybe01,22.0,40.0,79.0,65.0,52.0,0.0,1.0,686.0,285.0,...,18.0,26.0,442.0,568.000000,0.354839,0.838542,1.545775,4.515845,0,0
4,abbotda01,0.0,2.0,3.0,1.0,1.0,0.0,1.0,19.0,9.0,...,3.0,1.0,14.0,13.000000,0.000000,0.125000,2.076923,6.230769,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9130,zuberbi01,43.0,42.0,224.0,65.0,23.0,3.0,6.0,767.0,374.0,...,28.0,4.0,418.0,786.000000,0.505882,0.818376,1.571247,4.282443,0,0
9131,zuberty01,1.0,2.0,23.0,0.0,0.0,0.0,0.0,15.0,10.0,...,1.0,1.0,11.0,22.000000,0.333333,1.500000,1.590909,4.090909,0,0
9132,zumayjo01,13.0,12.0,171.0,0.0,0.0,0.0,5.0,169.0,71.0,...,16.0,4.0,80.0,209.666667,0.520000,1.842105,1.349762,3.047695,0,0
9133,zuverge01,32.0,36.0,265.0,31.0,9.0,2.0,40.0,660.0,253.0,...,10.0,27.0,296.0,642.333333,0.470588,1.098522,1.343539,3.544888,0,0


In [19]:
hall_pitching.loc[(hall_pitching['inducted'] == 1) & (hall_pitching['HOF Prediction'] == 1) ]

Unnamed: 0,playerID,W,L,G,GS,CG,SHO,SV,H,ER,...,WP,HBP,R,IP,Win Percentage,Strikeout to Walk,WHIP,ERA_corrected,inducted,HOF Prediction
95,alexape01,373.0,208.0,696.0,599.0,437.0,90.0,32.0,4868.0,1476.0,...,39.0,70.0,1851.0,5190.000000,0.641997,2.311251,1.121195,2.559538,1,1
189,ansonca01,0.0,1.0,3.0,0.0,0.0,0.0,1.0,4.0,2.0,...,0.0,0.0,5.0,4.000000,0.000000,0.500000,1.500000,4.500000,1,1
493,becklja01,0.0,1.0,1.0,1.0,0.0,0.0,0.0,9.0,3.0,...,0.0,0.0,8.0,4.000000,0.000000,2.000000,2.500000,6.750000,1,1
546,bendech01,212.0,127.0,459.0,334.0,255.0,40.0,34.0,2645.0,823.0,...,72.0,102.0,1110.0,3017.000000,0.625369,2.403090,1.112695,2.455088,1,1
708,blylebe01,287.0,250.0,692.0,685.0,242.0,60.0,0.0,4632.0,1830.0,...,114.0,155.0,2029.0,4970.000000,0.534451,2.799546,1.197988,3.313883,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8587,wardjo01,164.0,103.0,293.0,262.0,245.0,24.0,3.0,2324.0,576.0,...,144.0,0.0,1185.0,2469.666667,0.614232,3.636364,1.043461,2.099069,1,1
8675,welchmi01,307.0,210.0,565.0,549.0,525.0,41.0,4.0,4588.0,1447.0,...,274.0,52.0,2556.0,4802.000000,0.593810,1.426369,1.225531,2.711995,1,1
8792,wilheho01,143.0,122.0,1070.0,52.0,20.0,5.0,227.0,1757.0,632.0,...,90.0,62.0,773.0,2254.333333,0.539623,2.069409,1.124501,2.523141,1,1
8854,willivi01,249.0,205.0,513.0,471.0,388.0,50.0,11.0,3621.0,1167.0,...,94.0,156.0,1620.0,3996.000000,0.548458,1.362211,1.209459,2.628378,1,1


In [20]:
hall_pitching.loc[(hall_pitching['inducted'] == 1)]

Unnamed: 0,playerID,W,L,G,GS,CG,SHO,SV,H,ER,...,WP,HBP,R,IP,Win Percentage,Strikeout to Walk,WHIP,ERA_corrected,inducted,HOF Prediction
95,alexape01,373.0,208.0,696.0,599.0,437.0,90.0,32.0,4868.0,1476.0,...,39.0,70.0,1851.0,5190.000000,0.641997,2.311251,1.121195,2.559538,1,1
189,ansonca01,0.0,1.0,3.0,0.0,0.0,0.0,1.0,4.0,2.0,...,0.0,0.0,5.0,4.000000,0.000000,0.500000,1.500000,4.500000,1,1
493,becklja01,0.0,1.0,1.0,1.0,0.0,0.0,0.0,9.0,3.0,...,0.0,0.0,8.0,4.000000,0.000000,2.000000,2.500000,6.750000,1,1
546,bendech01,212.0,127.0,459.0,334.0,255.0,40.0,34.0,2645.0,823.0,...,72.0,102.0,1110.0,3017.000000,0.625369,2.403090,1.112695,2.455088,1,1
708,blylebe01,287.0,250.0,692.0,685.0,242.0,60.0,0.0,4632.0,1830.0,...,114.0,155.0,2029.0,4970.000000,0.534451,2.799546,1.197988,3.313883,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8737,whitede01,0.0,0.0,2.0,0.0,0.0,0.0,1.0,19.0,8.0,...,0.0,0.0,15.0,10.000000,0.000000,1.500000,2.100000,7.200000,1,0
8792,wilheho01,143.0,122.0,1070.0,52.0,20.0,5.0,227.0,1757.0,632.0,...,90.0,62.0,773.0,2254.333333,0.539623,2.069409,1.124501,2.523141,1,1
8854,willivi01,249.0,205.0,513.0,471.0,388.0,50.0,11.0,3621.0,1167.0,...,94.0,156.0,1620.0,3996.000000,0.548458,1.362211,1.209459,2.628378,1,1
9028,wynnea01,300.0,244.0,691.0,612.0,290.0,49.0,15.0,4291.0,1796.0,...,51.0,64.0,2037.0,4564.000000,0.551471,1.314930,1.329097,3.541630,1,0


In [21]:
# Measure probability of predictions
prediction_proba = rf_model.predict_proba(X_2_scaled)

prediction_proba

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]])

In [22]:
prediction_proba = prediction_proba.tolist()

In [23]:
# Split the list into two lists
N_proba, Y_proba = map(list, zip(*prediction_proba))

In [24]:
# Create a column with the probability for a Yes
hall_pitching["Yes HOF Probability"] = Y_proba
hall_pitching

Unnamed: 0,playerID,W,L,G,GS,CG,SHO,SV,H,ER,...,HBP,R,IP,Win Percentage,Strikeout to Walk,WHIP,ERA_corrected,inducted,HOF Prediction,Yes HOF Probability
0,aardsda01,16.0,18.0,331.0,0.0,0.0,0.0,69.0,296.0,160.0,...,16.0,169.0,337.000000,0.470588,1.857923,1.421365,4.272997,0,0,0.0
1,aasedo01,66.0,60.0,448.0,91.0,22.0,5.0,82.0,1085.0,468.0,...,7.0,503.0,1109.333333,0.523810,1.402626,1.390024,3.796875,0,0,0.0
2,abadfe01,8.0,29.0,384.0,6.0,0.0,0.0,2.0,309.0,135.0,...,12.0,143.0,330.666667,0.216216,2.413793,1.285282,3.674395,0,0,0.0
3,abbeybe01,22.0,40.0,79.0,65.0,52.0,0.0,1.0,686.0,285.0,...,26.0,442.0,568.000000,0.354839,0.838542,1.545775,4.515845,0,0,0.0
4,abbotda01,0.0,2.0,3.0,1.0,1.0,0.0,1.0,19.0,9.0,...,1.0,14.0,13.000000,0.000000,0.125000,2.076923,6.230769,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9130,zuberbi01,43.0,42.0,224.0,65.0,23.0,3.0,6.0,767.0,374.0,...,4.0,418.0,786.000000,0.505882,0.818376,1.571247,4.282443,0,0,0.0
9131,zuberty01,1.0,2.0,23.0,0.0,0.0,0.0,0.0,15.0,10.0,...,1.0,11.0,22.000000,0.333333,1.500000,1.590909,4.090909,0,0,0.0
9132,zumayjo01,13.0,12.0,171.0,0.0,0.0,0.0,5.0,169.0,71.0,...,4.0,80.0,209.666667,0.520000,1.842105,1.349762,3.047695,0,0,0.0
9133,zuverge01,32.0,36.0,265.0,31.0,9.0,2.0,40.0,660.0,253.0,...,27.0,296.0,642.333333,0.470588,1.098522,1.343539,3.544888,0,0,0.0


In [25]:
# Convert "Yes HOF Probability" column from object to float64
hall_pitching["Yes HOF Probability"] = pd.to_numeric(hall_pitching["Yes HOF Probability"])

In [26]:
hall_pitching["Yes HOF Probability"] = 100 * hall_pitching["Yes HOF Probability"]

In [27]:
hall_pitching

Unnamed: 0,playerID,W,L,G,GS,CG,SHO,SV,H,ER,...,HBP,R,IP,Win Percentage,Strikeout to Walk,WHIP,ERA_corrected,inducted,HOF Prediction,Yes HOF Probability
0,aardsda01,16.0,18.0,331.0,0.0,0.0,0.0,69.0,296.0,160.0,...,16.0,169.0,337.000000,0.470588,1.857923,1.421365,4.272997,0,0,0.0
1,aasedo01,66.0,60.0,448.0,91.0,22.0,5.0,82.0,1085.0,468.0,...,7.0,503.0,1109.333333,0.523810,1.402626,1.390024,3.796875,0,0,0.0
2,abadfe01,8.0,29.0,384.0,6.0,0.0,0.0,2.0,309.0,135.0,...,12.0,143.0,330.666667,0.216216,2.413793,1.285282,3.674395,0,0,0.0
3,abbeybe01,22.0,40.0,79.0,65.0,52.0,0.0,1.0,686.0,285.0,...,26.0,442.0,568.000000,0.354839,0.838542,1.545775,4.515845,0,0,0.0
4,abbotda01,0.0,2.0,3.0,1.0,1.0,0.0,1.0,19.0,9.0,...,1.0,14.0,13.000000,0.000000,0.125000,2.076923,6.230769,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9130,zuberbi01,43.0,42.0,224.0,65.0,23.0,3.0,6.0,767.0,374.0,...,4.0,418.0,786.000000,0.505882,0.818376,1.571247,4.282443,0,0,0.0
9131,zuberty01,1.0,2.0,23.0,0.0,0.0,0.0,0.0,15.0,10.0,...,1.0,11.0,22.000000,0.333333,1.500000,1.590909,4.090909,0,0,0.0
9132,zumayjo01,13.0,12.0,171.0,0.0,0.0,0.0,5.0,169.0,71.0,...,4.0,80.0,209.666667,0.520000,1.842105,1.349762,3.047695,0,0,0.0
9133,zuverge01,32.0,36.0,265.0,31.0,9.0,2.0,40.0,660.0,253.0,...,27.0,296.0,642.333333,0.470588,1.098522,1.343539,3.544888,0,0,0.0


In [28]:
# Import CSV
file_path = Path('Player_Names.csv')
Player_Names_df = pd.read_csv(file_path)
Player_Names_df

Unnamed: 0.1,Unnamed: 0,playerID,First Name,Last Name
0,0,aardsda01,David,Aardsma
1,1,aaronha01,Hank,Aaron
2,2,aaronto01,Tommie,Aaron
3,3,aasedo01,Don,Aase
4,4,abadan01,Andy,Abad
...,...,...,...,...
20088,20088,zupofr01,Frank,Zupo
20089,20089,zuvelpa01,Paul,Zuvella
20090,20090,zuverge01,George,Zuverink
20091,20091,zwilldu01,Dutch,Zwilling


In [29]:
Player_Names_df.drop(columns = ['Unnamed: 0'])

Unnamed: 0,playerID,First Name,Last Name
0,aardsda01,David,Aardsma
1,aaronha01,Hank,Aaron
2,aaronto01,Tommie,Aaron
3,aasedo01,Don,Aase
4,abadan01,Andy,Abad
...,...,...,...
20088,zupofr01,Frank,Zupo
20089,zuvelpa01,Paul,Zuvella
20090,zuverge01,George,Zuverink
20091,zwilldu01,Dutch,Zwilling


In [30]:
Names_Batter_df = Player_Names_df.merge(hall_pitching, left_on = 'playerID', right_on = 'playerID')

In [31]:
Names_Batter_df

Unnamed: 0.1,Unnamed: 0,playerID,First Name,Last Name,W,L,G,GS,CG,SHO,...,HBP,R,IP,Win Percentage,Strikeout to Walk,WHIP,ERA_corrected,inducted,HOF Prediction,Yes HOF Probability
0,0,aardsda01,David,Aardsma,16.0,18.0,331.0,0.0,0.0,0.0,...,16.0,169.0,337.000000,0.470588,1.857923,1.421365,4.272997,0,0,0.0
1,3,aasedo01,Don,Aase,66.0,60.0,448.0,91.0,22.0,5.0,...,7.0,503.0,1109.333333,0.523810,1.402626,1.390024,3.796875,0,0,0.0
2,5,abadfe01,Fernando,Abad,8.0,29.0,384.0,6.0,0.0,0.0,...,12.0,143.0,330.666667,0.216216,2.413793,1.285282,3.674395,0,0,0.0
3,8,abbeybe01,Bert,Abbey,22.0,40.0,79.0,65.0,52.0,0.0,...,26.0,442.0,568.000000,0.354839,0.838542,1.545775,4.515845,0,0,0.0
4,10,abbotda01,Dan,Abbott,0.0,2.0,3.0,1.0,1.0,0.0,...,1.0,14.0,13.000000,0.000000,0.125000,2.076923,6.230769,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9130,20081,zuberbi01,Bill,Zuber,43.0,42.0,224.0,65.0,23.0,3.0,...,4.0,418.0,786.000000,0.505882,0.818376,1.571247,4.282443,0,0,0.0
9131,20083,zuberty01,Tyler,Zuber,1.0,2.0,23.0,0.0,0.0,0.0,...,1.0,11.0,22.000000,0.333333,1.500000,1.590909,4.090909,0,0,0.0
9132,20085,zumayjo01,Joel,Zumaya,13.0,12.0,171.0,0.0,0.0,0.0,...,4.0,80.0,209.666667,0.520000,1.842105,1.349762,3.047695,0,0,0.0
9133,20090,zuverge01,George,Zuverink,32.0,36.0,265.0,31.0,9.0,2.0,...,27.0,296.0,642.333333,0.470588,1.098522,1.343539,3.544888,0,0,0.0


In [32]:
Names_Batter_df = Names_Batter_df.drop(columns = ["Unnamed: 0", "playerID"])

In [33]:
Names_Batter_df

Unnamed: 0,First Name,Last Name,W,L,G,GS,CG,SHO,SV,H,...,HBP,R,IP,Win Percentage,Strikeout to Walk,WHIP,ERA_corrected,inducted,HOF Prediction,Yes HOF Probability
0,David,Aardsma,16.0,18.0,331.0,0.0,0.0,0.0,69.0,296.0,...,16.0,169.0,337.000000,0.470588,1.857923,1.421365,4.272997,0,0,0.0
1,Don,Aase,66.0,60.0,448.0,91.0,22.0,5.0,82.0,1085.0,...,7.0,503.0,1109.333333,0.523810,1.402626,1.390024,3.796875,0,0,0.0
2,Fernando,Abad,8.0,29.0,384.0,6.0,0.0,0.0,2.0,309.0,...,12.0,143.0,330.666667,0.216216,2.413793,1.285282,3.674395,0,0,0.0
3,Bert,Abbey,22.0,40.0,79.0,65.0,52.0,0.0,1.0,686.0,...,26.0,442.0,568.000000,0.354839,0.838542,1.545775,4.515845,0,0,0.0
4,Dan,Abbott,0.0,2.0,3.0,1.0,1.0,0.0,1.0,19.0,...,1.0,14.0,13.000000,0.000000,0.125000,2.076923,6.230769,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9130,Bill,Zuber,43.0,42.0,224.0,65.0,23.0,3.0,6.0,767.0,...,4.0,418.0,786.000000,0.505882,0.818376,1.571247,4.282443,0,0,0.0
9131,Tyler,Zuber,1.0,2.0,23.0,0.0,0.0,0.0,0.0,15.0,...,1.0,11.0,22.000000,0.333333,1.500000,1.590909,4.090909,0,0,0.0
9132,Joel,Zumaya,13.0,12.0,171.0,0.0,0.0,0.0,5.0,169.0,...,4.0,80.0,209.666667,0.520000,1.842105,1.349762,3.047695,0,0,0.0
9133,George,Zuverink,32.0,36.0,265.0,31.0,9.0,2.0,40.0,660.0,...,27.0,296.0,642.333333,0.470588,1.098522,1.343539,3.544888,0,0,0.0


In [34]:
Names_Batter_df.to_json('Names_Pitching.json', orient = 'records')

In [None]:
hall_pitching.loc[(hall_pitcher_df['Yes HOF Probability'] >= 90)]

In [None]:
# Format the "Yes HOF Probability" column to a percentage, does not maintain float 64 type
# hall_pitcher_df["Yes HOF Probability"] = hall_pitcher_df["Yes HOF Probability"].map("{:.2%}".format)