In [632]:
#Importing Dependencies
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from pybaseball import team_batting

#Linear regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

#Logistic regression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

#Data
from mega import model_df
from mega import unplayed_games




In [633]:
#Copying and Viewing our data
df=model_df.copy()

display(df.head())
display(df.tail())

Unnamed: 0,Gm#,W/L,D/N,H/A,Opp,COL_at_bats,COL_ba,COL_hits,COL_hr,COL_kk,COL_obp,COL_walks,Opp_at_bats,Opp_ba,Opp_hits,OPP_HR_Column,OPP_kk,Opp_obp,Opp_walks
0,1,L,D,A,MIL,31,0.258065,8,1,8,0.351351,4,38,0.315789,12,3,11,0.333333,1
1,2,W,N,A,MIL,32,0.28125,9,1,5,0.342857,3,37,0.378378,14,2,11,0.390244,2
2,3,W,N,A,MIL,34,0.264706,9,1,5,0.305556,2,37,0.405405,15,3,8,0.463415,4
3,4,W,D,H,SDP,33,0.212121,7,0,8,0.235294,1,0,0.0,0,0,0,0.0,0
4,5,W,N,H,SDP,33,0.242424,8,0,4,0.305556,3,0,0.0,0,0,0,0.0,0


Unnamed: 0,Gm#,W/L,D/N,H/A,Opp,COL_at_bats,COL_ba,COL_hits,COL_hr,COL_kk,COL_obp,COL_walks,Opp_at_bats,Opp_ba,Opp_hits,OPP_HR_Column,OPP_kk,Opp_obp,Opp_walks
1808,129,L,N,A,NYY,28,0.178571,5,2,5,0.233333,2,31,0.129032,4,0,9,0.205882,3
1809,130,W,D,A,NYY,29,0.172414,5,1,9,0.294118,4,39,0.333333,13,2,14,0.386364,4
1810,131,L,D,A,NYY,35,0.342857,12,5,6,0.410256,3,29,0.206897,6,0,8,0.323529,5
1811,132,W,N,H,MIA,34,0.264706,9,0,8,0.324324,3,28,0.214286,6,1,11,0.3125,4
1812,133,L,N,H,MIA,35,0.342857,12,2,5,0.428571,6,34,0.382353,13,4,8,0.405405,1


In [634]:
#Removing game number, index works just fine.
df=df.drop("Gm#",axis=1) 
display(df.head())
display(df.tail())

Unnamed: 0,W/L,D/N,H/A,Opp,COL_at_bats,COL_ba,COL_hits,COL_hr,COL_kk,COL_obp,COL_walks,Opp_at_bats,Opp_ba,Opp_hits,OPP_HR_Column,OPP_kk,Opp_obp,Opp_walks
0,L,D,A,MIL,31,0.258065,8,1,8,0.351351,4,38,0.315789,12,3,11,0.333333,1
1,W,N,A,MIL,32,0.28125,9,1,5,0.342857,3,37,0.378378,14,2,11,0.390244,2
2,W,N,A,MIL,34,0.264706,9,1,5,0.305556,2,37,0.405405,15,3,8,0.463415,4
3,W,D,H,SDP,33,0.212121,7,0,8,0.235294,1,0,0.0,0,0,0,0.0,0
4,W,N,H,SDP,33,0.242424,8,0,4,0.305556,3,0,0.0,0,0,0,0.0,0


Unnamed: 0,W/L,D/N,H/A,Opp,COL_at_bats,COL_ba,COL_hits,COL_hr,COL_kk,COL_obp,COL_walks,Opp_at_bats,Opp_ba,Opp_hits,OPP_HR_Column,OPP_kk,Opp_obp,Opp_walks
1808,L,N,A,NYY,28,0.178571,5,2,5,0.233333,2,31,0.129032,4,0,9,0.205882,3
1809,W,D,A,NYY,29,0.172414,5,1,9,0.294118,4,39,0.333333,13,2,14,0.386364,4
1810,L,D,A,NYY,35,0.342857,12,5,6,0.410256,3,29,0.206897,6,0,8,0.323529,5
1811,W,N,H,MIA,34,0.264706,9,0,8,0.324324,3,28,0.214286,6,1,11,0.3125,4
1812,L,N,H,MIA,35,0.342857,12,2,5,0.428571,6,34,0.382353,13,4,8,0.405405,1


## Going Through Linear and Logistic Regression Models

### Linear

In [635]:
#The first step is getting the already preworked data into a useable format. 
#This includes the binary values and the categorical.

#Converting the binary columns with dummies.
dummies= pd.get_dummies(df[['W/L', 'D/N', 'H/A']], drop_first=True).astype(int)
df[['W/L', 'D/N', 'H/A']]=dummies[["W/L_W","D/N_N","H/A_H"]]

df.head()

Unnamed: 0,W/L,D/N,H/A,Opp,COL_at_bats,COL_ba,COL_hits,COL_hr,COL_kk,COL_obp,COL_walks,Opp_at_bats,Opp_ba,Opp_hits,OPP_HR_Column,OPP_kk,Opp_obp,Opp_walks
0,0,0,0,MIL,31,0.258065,8,1,8,0.351351,4,38,0.315789,12,3,11,0.333333,1
1,1,1,0,MIL,32,0.28125,9,1,5,0.342857,3,37,0.378378,14,2,11,0.390244,2
2,1,1,0,MIL,34,0.264706,9,1,5,0.305556,2,37,0.405405,15,3,8,0.463415,4
3,1,0,1,SDP,33,0.212121,7,0,8,0.235294,1,0,0.0,0,0,0,0.0,0
4,1,1,1,SDP,33,0.242424,8,0,4,0.305556,3,0,0.0,0,0,0,0.0,0


In [636]:
#Now to convert the opponent column into values.

#Getting the label encoder
encoder=LabelEncoder()
#Fit and transform the categorical column
df['Opp'] = encoder.fit_transform(df['Opp'])
df.head()

Unnamed: 0,W/L,D/N,H/A,Opp,COL_at_bats,COL_ba,COL_hits,COL_hr,COL_kk,COL_obp,COL_walks,Opp_at_bats,Opp_ba,Opp_hits,OPP_HR_Column,OPP_kk,Opp_obp,Opp_walks
0,0,0,0,14,31,0.258065,8,1,8,0.351351,4,38,0.315789,12,3,11,0.333333,1
1,1,1,0,14,32,0.28125,9,1,5,0.342857,3,37,0.378378,14,2,11,0.390244,2
2,1,1,0,14,34,0.264706,9,1,5,0.305556,2,37,0.405405,15,3,8,0.463415,4
3,1,0,1,21,33,0.212121,7,0,8,0.235294,1,0,0.0,0,0,0,0.0,0
4,1,1,1,21,33,0.242424,8,0,4,0.305556,3,0,0.0,0,0,0,0.0,0


In [637]:
#Listing the columns to make sure the continuous columns are numeric in order to scale
column_list = df.columns.tolist()
categorical_cols={"W/L","D/N","H/A","Opp"}
continuous_cols=list(set(column_list) - categorical_cols)
continuous_cols


#Making sure any non numeric data gets converted to nans.
df[continuous_cols] = df[continuous_cols].apply(pd.to_numeric, errors='coerce')
#dropping those as they'd mess up the data
df=df.dropna()

In [638]:
#Now that the data is cleaned up we can train test split.
X=df.drop(columns="W/L")
y=df["W/L"]
#Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y,random_state=69)

In [639]:
cat=["D/N","H/A","Opp"]
#Getting the scaler ready
scaler=StandardScaler()

#Fit and transform
x_train_scaled=scaler.fit_transform(X_train[continuous_cols])
x_test_scaled=scaler.transform(X_test[continuous_cols])

#Adding back the categorical data
x_train_scaled_df=pd.DataFrame(x_train_scaled,columns=continuous_cols,index=X_train.index)
x_train_final=pd.concat([x_train_scaled_df,X_train[cat]],axis=1)
x_train_final=x_train_final.dropna()

x_test_scaled_df=pd.DataFrame(x_test_scaled,columns=continuous_cols,index=X_test.index)
x_test_final=pd.concat([x_test_scaled_df,X_test[cat]],axis=1)
x_test_final=x_test_final.dropna()

In [640]:
#Making the model
lr=LinearRegression()
# Fit the model using the training data
lr.fit(x_train_final, y_train)

In [641]:
#Making predictions
prediction=lr.predict(x_test_final)

In [642]:
# Evaluate the model
mse = mean_squared_error(y_test, prediction)
r2 = r2_score(y_test, prediction)

print(f'Mean Squared Error: {mse}')
print(f'R² Score: {r2}')

Mean Squared Error: 0.1472528092534297
R² Score: 0.3955501553745512


In [643]:
lr.score(x_test_final, y_test)

0.3955501553745512

The MSE is a low value which indicates good performance. The R² score is low however.

### Logistic

In [644]:
# Initialize the logistic regression model
lrm = LogisticRegression(random_state=2,max_iter=1000)

# Fit the model using the training data
lrm.fit(x_train_final, y_train)

In [645]:
#Predicting 
y_train_pred = lrm.predict(x_train_final)
y_test_pred = lrm.predict(x_test_final)

In [646]:
#Scoring the model
lrm_train_score=accuracy_score(y_train, y_train_pred)
lrm_test_score=accuracy_score(y_test, y_test_pred)
print(f"lrm train score: {lrm_train_score}")
print(f"lrm test score: {lrm_test_score}")

lrm train score: 0.7917620137299771
lrm test score: 0.8127853881278538


In [647]:
conf_matrix = confusion_matrix(y_test, y_test_pred)
class_report = classification_report(y_test, y_test_pred)

print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{class_report}")

Confusion Matrix:
[[221  33]
 [ 49 135]]
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.87      0.84       254
           1       0.80      0.73      0.77       184

    accuracy                           0.81       438
   macro avg       0.81      0.80      0.81       438
weighted avg       0.81      0.81      0.81       438



The logistic regression model achieved 81% accuracy, effectively identifying both classes with good precision and recall, meaning it correctly predicted outcomes with a reasonable balance between false positives and false negatives.

### Comparing the models

In [648]:
print(f"The linear model scored {r2:.4f}")
print(f"The logistic model scored {lrm_test_score:.4f}")

The linear model scored 0.3956
The logistic model scored 0.8128


The logistic regression model outperformed the linear regression model with an accuracy score of 81.28% compared to 39.56%, indicating that the logistic model provided a more reliable classification of outcomes.

### Using the Logistic model on the unplayed games

In [649]:
#Bringing in the unplayed games data.
unplayed_df=unplayed_games.copy()
unplayed_df=unplayed_df.drop(columns="Gm#")
unplayed_df.head(3)

Unnamed: 0,W/L,D/N,H/A,Opp,COL_at_bats,COL_ba,COL_hits,COL_hr,COL_kk,COL_obp,COL_walks,Opp_at_bats,Opp_ba,Opp_hits,OPP_HR_Column,OPP_kk,Opp_obp,Opp_walks
1813,8:40 pm,Game Preview and Matchups,H,MIA,33,0.242424,8,1,10,0.285714,2,30,0.3,9,1,10,0.416667,6
1814,3:10 pm,Game Preview and Matchups,H,MIA,42,0.428571,18,2,7,0.489362,4,36,0.333333,12,3,10,0.390244,4
1815,8:40 pm,Game Preview and Matchups,H,BAL,-1,0.314286,-1,-1,6,-1.0,-1,33,0.30303,10,1,6,0.30303,0


In [650]:
#Converting D/N to D/N based on the time in the W/L Col
unplayed_df["D/N"]=unplayed_df["W/L"]
gametimes=unplayed_df["D/N"].unique().tolist()

In [651]:
# Define cutoff time
cutoff_time = datetime.strptime('6:00 pm', '%I:%M %p')

# Create the dictionary with inline logic
time_labels = {
    time: 'D' if datetime.strptime(time, '%I:%M %p') < cutoff_time else 'N'
    for time in gametimes
}
#Converting the times based on the time to d/n
unplayed_df["D/N"]=unplayed_df["D/N"].map(time_labels)
unplayed_df.head()

Unnamed: 0,W/L,D/N,H/A,Opp,COL_at_bats,COL_ba,COL_hits,COL_hr,COL_kk,COL_obp,COL_walks,Opp_at_bats,Opp_ba,Opp_hits,OPP_HR_Column,OPP_kk,Opp_obp,Opp_walks
1813,8:40 pm,N,H,MIA,33,0.242424,8,1,10,0.285714,2,30,0.3,9,1,10,0.416667,6
1814,3:10 pm,D,H,MIA,42,0.428571,18,2,7,0.489362,4,36,0.333333,12,3,10,0.390244,4
1815,8:40 pm,N,H,BAL,-1,0.314286,-1,-1,6,-1.0,-1,33,0.30303,10,1,6,0.30303,0
1816,8:10 pm,N,H,BAL,-1,-1.0,-1,-1,-1,-1.0,-1,29,0.310345,9,0,9,0.393939,4
1817,3:10 pm,D,H,BAL,-1,-1.0,-1,-1,-1,-1.0,-1,30,0.2,6,0,11,0.25,2


In [652]:
opp_list=unplayed_df["Opp"].unique().tolist()
opp_list

['MIA', 'BAL', 'ATL', 'MIL', 'DET', 'CHC', 'ARI', 'LAD', 'STL']

In [653]:
# Get the team batting stats for the current season (you can specify a year if needed)
team_stats = team_batting(start_season=2024)
ts_cl=team_stats.columns.tolist()
team_stats.head()
# ts_cl

Unnamed: 0,teamIDfg,Season,Team,Age,G,AB,PA,H,1B,2B,...,maxEV,HardHit,HardHit%,Events,CStr%,CSW%,xBA,xSLG,xwOBA,L-WAR
0,9,2024,NYY,29,1994,4725,5405,1191,747,216,...,120.0,1544,0.421,3664,0.177,0.273,,,,30.2
1,15,2024,ARI,28,2106,4801,5456,1257,811,236,...,117.0,1472,0.388,3798,0.173,0.265,,,,27.7
2,22,2024,LAD,30,2055,4770,5395,1211,742,256,...,119.2,1492,0.409,3650,0.159,0.266,,,,27.7
3,2,2024,BAL,28,2081,4844,5380,1221,746,235,...,114.4,1584,0.425,3727,0.159,0.266,,,,26.2
4,26,2024,PHI,29,1991,4796,5358,1236,801,244,...,115.6,1461,0.398,3670,0.156,0.269,,,,23.2


In [654]:
#Setting up a new smaller df for the averages
avg_df=team_stats[["Team","AVG","OBP"]]
avg_df=avg_df.rename(columns={"AVG":"Opp_ba","Team":"Opp", "OBP":"Opp_obp"})
avg_df.head()

Unnamed: 0,Opp,Opp_ba,Opp_obp
0,NYY,0.252,0.335
1,ARI,0.262,0.336
2,LAD,0.254,0.33
3,BAL,0.252,0.318
4,PHI,0.258,0.327


In [655]:
#Converting everything to averages so that the log reg model will be more applicable
avg_df["Opp_at_bats"] = team_stats['AB'] / team_stats['G']
avg_df["OPP_kk"]=team_stats['SO']/team_stats['G']
avg_df["Opp_walks"] = team_stats['BB'] / team_stats['G']
avg_df["Opp_hits"]=team_stats['H']/team_stats['G']
avg_df["OPP_HR_Column"]=team_stats['HR']/team_stats['G']

avg_df.head()

Unnamed: 0,Opp,Opp_ba,Opp_obp,Opp_at_bats,OPP_kk,Opp_walks,Opp_hits,OPP_HR_Column
0,NYY,0.252,0.335,2.369609,0.566199,0.285356,0.597292,0.10682
1,ARI,0.262,0.336,2.279677,0.520418,0.233618,0.596866,0.084046
2,LAD,0.254,0.33,2.321168,0.573236,0.249635,0.589294,0.092457
3,BAL,0.252,0.318,2.327727,0.560308,0.20519,0.586737,0.100432
4,PHI,0.258,0.327,2.40884,0.589151,0.231542,0.620794,0.085384


In [656]:
#Pulling out COL info:
Rockies_df = avg_df[avg_df["Opp"] == "COL"]
Rockies_df=Rockies_df.rename({"Opp_at_bats":"COL_at_bats",
                              "Opp_ba":"COL_ba",
                              "Opp_hits":"COL_hits", 
                              "OPP_HR_Column":"COL_hr",
                              "OPP_kk":"COL_kk",
                              "Opp_obp":"COL_obp",
                              "Opp_walks":"COL_walks",
                              "Opp":"W/L"
                              },
                              axis=1)
Rockies_df.set_index("W/L",inplace=True)
Rockies_df

Unnamed: 0_level_0,COL_ba,COL_obp,COL_at_bats,COL_kk,COL_walks,COL_hits,COL_hr
W/L,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
COL,0.243,0.305,2.413583,0.706031,0.201723,0.586923,0.07704


In [657]:
#Bringing in the opp averages
# Set index to update
unplayed_df.set_index("Opp", inplace=True)
avg_df.set_index("Opp", inplace=True)

# Use the update method to overwrite the matching columns
unplayed_df.update(avg_df)
unplayed_df.reset_index(inplace=True)
unplayed_df.head()

Unnamed: 0,Opp,W/L,D/N,H/A,COL_at_bats,COL_ba,COL_hits,COL_hr,COL_kk,COL_obp,COL_walks,Opp_at_bats,Opp_ba,Opp_hits,OPP_HR_Column,OPP_kk,Opp_obp,Opp_walks
0,MIA,8:40 pm,N,H,33,0.242424,8,1,10,0.285714,2,2.246452,0.24,0.539262,0.060549,0.566698,0.296,0.159413
1,MIA,3:10 pm,D,H,42,0.428571,18,2,7,0.489362,4,2.246452,0.24,0.539262,0.060549,0.566698,0.296,0.159413
2,BAL,8:40 pm,N,H,-1,0.314286,-1,-1,6,-1.0,-1,2.327727,0.252,0.586737,0.100432,0.560308,0.318,0.20519
3,BAL,8:10 pm,N,H,-1,-1.0,-1,-1,-1,-1.0,-1,2.327727,0.252,0.586737,0.100432,0.560308,0.318,0.20519
4,BAL,3:10 pm,D,H,-1,-1.0,-1,-1,-1,-1.0,-1,2.327727,0.252,0.586737,0.100432,0.560308,0.318,0.20519


In [658]:
#Merging the Rockies data into the unplayed games.

unplayed_df["W/L"]="COL"
unplayed_df.set_index("W/L",inplace=True)

unplayed_df.update(Rockies_df)
unplayed_df.reset_index(inplace=True)
unplayed_df.head()


Unnamed: 0,W/L,Opp,D/N,H/A,COL_at_bats,COL_ba,COL_hits,COL_hr,COL_kk,COL_obp,COL_walks,Opp_at_bats,Opp_ba,Opp_hits,OPP_HR_Column,OPP_kk,Opp_obp,Opp_walks
0,COL,MIA,N,H,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.246452,0.24,0.539262,0.060549,0.566698,0.296,0.159413
1,COL,MIA,D,H,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.246452,0.24,0.539262,0.060549,0.566698,0.296,0.159413
2,COL,BAL,N,H,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.327727,0.252,0.586737,0.100432,0.560308,0.318,0.20519
3,COL,BAL,N,H,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.327727,0.252,0.586737,0.100432,0.560308,0.318,0.20519
4,COL,BAL,D,H,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.327727,0.252,0.586737,0.100432,0.560308,0.318,0.20519


In [659]:
#Converting the binary columns with dummies.
udummies= pd.get_dummies(unplayed_df[['D/N', 'H/A']], drop_first=True).astype(int)
unplayed_df[['D/N', 'H/A']]=udummies[["D/N_N","H/A_H"]]

unplayed_df.head()

Unnamed: 0,W/L,Opp,D/N,H/A,COL_at_bats,COL_ba,COL_hits,COL_hr,COL_kk,COL_obp,COL_walks,Opp_at_bats,Opp_ba,Opp_hits,OPP_HR_Column,OPP_kk,Opp_obp,Opp_walks
0,COL,MIA,1,1,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.246452,0.24,0.539262,0.060549,0.566698,0.296,0.159413
1,COL,MIA,0,1,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.246452,0.24,0.539262,0.060549,0.566698,0.296,0.159413
2,COL,BAL,1,1,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.327727,0.252,0.586737,0.100432,0.560308,0.318,0.20519
3,COL,BAL,1,1,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.327727,0.252,0.586737,0.100432,0.560308,0.318,0.20519
4,COL,BAL,0,1,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.327727,0.252,0.586737,0.100432,0.560308,0.318,0.20519


In [660]:
#Encoding the opp col
unplayed_df['Opp'] = encoder.fit_transform(unplayed_df['Opp'])
unplayed_df.head(3)

Unnamed: 0,W/L,Opp,D/N,H/A,COL_at_bats,COL_ba,COL_hits,COL_hr,COL_kk,COL_obp,COL_walks,Opp_at_bats,Opp_ba,Opp_hits,OPP_HR_Column,OPP_kk,Opp_obp,Opp_walks
0,COL,6,1,1,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.246452,0.24,0.539262,0.060549,0.566698,0.296,0.159413
1,COL,6,0,1,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.246452,0.24,0.539262,0.060549,0.566698,0.296,0.159413
2,COL,2,1,1,2.413583,0.243,0.586923,0.07704,0.706031,0.305,0.201723,2.327727,0.252,0.586737,0.100432,0.560308,0.318,0.20519


In [661]:
#Reordering columns
unplayed_df=unplayed_df.drop("W/L",axis=1)
unplayed_df=unplayed_df[x_train_final.columns]


In [662]:
#Splitting the data based on data type.
unplayed_cat=unplayed_df[cat]
display(unplayed_cat.head())
unplayed_cont=unplayed_df[continuous_cols]
display(unplayed_cont.head())

Unnamed: 0,D/N,H/A,Opp
0,1,1,6
1,0,1,6
2,1,1,2
3,1,1,2
4,0,1,2


Unnamed: 0,Opp_hits,COL_obp,COL_kk,COL_walks,Opp_ba,COL_hr,Opp_at_bats,OPP_kk,COL_hits,Opp_obp,OPP_HR_Column,Opp_walks,COL_ba,COL_at_bats
0,0.539262,0.305,0.706031,0.201723,0.24,0.07704,2.246452,0.566698,0.586923,0.296,0.060549,0.159413,0.243,2.413583
1,0.539262,0.305,0.706031,0.201723,0.24,0.07704,2.246452,0.566698,0.586923,0.296,0.060549,0.159413,0.243,2.413583
2,0.586737,0.305,0.706031,0.201723,0.252,0.07704,2.327727,0.560308,0.586923,0.318,0.100432,0.20519,0.243,2.413583
3,0.586737,0.305,0.706031,0.201723,0.252,0.07704,2.327727,0.560308,0.586923,0.318,0.100432,0.20519,0.243,2.413583
4,0.586737,0.305,0.706031,0.201723,0.252,0.07704,2.327727,0.560308,0.586923,0.318,0.100432,0.20519,0.243,2.413583


In [663]:
#Scaling the data
unplayed_scaled = scaler.transform(unplayed_df[continuous_cols])
unplayed_scaled_df=pd.DataFrame(unplayed_scaled,columns=continuous_cols,index=unplayed_df.index)


In [664]:
#Recombinging the data.
X_unplayed = pd.concat([unplayed_scaled_df,unplayed_cat], axis=1)



In [665]:
#Making predictions!!!
w_l_predictions=lrm.predict(X_unplayed)

#Making a df
unplayed_df["W/L pred"]=w_l_predictions
unplayed_df

Unnamed: 0,Opp_hits,COL_obp,COL_kk,COL_walks,Opp_ba,COL_hr,Opp_at_bats,OPP_kk,COL_hits,Opp_obp,OPP_HR_Column,Opp_walks,COL_ba,COL_at_bats,D/N,H/A,Opp,W/L pred
0,0.539262,0.305,0.706031,0.201723,0.24,0.07704,2.246452,0.566698,0.586923,0.296,0.060549,0.159413,0.243,2.413583,1,1,6,1
1,0.539262,0.305,0.706031,0.201723,0.24,0.07704,2.246452,0.566698,0.586923,0.296,0.060549,0.159413,0.243,2.413583,0,1,6,1
2,0.586737,0.305,0.706031,0.201723,0.252,0.07704,2.327727,0.560308,0.586923,0.318,0.100432,0.20519,0.243,2.413583,1,1,2,1
3,0.586737,0.305,0.706031,0.201723,0.252,0.07704,2.327727,0.560308,0.586923,0.318,0.100432,0.20519,0.243,2.413583,1,1,2,1
4,0.586737,0.305,0.706031,0.201723,0.252,0.07704,2.327727,0.560308,0.586923,0.318,0.100432,0.20519,0.243,2.413583,0,1,2,1
5,0.580102,0.305,0.706031,0.201723,0.241,0.07704,2.410714,0.655102,0.586923,0.306,0.092347,0.210714,0.243,2.413583,1,0,1,1
6,0.580102,0.305,0.706031,0.201723,0.241,0.07704,2.410714,0.655102,0.586923,0.306,0.092347,0.210714,0.243,2.413583,1,0,1,1
7,0.580102,0.305,0.706031,0.201723,0.241,0.07704,2.410714,0.655102,0.586923,0.306,0.092347,0.210714,0.243,2.413583,1,0,1,1
8,0.582804,0.305,0.706031,0.201723,0.251,0.07704,2.319003,0.610161,0.586923,0.33,0.076698,0.258427,0.243,2.413583,1,0,7,1
9,0.582804,0.305,0.706031,0.201723,0.251,0.07704,2.319003,0.610161,0.586923,0.33,0.076698,0.258427,0.243,2.413583,1,0,7,1
