# **5 - Modeling**

## Loading Libraries

In [3]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

## Loading the Pickled DataFrame

In [4]:
# Read the pickled object stored in 'df.pkl' (it is used as a DataFrame in the cells below).
# NOTE(review): unpickling can execute arbitrary code - only load a file you produced yourself.
with open('df.pkl', mode = 'rb') as Pickle_File:
    df = pickle.load(Pickle_File)

## Selecting Features and Outcome

In [5]:
# Feature columns, grouped as fight-level columns followed by the R_* and B_* per-fighter columns.
Fight_Columns = ['Fight_Name', 'Date', 'Location', 'R', 'B', 'Division', 'Format', 'Referee']
Red_Columns = ['R_Name', 'R_Age', 'R_Height', 'R_Weight', 'R_Reach', 'R_Stance', 'R_Birthday',
               'R_SSLPM', 'R_SAC', 'R_SSAPM', 'R_SD', 'R_TAV', 'R_TAC', 'R_TD', 'R_SAV']
Blue_Columns = ['B_Name', 'B_Age', 'B_Height', 'B_Weight', 'B_Reach', 'B_Stance', 'B_Birthday',
                'B_SSLPM', 'B_SAC', 'B_SSAPM', 'B_SD', 'B_TAV', 'B_TAC', 'B_TD', 'B_SAV']

# Features keep the same left-to-right order: fight columns, then R_*, then B_*.
X = df[Fight_Columns + Red_Columns + Blue_Columns]

# Outcome to predict.
Y = df['Winner']

## Shuffling and Splitting

In [None]:
from sklearn.model_selection import train_test_split

# Two-stage split: hold out 20% as Test, then take 25% of the remaining 80% (= 20% overall)
# as Validation, which leaves 60% for Train. The fixed random_state keeps the split repeatable.
X_TV, X_Test, Y_TV, Y_Test = train_test_split(X, Y, test_size = 0.2, random_state = 3)
X_Train, X_Validation, Y_Train, Y_Validation = train_test_split(X_TV, Y_TV, test_size = 0.25, random_state = 3)

# Report the shape and type of the full data and of every split.
Split_Report = [
    ('X', X),
    ('Y', Y),
    ('X_Train', X_Train),
    ('X_Validation', X_Validation),
    ('X_Test', X_Test),
    ('Y_Train', Y_Train),
    ('Y_Validation', Y_Validation),
    ('Y_Test', Y_Test),
]
for Split_Name, Split_Data in Split_Report:
    print(f'{Split_Name} Shape: {Split_Data.shape} | {Split_Name} Type: {type(Split_Data)}')

### Reset Indexes

In [None]:
# Replace the row labels carried over from the split with a fresh 0..n-1 index on every split,
# so that features and labels of the same split share identical row labels.
X_Train, X_Validation, X_Test = (
    Split.reset_index(drop = True) for Split in (X_Train, X_Validation, X_Test)
)
Y_Train, Y_Validation, Y_Test = (
    Split.reset_index(drop = True) for Split in (Y_Train, Y_Validation, Y_Test)
)

### Standardizing

In [None]:
# Standardize the numeric feature columns using statistics learned from the training split only,
# then re-attach the non-numeric columns and restore the original column order.

# Selecting Numeric Columns
Numeric_Columns = X_Train.select_dtypes(include = ['float64', 'int64']).columns
print('Numeric Columns:', Numeric_Columns)
print('# of Numeric Columns:', len(Numeric_Columns))
print('------------------------------------')

# Selecting Object Columns
Object_Columns = X_Train.select_dtypes(include = ['object', 'category', 'datetime64']).columns
print('Object Columns:', Object_Columns)
print('# of Object Columns:', len(Object_Columns))
print('------------------------------------')

from sklearn.preprocessing import StandardScaler

# Scaling X
X_Scaler = StandardScaler()

# Fit Scaler to X_Train only, so no Validation/Test statistics are used for scaling
X_Scaler.fit(X_Train[Numeric_Columns])

# Scale Numeric Columns.
# The scaler returns a 2D np Array, so each result is wrapped back into a pd DataFrame.
# The DataFrame is given the index of the split it came from: pd.concat(axis = 1) aligns rows
# by index label, so carrying the index over keeps the rows paired even if the splits were
# not reset to 0..n-1 beforehand.
X_Train_Numeric_STD = pd.DataFrame(X_Scaler.transform(X_Train[Numeric_Columns]),
                                   columns = Numeric_Columns, index = X_Train.index)
X_Validation_Numeric_STD = pd.DataFrame(X_Scaler.transform(X_Validation[Numeric_Columns]),
                                        columns = Numeric_Columns, index = X_Validation.index)
X_Test_Numeric_STD = pd.DataFrame(X_Scaler.transform(X_Test[Numeric_Columns]),
                                  columns = Numeric_Columns, index = X_Test.index)

# Checking Shapes and Indices before Concatenation
print("Shapes Before Concatenation:")
print(X_Train_Numeric_STD.shape)
print(X_Train[Object_Columns].shape)
print("Indices Before Concatenation:")
print(X_Train_Numeric_STD.index)
print(X_Train[Object_Columns].index)
print('------------------------------------')

# Concatenate Numeric and Object Columns
X_Train = pd.concat([X_Train_Numeric_STD, X_Train[Object_Columns]], axis = 1)
X_Validation = pd.concat([X_Validation_Numeric_STD, X_Validation[Object_Columns]], axis = 1)
X_Test = pd.concat([X_Test_Numeric_STD, X_Test[Object_Columns]], axis = 1)

# Checking Shapes and Indices after Concatenation
print("Shapes After Concatenation:")
print(X_Train.shape)
print("Indices After Concatenation:")
print(X_Train.index)

print(Y_Train.index)

# Re-Ordering Columns (the concat above puts all numeric columns first)
# NOTE(review): reindex fills any column listed here but missing from the frame with NaN
# instead of raising - e.g. a column whose dtype matched neither selection above.
order = ['Fight_Name', 'Date', 'Location', 'R', 'B', 'Division', 'Format', 'Referee',
        'R_Name', 'R_Age', 'R_Height', 'R_Weight', 'R_Reach', 'R_Stance', 'R_Birthday', 'R_SSLPM', 'R_SAC', 'R_SSAPM', 'R_SD', 'R_TAV', 'R_TAC', 'R_TD','R_SAV',
        'B_Name', 'B_Age', 'B_Height', 'B_Weight', 'B_Reach', 'B_Stance', 'B_Birthday', 'B_SSLPM', 'B_SAC', 'B_SSAPM', 'B_SD', 'B_TAV', 'B_TAC', 'B_TD', 'B_SAV']

X_Train = X_Train.reindex(columns = order)
X_Validation = X_Validation.reindex(columns = order)
X_Test = X_Test.reindex(columns = order)

print('************************************')
print('Scaled X')
display(X_Train.head())

### Reset Indexes

In [None]:
# Give every split a fresh 0..n-1 index.
# NOTE(review): the same six resets already appear in the earlier "Reset Indexes" cell -
# confirm whether this repeat is intentional.
X_Train, X_Validation, X_Test = (
    Split.reset_index(drop = True) for Split in (X_Train, X_Validation, X_Test)
)
Y_Train, Y_Validation, Y_Test = (
    Split.reset_index(drop = True) for Split in (Y_Train, Y_Validation, Y_Test)
)

### Standardizing

In [None]:
# NOTE(review): this cell is a line-for-line repeat of the earlier "Standardizing" cell.
# It re-fits a new scaler on splits that the earlier cell has already overwritten with
# standardized values; presumably that makes this pass close to a no-op - confirm, and
# consider keeping only one of the two cells.

# Selecting Numeric Columns
Numeric_Columns = X_Train.select_dtypes(include = ['float64', 'int64']).columns
print('Numeric Columns:', Numeric_Columns)
print('# of Numeric Columns:', len(Numeric_Columns))
print('------------------------------------')

# Selecting Object Columns
Object_Columns = X_Train.select_dtypes(include = ['object', 'category', 'datetime64']).columns
print('Object Columns:', Object_Columns)
print('# of Object Columns:', len(Object_Columns))
print('------------------------------------')

from sklearn.preprocessing import StandardScaler

# Scaling X
X_Scaler = StandardScaler()

# Fit Scaler to X_Train (training split only; Validation and Test are transformed with the same fit)
X_Scaler.fit(X_Train[Numeric_Columns]) 

# Scale Numeric Columns
X_Train_Numeric_STD = X_Scaler.transform(X_Train[Numeric_Columns]) 
X_Validation_Numeric_STD = X_Scaler.transform(X_Validation[Numeric_Columns])
X_Test_Numeric_STD = X_Scaler.transform(X_Test[Numeric_Columns])

# Scaler automatically converts pd DataFrame to 2D np Array, so we need to convert it back to pd DataFrame
# The rebuilt frames get a default 0..n-1 index, so the concat below relies on the splits
# having been reset to the same 0..n-1 index beforehand.
X_Train_Numeric_STD = pd.DataFrame(X_Train_Numeric_STD, columns = Numeric_Columns)
X_Validation_Numeric_STD = pd.DataFrame(X_Validation_Numeric_STD, columns = Numeric_Columns)
X_Test_Numeric_STD = pd.DataFrame(X_Test_Numeric_STD, columns = Numeric_Columns)

# Checking Shapes and Indices before Concatenation
print("Shapes Before Concatenation:")
print(X_Train_Numeric_STD.shape)
print(X_Train[Object_Columns].shape)
print("Indices Before Concatenation:")
print(X_Train_Numeric_STD.index)
print(X_Train[Object_Columns].index)
print('------------------------------------')

# Concatenate Numeric and Object Columns (column-wise; rows are matched by index label)
X_Train = pd.concat([X_Train_Numeric_STD, X_Train[Object_Columns]], axis = 1)
X_Validation = pd.concat([X_Validation_Numeric_STD, X_Validation[Object_Columns]], axis = 1)
X_Test = pd.concat([X_Test_Numeric_STD, X_Test[Object_Columns]], axis = 1)

# Checking Shapes and Indices after Concatenation
print("Shapes After Concatenation:")
print(X_Train.shape)
print("Indices After Concatenation:")
print(X_Train.index)

print(Y_Train.index)

# Re-Ordering Columns (the concat above puts all numeric columns first)
order = ['Fight_Name', 'Date', 'Location', 'R', 'B', 'Division', 'Format', 'Referee',  
        'R_Name', 'R_Age', 'R_Height', 'R_Weight', 'R_Reach', 'R_Stance', 'R_Birthday', 'R_SSLPM', 'R_SAC', 'R_SSAPM', 'R_SD', 'R_TAV', 'R_TAC', 'R_TD','R_SAV',
        'B_Name', 'B_Age', 'B_Height', 'B_Weight', 'B_Reach', 'B_Stance', 'B_Birthday', 'B_SSLPM', 'B_SAC', 'B_SSAPM', 'B_SD', 'B_TAV', 'B_TAC', 'B_TD', 'B_SAV']

X_Train = X_Train.reindex(columns = order)
X_Validation = X_Validation.reindex(columns = order)
X_Test = X_Test.reindex(columns = order)

print('************************************')
print('Scaled X')
display(X_Train.head())

## **Baseline Model**

In [None]:
# Baseline: always predict the majority class of the TRAINING labels, with class probabilities
# equal to the training class frequencies. Nothing is estimated from the Test labels, so the
# Test numbers are an honest reference for the models below.
from sklearn.metrics import log_loss

# Majority Class
# Assumes Winner is coded 1 = Red, 0 = Blue (the share of 1s is used as P(Red) below) - confirm.
Majority_Class = Y_Train.value_counts().idxmax()
Majority_Label = 'Red' if Majority_Class == 1 else 'Blue'
print(f'Majority Class: {Majority_Class} ({Majority_Label})')

print('------------------------------------')
# Accuracy of always predicting the training majority class
Train_Baseline_Accuracy = (Y_Train == Majority_Class).sum() / Y_Train.count()
Test_Baseline_Accuracy = (Y_Test == Majority_Class).sum() / Y_Test.count()
print(f"Training Accuracy: {Train_Baseline_Accuracy}")
print(f"Test Accuracy: {Test_Baseline_Accuracy}")
print('------------------------------------')

# Loss
# Predicted class probabilities, estimated once from the training labels
Train_R_Predicted_Probability = Y_Train.sum() / Y_Train.count() # Share of 1s (Red)
Train_B_Predicted_Probability = 1 - Train_R_Predicted_Probability

# Training Loss
# Repeat the constant probabilities once per row
Train_RPP_RS = np.full(Y_Train.shape, Train_R_Predicted_Probability)
Train_BPP_RS = np.full(Y_Train.shape, Train_B_Predicted_Probability)
# Columns must follow the sorted label order expected by log_loss: class 0 (Blue), then class 1 (Red)
Train_Probabilities = np.column_stack((Train_BPP_RS, Train_RPP_RS))
print(f'Training Loss: {log_loss(Y_Train, Train_Probabilities)}')

# Test Loss
# The baseline "model" is the training prior; it is applied unchanged to the Test rows
Test_R_Predicted_Probability = Train_R_Predicted_Probability
Test_B_Predicted_Probability = Train_B_Predicted_Probability
# Repeat the constant probabilities once per row
Test_RPP_RS = np.full(Y_Test.shape, Test_R_Predicted_Probability)
Test_BPP_RS = np.full(Y_Test.shape, Test_B_Predicted_Probability)
# Same column order as above
Test_Probabilities = np.column_stack((Test_BPP_RS, Test_RPP_RS))
print(f'Test Loss: {log_loss(Y_Test, Test_Probabilities)}')

## **Model 1**: Height, Weight, Reach

### Copying DataFrames

In [None]:
# Work on private copies so Model 1's preprocessing never touches the shared splits.
X_Train_M1, Y_Train_M1 = X_Train.copy(), Y_Train.copy()
X_Validation_M1, Y_Validation_M1 = X_Validation.copy(), Y_Validation.copy()
X_Test_M1, Y_Test_M1 = X_Test.copy(), Y_Test.copy()

### Selecting Features and Outcome

In [None]:
# Model 1 uses only the height, weight and reach columns of both fighters.
M1_Features = ['R_Height', 'R_Weight', 'R_Reach', 'B_Height', 'B_Weight', 'B_Reach']

X_Train_M1 = X_Train_M1[M1_Features]
X_Validation_M1 = X_Validation_M1[M1_Features]
X_Test_M1 = X_Test_M1[M1_Features]

### Dropping Missing Values

In [None]:
# For each split: find the rows with at least one missing feature, drop them from both the
# features and the labels so the two stay paired, then renumber the rows from 0.

# TRAIN
print('TRAIN')
print(f'Before Drop: {X_Train_M1.shape}')
# Row labels that have a missing value in any feature
X_Train_M1_Missing_Indices = X_Train_M1[X_Train_M1.isna().any(axis = 1)].index
# Drop those rows and reset the index
X_Train_M1 = X_Train_M1.drop(index = X_Train_M1_Missing_Indices).reset_index(drop = True)
Y_Train_M1 = Y_Train_M1.drop(index = X_Train_M1_Missing_Indices).reset_index(drop = True)
print(f'After Drop: {X_Train_M1.shape}')
print('------------------------------------')

# VALIDATION
print('VALIDATION')
print(f'Before Drop: {X_Validation_M1.shape}')
# Row labels that have a missing value in any feature
X_Validation_M1_Missing_Indices = X_Validation_M1[X_Validation_M1.isna().any(axis = 1)].index
# Drop those rows and reset the index
X_Validation_M1 = X_Validation_M1.drop(index = X_Validation_M1_Missing_Indices).reset_index(drop = True)
Y_Validation_M1 = Y_Validation_M1.drop(index = X_Validation_M1_Missing_Indices).reset_index(drop = True)
print(f'After Drop: {X_Validation_M1.shape}')
print('------------------------------------')

# TEST
print('TEST')
print(f'Before Drop: {X_Test_M1.shape}')
# Row labels that have a missing value in any feature
X_Test_M1_Missing_Indices = X_Test_M1[X_Test_M1.isna().any(axis = 1)].index
# Drop those rows and reset the index
X_Test_M1 = X_Test_M1.drop(index = X_Test_M1_Missing_Indices).reset_index(drop = True)
Y_Test_M1 = Y_Test_M1.drop(index = X_Test_M1_Missing_Indices).reset_index(drop = True)
print(f'After Drop: {X_Test_M1.shape}')
print('------------------------------------')

### Building Model

In [None]:
def build_model(lr, input_dim = 6):
    """Build and compile a logistic-regression model: one sigmoid unit on top of the inputs.

    Parameters
    ----------
    lr : float
        Learning rate passed to the SGD optimizer.
    input_dim : int, default 6
        Number of feature columns per row. The default matches the six Model 1 features,
        so existing `build_model(lr = ...)` calls behave as before.

    Returns
    -------
    tf.keras.Model
        Compiled Sequential model using binary cross-entropy loss and the accuracy metric.
    """

    # Fix the TensorFlow seed and discard Keras state left over from a previously built model
    tf.random.set_seed(0)
    tf.keras.backend.clear_session()

    model = tf.keras.models.Sequential()

    # Input Layer
    model.add(tf.keras.Input(shape = (input_dim, ), name = 'Input')) # Input Dimension

    # Output Layer
    model.add(tf.keras.layers.Dense(
        units = 1, # Output Dimension
        use_bias = True, # Add Bias Parameter (Instead of Concatenating a Column of 1's)
        activation = 'sigmoid', # Activation Function
        kernel_initializer = tf.keras.initializers.Ones(), # Weight Initializer (all weights start at 1)
        bias_initializer = tf.keras.initializers.Ones(), # Bias Initializer (bias starts at 1)
        name = 'Output'
        ))

    # Compile Model
    model.compile(optimizer = tf.keras.optimizers.SGD(learning_rate = lr),
                  loss = 'binary_crossentropy',
                  metrics = ['accuracy'])

    return model

### Fitting Model

In [None]:
# Train Model 1 on the training split and track the validation split after every epoch.
model = build_model(lr = 0.001)
history = model.fit(
    x = X_Train_M1,
    y = Y_Train_M1,
    validation_data = (X_Validation_M1, Y_Validation_M1),
    batch_size = 5,
    epochs = 20,
    verbose = 0,
)

### Performance

In [None]:
# Learned parameters of the first (and only) layer in model.layers
Output_Layer = model.layers[0]
weights, biases = Output_Layer.get_weights()

print(f'Weights:\n {weights}')
print(f'Biases: {biases}')

# Training split: predictions, then loss and accuracy
Train_Predictions = model.predict(x = X_Train_M1, verbose = 0)
Train_Metrics = model.evaluate(x = X_Train_M1, y = Y_Train_M1, verbose = 0)
Train_Loss, Train_Accuracy = Train_Metrics
print(f'Train Loss: {Train_Loss:.2f} | Train Accuracy: {Train_Accuracy:.2f}')

# Validation split: predictions, then loss and accuracy
Validation_Predictions = model.predict(x = X_Validation_M1, verbose = 0)
Validation_Metrics = model.evaluate(x = X_Validation_M1, y = Y_Validation_M1, verbose = 0)
Validation_Loss, Validation_Accuracy = Validation_Metrics
print(f'Validation Loss: {Validation_Loss:.2f} | Validation Accuracy: {Validation_Accuracy:.2f}')

### Plotting Performance

In [None]:
# Training and validation loss per epoch, taken from the fit history
Loss_History = history.history
for History_Key, Curve_Label in (('loss', 'Train'), ('val_loss', 'Validation')):
    plt.plot(Loss_History[History_Key], label = Curve_Label)

plt.title('Loss Over Epochs', fontweight = 'bold')
plt.xlabel('Epoch')
plt.ylabel('Log Loss')
# One tick per epoch
Epoch_Ticks = np.arange(len(Loss_History['loss']))
plt.xticks(Epoch_Ticks)
plt.legend()
plt.show()

### Hyperparameter Tuning (To-Do)

In [None]:
###

### Test Performance

In [None]:
# Test split: predictions, then loss and accuracy
Test_Predictions = model.predict(x = X_Test_M1, verbose = 0)
Test_Metrics = model.evaluate(x = X_Test_M1, y = Y_Test_M1, verbose = 0)
Test_Loss, Test_Accuracy = Test_Metrics
print(f'Test Loss: {Test_Loss:.2f} | Test Accuracy: {Test_Accuracy:.2f}')

## **Model 2**: Height, Weight, Reach + Attributes

### Copying DataFrames

In [None]:
# Work on private copies so Model 2's preprocessing never touches the shared splits.
X_Train_M2, Y_Train_M2 = X_Train.copy(), Y_Train.copy()
X_Validation_M2, Y_Validation_M2 = X_Validation.copy(), Y_Validation.copy()
X_Test_M2, Y_Test_M2 = X_Test.copy(), Y_Test.copy()

### Selecting Features and Outcome

In [None]:
# Model 2 uses the 12 numeric per-fighter columns for R followed by the same 12 for B.
M2_Features = [
    'R_Age', 'R_Height', 'R_Weight', 'R_Reach', 'R_SSLPM', 'R_SAC',
    'R_SSAPM', 'R_SD', 'R_TAV', 'R_TAC', 'R_TD', 'R_SAV',
    'B_Age', 'B_Height', 'B_Weight', 'B_Reach', 'B_SSLPM', 'B_SAC',
    'B_SSAPM', 'B_SD', 'B_TAV', 'B_TAC', 'B_TD', 'B_SAV',
]

X_Train_M2 = X_Train_M2[M2_Features]
X_Validation_M2 = X_Validation_M2[M2_Features]
X_Test_M2 = X_Test_M2[M2_Features]

### Dropping Missing Values

In [None]:
# For each split: flag the rows with at least one missing feature, drop them from both the
# features and the labels so the two stay paired, then renumber the rows from 0.

# TRAIN
print('TRAIN')
print(f'Before Drop: {X_Train_M2.shape}')
# Rows with a missing value in any feature
Train_M2_Has_Missing = X_Train_M2.isnull().any(axis = 1)
X_Train_M2_Missing_Indices = X_Train_M2.index[Train_M2_Has_Missing]
# Drop those rows and reset the index
X_Train_M2 = X_Train_M2.drop(X_Train_M2_Missing_Indices).reset_index(drop = True)
Y_Train_M2 = Y_Train_M2.drop(X_Train_M2_Missing_Indices).reset_index(drop = True)
print(f'After Drop: {X_Train_M2.shape}')
print('------------------------------------')

# VALIDATION
print('VALIDATION')
print(f'Before Drop: {X_Validation_M2.shape}')
# Rows with a missing value in any feature
Validation_M2_Has_Missing = X_Validation_M2.isnull().any(axis = 1)
X_Validation_M2_Missing_Indices = X_Validation_M2.index[Validation_M2_Has_Missing]
# Drop those rows and reset the index
X_Validation_M2 = X_Validation_M2.drop(X_Validation_M2_Missing_Indices).reset_index(drop = True)
Y_Validation_M2 = Y_Validation_M2.drop(X_Validation_M2_Missing_Indices).reset_index(drop = True)
print(f'After Drop: {X_Validation_M2.shape}')
print('------------------------------------')

# TEST
print('TEST')
print(f'Before Drop: {X_Test_M2.shape}')
# Rows with a missing value in any feature
Test_M2_Has_Missing = X_Test_M2.isnull().any(axis = 1)
X_Test_M2_Missing_Indices = X_Test_M2.index[Test_M2_Has_Missing]
# Drop those rows and reset the index
X_Test_M2 = X_Test_M2.drop(X_Test_M2_Missing_Indices).reset_index(drop = True)
Y_Test_M2 = Y_Test_M2.drop(X_Test_M2_Missing_Indices).reset_index(drop = True)
print(f'After Drop: {X_Test_M2.shape}')
print('------------------------------------')

### Building Model

In [None]:
def build_model(lr, input_dim = 24):
    """Build and compile a logistic-regression model: one sigmoid unit on top of the inputs.

    Parameters
    ----------
    lr : float
        Learning rate passed to the SGD optimizer.
    input_dim : int, default 24
        Number of feature columns per row. The default matches the 24 Model 2 features,
        so existing `build_model(lr = ...)` calls behave as before.

    Returns
    -------
    tf.keras.Model
        Compiled Sequential model using binary cross-entropy loss and the accuracy metric.
    """

    # Fix the TensorFlow seed and discard Keras state left over from a previously built model
    tf.random.set_seed(0)
    tf.keras.backend.clear_session()

    model = tf.keras.models.Sequential()

    # Input Layer
    model.add(tf.keras.Input(shape = (input_dim, ), name = 'Input')) # Input Dimension

    # Output Layer
    model.add(tf.keras.layers.Dense(
        units = 1, # Output Dimension
        use_bias = True, # Add Bias Parameter (Instead of Concatenating a Column of 1's)
        activation = 'sigmoid', # Activation Function
        kernel_initializer = tf.keras.initializers.Ones(), # Weight Initializer (all weights start at 1)
        bias_initializer = tf.keras.initializers.Ones(), # Bias Initializer (bias starts at 1)
        name = 'Output'
        ))

    # Compile Model
    model.compile(optimizer = tf.keras.optimizers.SGD(learning_rate = lr),
                  loss = 'binary_crossentropy',
                  metrics = ['accuracy'])

    return model

### Fitting Model

In [None]:
# Train Model 2 on the training split and track the validation split after every epoch.
model = build_model(lr = 0.001)
history = model.fit(
    x = X_Train_M2,
    y = Y_Train_M2,
    validation_data = (X_Validation_M2, Y_Validation_M2),
    batch_size = 5,
    epochs = 20,
    verbose = 0,
)

### Performance

In [None]:
# Learned parameters of the first (and only) layer in model.layers
Output_Layer = model.layers[0]
weights, biases = Output_Layer.get_weights()

print(f'Weights:\n {weights}')
print(f'Biases: {biases}')

# Training split: predictions, then loss and accuracy
Train_Predictions = model.predict(x = X_Train_M2, verbose = 0)
Train_Metrics = model.evaluate(x = X_Train_M2, y = Y_Train_M2, verbose = 0)
Train_Loss, Train_Accuracy = Train_Metrics
print(f'Train Loss: {Train_Loss:.2f} | Train Accuracy: {Train_Accuracy:.2f}')

# Validation split: predictions, then loss and accuracy
Validation_Predictions = model.predict(x = X_Validation_M2, verbose = 0)
Validation_Metrics = model.evaluate(x = X_Validation_M2, y = Y_Validation_M2, verbose = 0)
Validation_Loss, Validation_Accuracy = Validation_Metrics
print(f'Validation Loss: {Validation_Loss:.2f} | Validation Accuracy: {Validation_Accuracy:.2f}')

### Plotting Performance

In [None]:
# Training and validation loss per epoch, taken from the fit history
Loss_History = history.history
for History_Key, Curve_Label in (('loss', 'Train'), ('val_loss', 'Validation')):
    plt.plot(Loss_History[History_Key], label = Curve_Label)

plt.title('Loss Over Epochs', fontweight = 'bold')
plt.xlabel('Epoch')
plt.ylabel('Log Loss')
# One tick per epoch
Epoch_Ticks = np.arange(len(Loss_History['loss']))
plt.xticks(Epoch_Ticks)
plt.legend()
plt.show()

### Hyperparameter Tuning (To-Do)

In [None]:
###

### Test Performance

In [None]:
# Test split: predictions, then loss and accuracy
Test_Predictions = model.predict(x = X_Test_M2, verbose = 0)
Test_Metrics = model.evaluate(x = X_Test_M2, y = Y_Test_M2, verbose = 0)
Test_Loss, Test_Accuracy = Test_Metrics
print(f'Test Loss: {Test_Loss:.2f} | Test Accuracy: {Test_Accuracy:.2f}')

## **Model 3**: Height, Weight, Reach + Attributes + Names