# Predicting KO

In [43]:
y = data['KO_OVR']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

# Formatting data
x_train = x_train.values
y_train = y_train.values.ravel()
x_test = x_test.values
y_test = y_test.values.ravel()

### Setting Baseline

In [41]:
print(f'Always predicting no knockout would yield {100 - data.KO_OVR.mean()*100}% accuracy')

df = data.copy()
df['Higher_Pct'] = df.slpm_1.apply(lambda x: 1 if x > 0 else 0)
df['Result_Tracker'] = df.apply(lambda x: 1 if (x.Higher_Pct == 1) & (x.result == 1)
                               else 1 if (x.Higher_Pct == 0) & (x.result == 0)
                               else 0, axis = 1)
print(f'Predicting the fighter with the higher SLPM would yield {df.Result_Tracker.mean()*100}% accuracy')

Always predicting no knockout would yield 62.836185819070906% accuracy
Predicting the fighter with the higher SLPM would yield 57.21271393643031% accuracy


### Random Forest

In [44]:
# Creating parameter grid
n_estimators = [int(x) for x in np.linspace(start = 3, stop = 15, num = 13)]
max_features = [int(x) for x in np.linspace(start = 3, stop = 10, num = 8)]
max_depth = [int(x) for x in np.linspace(start = 1, stop = 10, num = 10)]
param_grid = {
    'n_estimators' : n_estimators,
    'max_features' : max_features,
    'max_depth' : max_depth
}

# Running ML function
rf = grid_search_classifier(RandomForestClassifier(random_state = 0), param_grid)

Best parameters are: {'max_depth': 6, 'max_features': 8, 'n_estimators': 15}

Accuracy is: 0.524390243902439

AUC score is: 0.4155844155844156

Classification report:
                precision    recall  f1-score   support

 Lower Ranked       0.58      0.76      0.65        49
Higher Ranked       0.33      0.18      0.24        33

     accuracy                           0.52        82
    macro avg       0.46      0.47      0.45        82
 weighted avg       0.48      0.52      0.49        82



### Gradient Boost

In [45]:
# Creating parameter grid
n_estimators = [int(x) for x in np.linspace(start = 3, stop = 15, num = 13)]
max_features = [int(x) for x in np.linspace(start = 3, stop = 10, num = 8)]
max_depth = [int(x) for x in np.linspace(start = 1, stop = 10, num = 10)]
param_grid = {
    'n_estimators' : n_estimators,
    'max_features' : max_features,
    'max_depth' : max_depth
}

# Running ML function
gb = grid_search_classifier(GradientBoostingClassifier(random_state = 0), param_grid)

Best parameters are: {'max_depth': 10, 'max_features': 10, 'n_estimators': 6}

Accuracy is: 0.5121951219512195

AUC score is: 0.4001236858379716

Classification report:
                precision    recall  f1-score   support

 Lower Ranked       0.57      0.76      0.65        49
Higher Ranked       0.29      0.15      0.20        33

     accuracy                           0.51        82
    macro avg       0.43      0.45      0.42        82
 weighted avg       0.46      0.51      0.47        82



### Logistic Regression

In [46]:
# Creating parameter grid

c = [0.001, 0.01, 0.1, 1, 10, 100]
param_grid = {
    'C' : c
}

# Running ML function
lr = grid_search_classifier(LogisticRegression(random_state = 0, max_iter = 500), param_grid)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

Best parameters are: {'C': 0.1}

Accuracy is: 0.5487804878048781

AUC score is: 0.45701917130488556

Classification report:
                precision    recall  f1-score   support

 Lower Ranked       0.59      0.80      0.68        49
Higher Ranked       0.38      0.18      0.24        33

     accuracy                           0.55        82
    macro avg       0.48      0.49      0.46        82
 weighted avg       0.50      0.55      0.50        82



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


### SVM

In [47]:
# Creating parameter grid

kernel = ['rbf', 'poly', 'sigmoid']
degree = [int(x) for x in np.linspace(start = 2, stop = 7, num = 5)]
c = [0.001, 0.01, 0.1, 1, 10, 100]
param_grid = {
    'C' : c,
    'kernel' : kernel,
    'degree' : degree
}

# Running ML function
svm = grid_search_classifier(SVC(random_state = 0), param_grid)

Best parameters are: {'C': 10, 'degree': 5, 'kernel': 'poly'}

Accuracy is: 0.5609756097560976

Classification report:
                precision    recall  f1-score   support

 Lower Ranked       0.59      0.84      0.69        49
Higher Ranked       0.38      0.15      0.22        33

     accuracy                           0.56        82
    macro avg       0.49      0.49      0.46        82
 weighted avg       0.51      0.56      0.50        82



# Predicting SUB

### Setting Up Data

In [5]:
# Unsampling data to get a higher % of submissions

sub_data = data[data.SUB_OVR == 1]
non_sub_data = data[data.SUB_OVR == 0].sample(sub_data.shape[0])
data_sub = pd.concat([sub_data, non_sub_data])

# Splitting data for training
y_col = ['SUB_OVR']
x, y = data_sub[x_cols], data_sub[y_col]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

# Formatting data
x_train = x_train.values
y_train = y_train.values.ravel()
x_test = x_test.values
y_test = y_test.values.ravel()

### Setting Baseline

In [7]:
print(f'Always predicting no sub would yield {100 - data.SUB_OVR.mean()*100}% accuracy')

Always predicting no sub would yield 80.73593073593074% accuracy


### Random Forest

In [8]:
# Creating parameter grid
n_estimators = [int(x) for x in np.linspace(start = 3, stop = 15, num = 13)]
max_features = [int(x) for x in np.linspace(start = 3, stop = 10, num = 8)]
max_depth = [int(x) for x in np.linspace(start = 1, stop = 10, num = 10)]
param_grid = {
    'n_estimators' : n_estimators,
    'max_features' : max_features,
    'max_depth' : max_depth
}

# Running ML function
rf = grid_search_classifier(RandomForestClassifier(random_state = 0), param_grid)

Best parameters are: {'max_depth': 6, 'max_features': 4, 'n_estimators': 13}

Accuracy is: 0.5

AUC score is: 0.40625

Classification report:
                precision    recall  f1-score   support

 Lower Ranked       0.46      0.75      0.57        16
Higher Ranked       0.60      0.30      0.40        20

     accuracy                           0.50        36
    macro avg       0.53      0.53      0.49        36
 weighted avg       0.54      0.50      0.48        36



### Gradient Boost

In [9]:
# Creating parameter grid
n_estimators = [int(x) for x in np.linspace(start = 3, stop = 15, num = 13)]
max_features = [int(x) for x in np.linspace(start = 3, stop = 10, num = 8)]
max_depth = [int(x) for x in np.linspace(start = 1, stop = 10, num = 10)]
param_grid = {
    'n_estimators' : n_estimators,
    'max_features' : max_features,
    'max_depth' : max_depth
}

# Running ML function
gb = grid_search_classifier(GradientBoostingClassifier(random_state = 0), param_grid)

Best parameters are: {'max_depth': 2, 'max_features': 3, 'n_estimators': 4}

Accuracy is: 0.4444444444444444

AUC score is: 0.45625

Classification report:
                precision    recall  f1-score   support

 Lower Ranked       0.42      0.69      0.52        16
Higher Ranked       0.50      0.25      0.33        20

     accuracy                           0.44        36
    macro avg       0.46      0.47      0.43        36
 weighted avg       0.47      0.44      0.42        36



### Logistic Regression

In [10]:
# Creating parameter grid

c = [0.001, 0.01, 0.1, 1, 10, 100]
param_grid = {
    'C' : c
}

# Running ML function
lr = grid_search_classifier(LogisticRegression(random_state = 0, max_iter = 500), param_grid)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

Best parameters are: {'C': 10}

Accuracy is: 0.4166666666666667

AUC score is: 0.409375

Classification report:
                precision    recall  f1-score   support

 Lower Ranked       0.38      0.50      0.43        16
Higher Ranked       0.47      0.35      0.40        20

     accuracy                           0.42        36
    macro avg       0.42      0.42      0.42        36
 weighted avg       0.43      0.42      0.41        36



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


### SVM

In [11]:
# Creating parameter grid

kernel = ['rbf', 'poly', 'sigmoid']
degree = [int(x) for x in np.linspace(start = 2, stop = 7, num = 5)]
c = [0.001, 0.01, 0.1, 1, 10, 100]
param_grid = {
    'C' : c,
    'kernel' : kernel,
    'degree' : degree
}

# Running ML function
svm = grid_search_classifier(SVC(random_state = 0), param_grid)

Best parameters are: {'C': 10, 'degree': 5, 'kernel': 'poly'}

Accuracy is: 0.5833333333333334

Classification report:
                precision    recall  f1-score   support

 Lower Ranked       0.54      0.44      0.48        16
Higher Ranked       0.61      0.70      0.65        20

     accuracy                           0.58        36
    macro avg       0.57      0.57      0.57        36
 weighted avg       0.58      0.58      0.58        36



In [None]:
# Predicting KO

y = data['KO_OVR']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

# Formatting data
x_train = x_train.values
y_train = y_train.values.ravel()
x_test = x_test.values
y_test = y_test.values.ravel()

### Setting Baseline

print(f'Always predicting no knockout would yield {100 - data.KO_OVR.mean()*100}% accuracy')

df = data.copy()
df['Higher_Pct'] = df.slpm_1.apply(lambda x: 1 if x > 0 else 0)
df['Result_Tracker'] = df.apply(lambda x: 1 if (x.Higher_Pct == 1) & (x.result == 1)
                               else 1 if (x.Higher_Pct == 0) & (x.result == 0)
                               else 0, axis = 1)
print(f'Predicting the fighter with the higher SLPM would yield {df.Result_Tracker.mean()*100}% accuracy')

### Random Forest

# Creating parameter grid
n_estimators = [int(x) for x in np.linspace(start = 3, stop = 15, num = 13)]
max_features = [int(x) for x in np.linspace(start = 3, stop = 10, num = 8)]
max_depth = [int(x) for x in np.linspace(start = 1, stop = 10, num = 10)]
param_grid = {
    'n_estimators' : n_estimators,
    'max_features' : max_features,
    'max_depth' : max_depth
}

# Running ML function
rf = grid_search_classifier(RandomForestClassifier(random_state = 0), param_grid)

### Gradient Boost

# Creating parameter grid
n_estimators = [int(x) for x in np.linspace(start = 3, stop = 15, num = 13)]
max_features = [int(x) for x in np.linspace(start = 3, stop = 10, num = 8)]
max_depth = [int(x) for x in np.linspace(start = 1, stop = 10, num = 10)]
param_grid = {
    'n_estimators' : n_estimators,
    'max_features' : max_features,
    'max_depth' : max_depth
}

# Running ML function
gb = grid_search_classifier(GradientBoostingClassifier(random_state = 0), param_grid)

### Logistic Regression

# Creating parameter grid

c = [0.001, 0.01, 0.1, 1, 10, 100]
param_grid = {
    'C' : c
}

# Running ML function
lr = grid_search_classifier(LogisticRegression(random_state = 0, max_iter = 500), param_grid)

### SVM

# Creating parameter grid

kernel = ['rbf', 'poly', 'sigmoid']
degree = [int(x) for x in np.linspace(start = 2, stop = 7, num = 5)]
c = [0.001, 0.01, 0.1, 1, 10, 100]
param_grid = {
    'C' : c,
    'kernel' : kernel,
    'degree' : degree
}

# Running ML function
svm = grid_search_classifier(SVC(random_state = 0), param_grid)

# Predicting SUB

### Setting Up Data

# Unsampling data to get a higher % of submissions

sub_data = data[data.SUB_OVR == 1]
non_sub_data = data[data.SUB_OVR == 0].sample(sub_data.shape[0])
data_sub = pd.concat([sub_data, non_sub_data])

# Splitting data for training
y_col = ['SUB_OVR']
x, y = data_sub[x_cols], data_sub[y_col]
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

# Formatting data
x_train = x_train.values
y_train = y_train.values.ravel()
x_test = x_test.values
y_test = y_test.values.ravel()

### Setting Baseline

print(f'Always predicting no sub would yield {100 - data.SUB_OVR.mean()*100}% accuracy')

### Random Forest

# Creating parameter grid
n_estimators = [int(x) for x in np.linspace(start = 3, stop = 15, num = 13)]
max_features = [int(x) for x in np.linspace(start = 3, stop = 10, num = 8)]
max_depth = [int(x) for x in np.linspace(start = 1, stop = 10, num = 10)]
param_grid = {
    'n_estimators' : n_estimators,
    'max_features' : max_features,
    'max_depth' : max_depth
}

# Running ML function
rf = grid_search_classifier(RandomForestClassifier(random_state = 0), param_grid)

### Gradient Boost

# Creating parameter grid
n_estimators = [int(x) for x in np.linspace(start = 3, stop = 15, num = 13)]
max_features = [int(x) for x in np.linspace(start = 3, stop = 10, num = 8)]
max_depth = [int(x) for x in np.linspace(start = 1, stop = 10, num = 10)]
param_grid = {
    'n_estimators' : n_estimators,
    'max_features' : max_features,
    'max_depth' : max_depth
}

# Running ML function
gb = grid_search_classifier(GradientBoostingClassifier(random_state = 0), param_grid)

### Logistic Regression

# Creating parameter grid

c = [0.001, 0.01, 0.1, 1, 10, 100]
param_grid = {
    'C' : c
}

# Running ML function
lr = grid_search_classifier(LogisticRegression(random_state = 0, max_iter = 500), param_grid)

### SVM

# Creating parameter grid

kernel = ['rbf', 'poly', 'sigmoid']
degree = [int(x) for x in np.linspace(start = 2, stop = 7, num = 5)]
c = [0.001, 0.01, 0.1, 1, 10, 100]
param_grid = {
    'C' : c,
    'kernel' : kernel,
    'degree' : degree
}

# Running ML function
svm = grid_search_classifier(SVC(random_state = 0), param_grid)