In [10]:
import pandas as pd 
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, cross_val_score 
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [11]:
data_df = pd.read_csv('../data/cleaned_data_ml.csv')

In [15]:
X = data_df.drop(columns=['target'])
y = data_df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.3)

In [16]:
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

In [17]:
cv_scores = cross_val_score(clf, X, y, cv=5)
print(f'Cross Validation Accuracy {cv_scores.mean():.4f}')

Cross Validation Accuracy 0.7589


In [18]:
y_pred = clf.predict(X_test)

In [19]:
accuracy = accuracy_score(y_test, y_pred)
precision = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred) 
f1 = f1_score(y_test, y_pred)

In [20]:
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-Score: {f1:.4f}')

# Print classification report for more details
print(classification_report(y_test, y_pred))

Accuracy: 0.7491
Precision: 0.7491
Recall: 0.7339
F1-Score: 0.7406
              precision    recall  f1-score   support

           0       0.75      0.76      0.76       702
           1       0.75      0.73      0.74       669

    accuracy                           0.75      1371
   macro avg       0.75      0.75      0.75      1371
weighted avg       0.75      0.75      0.75      1371



In [35]:
new_data = pd.read_csv('../data/fight_comp_data.csv')
new_data

Unnamed: 0,fighter_A_stance_Orthodox,fighter_A_stance_Southpaw,fighter_A_stance_Switch,fighter_B_stance_Orthodox,fighter_B_stance_Southpaw,fighter_B_stance_Switch,is_title_fight_True,is_male_fight_True,weight_class_featherweight,weight_class_flyweight,...,diff_takedown_accuracy,diff_head_strike_ratio,diff_body_strike_ratio,diff_leg_strike_ratio,diff_fight_duration,diff_win_rate,diff_knockdown_percentage,diff_ko_rate,diff_submission_rate,diff_finish_rate
0,0,0,1,0,0,1,1,1,0,0,...,-0.334603,-0.005065,0.016259,-0.067122,0.942674,-0.021651,0.000827,-0.214245,-0.214245,-0.322748


In [36]:
new_data_predictions = clf.predict(new_data)
print(new_data_predictions)


[0]


In [37]:
# Get probability estimates for each class
new_data_probabilities = clf.predict_proba(new_data)
print(new_data_probabilities)


[[1. 0.]]


In [38]:
from sklearn.calibration import CalibratedClassifierCV

clf_calibrated = CalibratedClassifierCV(estimator=clf, method='sigmoid')
clf_calibrated.fit(X_train, y_train)
new_data_probabilities_calibrated = clf_calibrated.predict_proba(new_data)
print(new_data_probabilities_calibrated)


[[0.75713485 0.24286515]]


In [39]:
accuracy = accuracy_score(y_test, y_pred)
precision = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred) 
f1 = f1_score(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-Score: {f1:.4f}')

# Print classification report for more details
print(classification_report(y_test, y_pred))

Accuracy: 0.7491
Precision: 0.7491
Recall: 0.7339
F1-Score: 0.7406
              precision    recall  f1-score   support

           0       0.75      0.76      0.76       702
           1       0.75      0.73      0.74       669

    accuracy                           0.75      1371
   macro avg       0.75      0.75      0.75      1371
weighted avg       0.75      0.75      0.75      1371

