In [None]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the UFC fight records and lift pandas' display truncation so that
# every row and column is shown when a frame is printed.
ufc_data = pd.read_csv("data.csv")
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

# Binary stance encoding shared by both corners: Orthodox and Open Stance
# become 1, Southpaw and Switch become 0. Series.map turns any value that is
# not a key of the dict into NaN.
stance_codes = {'Orthodox': 1, 'Southpaw': 0, 'Open Stance': 1, "Switch": 0}
for stance_column in ('R_Stance', 'B_Stance'):
    ufc_data[stance_column] = ufc_data[stance_column].map(stance_codes)

# Target encoding: Red win -> 1, Blue win -> 0 (any other outcome -> NaN).
winner_codes = {'Red': 1, 'Blue': 0}
ufc_data['Winner'] = ufc_data['Winner'].map(winner_codes)

# Drop every row that still has a missing value in any column; this also
# removes the rows whose stance or winner was not covered by the maps above.
ufc_data = ufc_data.dropna()

# Feature sets for the model.
# features     - a hand-picked set of columns for both corners
# features_all - everything except the label and the descriptive columns
# features_R   - the red-corner subset of the hand-picked columns
both_corner_columns = [
    'B_current_lose_streak', 'R_current_lose_streak',
    'B_current_win_streak', 'R_current_win_streak',
    'R_Stance', 'B_Stance',
    'B_age', 'R_age',
    'B_Height_cms', 'B_Reach_cms', 'R_Height_cms', "R_Reach_cms",
    'B_avg_SIG_STR_pct', 'R_avg_SIG_STR_pct',
]
descriptive_columns = [
    'Winner', 'R_fighter', 'B_fighter', 'Referee',
    'date', 'location', 'title_bout', 'weight_class',
]
red_corner_columns = [
    'R_current_lose_streak', 'R_current_win_streak', 'R_Stance',
    'R_age', 'R_Height_cms', "R_Reach_cms", 'R_avg_SIG_STR_pct',
]

features = ufc_data[both_corner_columns]
features_all = ufc_data.drop(columns=descriptive_columns)
features_R = ufc_data[red_corner_columns]
winner = ufc_data['Winner']

#Split the Data
# 80% of the rows train the model, 20% are held out for scoring.
# No random_state is passed, so the partition (and therefore the printed
# score) differs from run to run.
training_data, test_data, training_labels, test_labels = train_test_split(features_R, winner, test_size = 0.2)

#Scale the Data
# The scaler is fitted on the training split only and then re-used, unchanged,
# on the held-out split so both are standardized with the same statistics.
scaler = StandardScaler()
scaled_training_data = scaler.fit_transform(training_data)
scaled_test_data = scaler.transform(test_data)

#Let's make a model
model = LogisticRegression(random_state = 1)
model.fit(scaled_training_data, training_labels)

#let's score the model
# The model was fitted on standardized features, so it must be scored on the
# standardized test split; passing the raw test_data would feed it values on
# a completely different scale and give a meaningless accuracy.
print(model.score(scaled_test_data, test_labels))

#Which Coefficient is having the biggest impact
# Pair each feature name with its fitted coefficient and sort ascending by the
# signed value: the most negative coefficient comes first, the most positive last.
coefficients = list(zip(features_R.columns, model.coef_[0]))
sorted_coefficients = sorted(coefficients, key=lambda tup: tup[1])
# Uncomment below to see which Coefficients are making the most effect
#print(sorted_coefficients)

# Ask the user for the same attributes the model was trained on.
enter_current_lose_streak = input("What is your current lose streak? ")
enter_current_win_streak = input("What is your current win streak? ")
enter_stance = input("What is your Stance? 1 is Orthodox, 0 is SouthPaw ")
enter_age = input("How old are you? ")
enter_height = input("How tall are you? ")
enter_reach = input("What is your reach in cm? ")
# Significant-strike accuracy is not asked for; a random fraction between
# 0.40 and 0.80 stands in for it.
enter_sig_strike_pct = random.randint(40, 80) / 100

# The order must match the columns of features_R: lose streak, win streak,
# stance, age, height, reach, significant-strike percentage.
# Streaks and stance are whole numbers; age, height and reach are parsed as
# floats so decimal input is accepted. The strike percentage is kept as the
# fraction it already is -- casting it to int would truncate it to 0.
compiled_list = [
    int(enter_current_lose_streak),
    int(enter_current_win_streak),
    int(enter_stance),
    float(enter_age),
    float(enter_height),
    float(enter_reach),
    enter_sig_strike_pct,
]
# A single-row 2-D array, the shape scaler.transform expects for one sample.
entered_fighter = np.array([compiled_list], dtype=float)
scaled_entered_fighter = scaler.transform(entered_fighter)

will_you_win = model.predict(scaled_entered_fighter)
prob_of_winning = model.predict_proba(scaled_entered_fighter)

# Winner is encoded Red = 1 / Blue = 0 and the model is trained on the
# red-corner features, so a predicted 1 means the entered fighter wins.
if will_you_win[0] == 1:
    print("Congratulations you are going to win")
else:
    print("Boom you got knocked out!")

    
#All the above does is look at the R (red corner) data and whether they won. It predicts correctly about 60% of the time
#The next steps - need to work out a way to calculate reach advantage, age advantage and do something with this significant strike stuff
#Then we can come up with functions to determine reach advantage / type of fighter advantage.





0.624405705229794
