In [2]:
# Execute functions.py inside the notebook namespace so the helpers it defines
# become available to the cells below.
%run functions.py
# run() is not defined in this notebook; presumably it comes from functions.py.
# The recorded output shows it rendering a 'Run Cells Below' button.
run()

Button(description='Run Cells Below', style=ButtonStyle())

Output()

In [3]:
# importing the necessary libraries
import warnings
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import ipywidgets as widgets
from IPython.display import display, clear_output, Javascript, HTML
###############################################################################################
###############################################################################################
# Front-end snippet rendered as this cell's output: it defines a JavaScript
# code_toggle() function that hides/shows every 'div.input' element (the code
# cells), calls it once when the document is ready, and adds a link to toggle
# the code again.
# NOTE(review): relies on `$` being available in the page (presumably jQuery
# from the classic Notebook front end) -- confirm for other front ends.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')
###############################################################################################
###############################################################################################

In [4]:
# Load the processed fighter table and the loading scores.
# `scores` is indexed by feature name and is expected to provide the columns
# 'Component 1', 'Component 2' and 'Component 3' used further down.
data = pd.read_excel("../data_i/processed/i_final.xlsx", index_col=0, sheet_name='Sheet3')
scores = pd.read_excel("../data_i/processed/i_loading_scores.xlsx", index_col=0)

# `responses` maps 'question<N>' -> the value most recently selected by the user.
# `norm_weight` collects the weight applied to each component so the combined
# score can be normalised afterwards.
responses = {}
norm_weight = []

# Answer options: the divisions present in the data, plus two numeric scales.
options1 = list(data['division'].unique())
options2 = [1, 2, 3]
# Wider alternative scale for options2, currently disabled:
#options2 = [1, 2, 3, 4, 5, 6, 7, 8, 9]
options3 = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Weight applied for every possible scaled answer (slider value / 10).
# The weight grows with the distance from the neutral midpoint: it falls from
# 6 to 1 as the answer goes from 0.0 to 0.5 and rises from 2 back to 6 as it
# goes from 0.6 to 1.0. An answer of 0.5 has weight 1, i.e. no re-weighting.
weight_dict = {0.0: 6, 0.1: 5, 0.2: 4, 0.3: 3, 0.4: 2, 0.5: 1,
               0.6: 2, 0.7: 3, 0.8: 4, 0.9: 5, 1.0: 6}

# Feature groups whose loading scores are re-weighted by the answers.
# Each list holds two opposing groups; one is scaled up and the other scaled
# down depending on which side of the midpoint the answer falls.
weighted_features_1 = [['KD', 'SIG_STR_pct', 'SLpM', 'Str_Acc', 'win_by_KO/TKO'],
                       ['SUB_ATT', 'TD_Avg', 'TD_Acc', 'pct_TD', 'win_by_Submission']]

weighted_features_2 = [['SApM', 'DIST_pct', 'Str_Def', 'fight_time', 'TD_Def'],
                       ['Str_Acc', 'TD_Avg', 'TD_Acc', 'CTRL_RT', 'win_by_Submission']]

# NOTE(review): the second group used to list 'TD_Def' twice. The duplicate is
# removed here; if a different feature was intended in its place, add it.
weighted_features_3 = [['pct_LEG', 'pct_BODY', 'pct_HEAD', 'win_by_KO/TKO', 'win_by_Decision_Unanimous',
                        'win_by_Decision_Majority'],
                       ['Sub_Avg', 'SUB_ATT', 'TD_Def', 'SApM', 'win_by_Submission']]

# UFC Recommender System

## Question 1: 
<h3>Select your preferred weight class or division:</h3>

In [5]:
# Layout shared by all question widgets.
widgets_config = dict(layout=dict(width='800px'))

# Question 1: dropdown listing the available divisions.
likert_scale1 = widgets.Dropdown(options=options1, **widgets_config)


def handle_likert_scale1_change(change):
    """Record the newly selected division as the answer to question 1."""
    responses['question1'] = change['new']


likert_scale1.observe(handle_likert_scale1_change, names='value')
display(likert_scale1)

Dropdown(layout=Layout(width='800px'), options=('Lightweight Division', 'Heavyweight Division', 'Bantamweight …

## Question 2: 
<h3>Rate your preference for a balanced [1], striking-heavy [2], or wrestling-heavy [3] fighter:</h3>

In [6]:
# Question 2: three-step slider (1 to 3).
likert_scale2 = widgets.IntSlider(min=1, max=3, **widgets_config)


def handle_likert_scale2_change(change):
    """Record the new slider position as the answer to question 2."""
    responses['question2'] = change['new']


likert_scale2.observe(handle_likert_scale2_change, names='value')
display(likert_scale2)

IntSlider(value=1, layout=Layout(width='800px'), max=3, min=1)

## Question 3: 

<h3>Are you more interested in fighters who excel in striking and have a
high percentage of significant strikes landed, or in fighters who focus more
on grappling and takedowns, with a high average number of takedowns per fight?</h3>

In [7]:
# Question 3: slider from 0 to 10.
likert_scale3 = widgets.IntSlider(min=0, max=10, **widgets_config)


def handle_likert_scale3_change(change):
    """Record the new slider position as the answer to question 3."""
    responses['question3'] = change['new']


likert_scale3.observe(handle_likert_scale3_change, names='value')
display(likert_scale3)

IntSlider(value=0, layout=Layout(width='800px'), max=10)

## Question 4:
<h3>Are you more interested in fighters who have a strong defensive game,
with high striking defense and who are difficult to take down,
or in fighters who have an aggressive style and a high knockout-to-win ratio?</h3>

In [8]:
# Question 4: slider from 0 to 10.
likert_scale4 = widgets.IntSlider(min=0, max=10, **widgets_config)


def handle_likert_scale4_change(change):
    """Record the new slider position as the answer to question 4."""
    responses['question4'] = change['new']


likert_scale4.observe(handle_likert_scale4_change, names='value')
display(likert_scale4)

IntSlider(value=0, layout=Layout(width='800px'), max=10)

## Question 5: 
<h3>Do you prefer fighters who have a well-rounded game, with good performance in
all areas and a balanced win distribution across various methods (Decisions, KO/TKOs,
Submissions), or fighters who specialize in specific techniques or styles, with a
high percentage of wins by submission?</h3>

In [9]:
# Question 5: slider from 0 to 10.
likert_scale5 = widgets.IntSlider(min=0, max=10, **widgets_config)


def handle_likert_scale5_change(change):
    """Record the new slider position as the answer to question 5."""
    responses['question5'] = change['new']


likert_scale5.observe(handle_likert_scale5_change, names='value')
display(likert_scale5)

IntSlider(value=0, layout=Layout(width='800px'), max=10)

In [14]:
# create_button_widget() is not defined in this notebook (presumably it comes
# from functions.py, loaded with %run at the top). The recorded output shows it
# rendering a 'See Results' button.
create_button_widget()

Button(description='See Results', style=ButtonStyle())

Output()

In [15]:
# Read the answers to questions 3-5 straight from their sliders and scale them
# from the 0-10 slider range to [0, 1].
# `responses` is only filled in by change events, so slicing its values by
# position would depend on the order in which the widgets were touched and
# would miss any slider left at its default. The widget state is always
# complete and current.
pca_values = [slider.value / 10 for slider in (likert_scale3, likert_scale4, likert_scale5)]

In [16]:
# Re-weight the loading scores according to the three scaled answers.
#
# This cell mutates `scores` and `norm_weight` in place. To keep it safe to run
# more than once (changed answers, manual re-run), it first restores the
# unweighted loadings and empties `norm_weight`. The snapshot of the unweighted
# loadings is retaken whenever `scores` is a new object, i.e. freshly reloaded.
component_columns = ['Component 1', 'Component 2', 'Component 3']
if globals().get('_weighted_scores_owner') is not scores:
    _weighted_scores_owner = scores
    scores_unweighted = scores[component_columns].copy()
else:
    scores[component_columns] = scores_unweighted
norm_weight.clear()

# One entry per answer: (component column, the two opposing feature groups,
# index of the group that is scaled UP when the answer is above 0.5).
# NOTE(review): for 'Component 1' a high answer favours the second group, while
# for 'Component 2' and 'Component 3' it favours the first group. This mirrors
# the existing behaviour; confirm it matches the wording of questions 3-5.
weighting_plan = [
    ('Component 1', weighted_features_1, 1),
    ('Component 2', weighted_features_2, 0),
    ('Component 3', weighted_features_3, 0),
]

for x, (column, groups, high_group) in zip(pca_values, weighting_plan):
    weight = weight_dict[x]
    norm_weight.append(weight)
    # Answers above the midpoint favour `high_group`; the rest favour the other.
    favoured = high_group if x > 0.5 else 1 - high_group
    scores.loc[groups[favoured], column] *= weight
    scores.loc[groups[1 - favoured], column] /= weight

In [17]:
# Build the candidate pool: fighters in the chosen division (question 1) whose
# `lab_num` equals the answer to question 2, restricted to the feature columns
# that have loading scores.
# `responses` only contains a question once its widget has been changed, so
# fall back to the widget's current value when the default was kept.
division_choice = responses.get('question1', likert_scale1.value)
style_choice = responses.get('question2', likert_scale2.value)
filtered_df = data[data['division'] == division_choice]
filtered_df = filtered_df[filtered_df['lab_num'] == style_choice]
filtered_df = filtered_df.loc[:, scores.index.tolist()]

# Combine the three weighted loading scores into one profile per feature,
# normalised by the total weight that was applied.
scores['Sum'] = (scores['Component 1'] + scores['Component 2'] + scores['Component 3']) / sum(norm_weight)
scoresT = scores.T

# Append the profile as an extra row labelled 'Sum' below the fighters.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0, so the
# global FutureWarning filter that used to hide its deprecation is not needed.
filtered_df = pd.concat([filtered_df, scoresT.loc[['Sum']]])

# Compare the profile row against every row. The 'Sum' row is compared with
# itself as well; the results cell drops that last row again.
vector1 = filtered_df.loc['Sum'].values.reshape(1, -1)
vector2 = filtered_df.values

# Calculate cosine similarity
similarity_matrix = cosine_similarity(vector1, vector2)

# Store each row's similarity to the profile in a new 'Rank' column.
filtered_df['Rank'] = similarity_matrix.flatten()

### The recommended Fighters are:

In [19]:
# Drop the last row (the appended profile row), then keep the five rows with
# the highest similarity score.
last_row_label = filtered_df.index[-1]
recommended_fighters = filtered_df.drop(last_row_label)
recommended_fighters = recommended_fighters['Rank'].nlargest(5)

# Render the index labels of the selected rows as one comma-separated line.
index_values = ', '.join(str(label) for label in recommended_fighters.index)

formatted_output = f"<span style='font-size:14pt; font-weight:bold; color:blue'>{index_values}</span>"
display(HTML(formatted_output))