In [None]:
# Relevant libraries
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

from bokeh.io import curdoc
# Parenthesised import list: a trailing comma is only legal inside parentheses,
# so the list can wrap and grow without causing a SyntaxError.
from bokeh.models import (
    CategoricalColorMapper,
    CDSView,
    CheckboxButtonGroup,
    ColumnDataSource,
    Div,
    GroupFilter,
    HoverTool,
    NumeralTickFormatter,
    Select,
    Slider,
    TextInput,
)
from bokeh.models.widgets import CheckboxGroup, RadioButtonGroup, RangeSlider
from bokeh.palettes import Blues4, Spectral6
from bokeh.plotting import figure
from bokeh.layouts import column, row, layout

# Global settings: never truncate the column list and print floats with two decimals
for option_name, option_value in {
    'display.max_columns': None,
    'display.float_format': '{:.2f}'.format,
}.items():
    pd.set_option(option_name, option_value)

In [None]:
# prepare dataframes and set unique identifier to select between field players and goalkeepers later on

# Valuation categories, ordered from the most positive 'difference' to the most negative.
VALUATION_LABELS = (
    "Highly Undervalued by FIFA20",
    "Undervalued by FIFA20",
    "Similar Valuation",
    "Overvalued by FIFA20",
    "Highly Overvalued by FIFA20",
)

# Inclusive lower bounds of the first four categories, in the units of the
# 'difference' column. Anything below the last bound falls into the fifth category.
FP_VALUATION_BOUNDS = (5000000, 1000000, -1000000, -4000000)
GK_VALUATION_BOUNDS = (2500000, 700000, -700000, -1500000)


def classify_valuation(difference, lower_bounds):
    """Map each value of ``difference`` to one of ``VALUATION_LABELS``.

    Parameters
    ----------
    difference : pandas.Series
        Numeric series to classify.
    lower_bounds : sequence of four numbers
        Descending, inclusive lower bounds of the first four labels.

    Returns
    -------
    pandas.Series
        Object-dtype series on the same index as ``difference``. A value ``v``
        gets the first label whose bound satisfies ``v >= bound``, or the last
        label when it is below every bound. NaN inputs stay NaN.
    """
    values = difference.to_numpy(dtype=float)
    labels = np.full(values.shape, np.nan, dtype=object)
    # Start every non-missing row in the lowest category ...
    labels[~np.isnan(values)] = VALUATION_LABELS[-1]
    # ... then walk the bounds from lowest to highest so that a higher category
    # overwrites a lower one. NaN never satisfies `>=`, so missing rows stay NaN.
    for label, bound in reversed(list(zip(VALUATION_LABELS, lower_bounds))):
        labels[values >= bound] = label
    return pd.Series(labels, index=difference.index)


df_fp = pd.read_csv('data/fp_data_final_connected.csv', index_col=0)
df_fp["identifier"] = "fp"

df_gk = pd.read_csv('data/gk_data_final_connected.csv', index_col=0)
df_gk["identifier"] = "gk"
# Translate the German position label so both frames use English position names.
df_gk["main_position"] = df_gk["main_position"].replace({'Torwart': "Goalkeeper"})

# creating column for over- and undervaluation
df_fp["residual_valuation"] = classify_valuation(df_fp["difference"], FP_VALUATION_BOUNDS)
df_gk["residual_valuation"] = classify_valuation(df_gk["difference"], GK_VALUATION_BOUNDS)

## put dataframes together, so that only one data source is used for plotting
# pd.concat replaces DataFrame.append, which no longer exists in pandas >= 2.0.
# Missing values (including unclassified valuations) are replaced by 0.
df_final = pd.concat([df_fp, df_gk], sort=False).fillna(0)

In [None]:
# define the data source
# Each key is a field name used by the glyphs, filters and tooltips;
# each value is the df_final column that supplies it.
_source_columns = {
    "transfermarkt_mw": "actual_market_value",
    "predicted_mw": "predicted_market_value",
    "continent": "geographical_continent",
    "name": "long_name",
    "age": "player_age",
    "overall": "overall",
    "difference": "difference",
    "valuation": "residual_valuation",
    "position": "main_position",
}
datasource = ColumnDataSource(
    data={field: df_final[column] for field, column in _source_columns.items()}
)

# build filter for over- and undervaluation
def _group_view(valuation_label):
    """Return ``(filters, view)`` limiting ``datasource`` to one 'valuation' group."""
    group_filters = [GroupFilter(column_name='valuation', group=valuation_label)]
    return group_filters, CDSView(source=datasource, filters=group_filters)


high_underval_filter, high_underval_view = _group_view('Highly Undervalued by FIFA20')
underval_filter, underval_view = _group_view('Undervalued by FIFA20')
accept_range_filter, accept_range_view = _group_view('Similar Valuation')
overval_filter, overval_view = _group_view('Overvalued by FIFA20')
high_overval_filter, high_overval_view = _group_view('Highly Overvalued by FIFA20')

# tooltips
# HTML template for the hover tooltip. `@field` is replaced with the hovered
# point's value from the data source; `{0,0}` formats it with thousands separators.
# The outer <div> is now closed so the markup is well-formed.

TOOLTIPS = """
    <div>
        <div>
                <span style="font-size: 14px; font-weight: bold;">@name</span>
        </div>
        <div>
                <span style="font-size: 12px;">Age: @age</span>
        </div>
        <div>
                <span style="font-size: 12px;">Main Position: @position</span>
        </div>
        <div>
                <span style="font-size: 12px;">FIFA-Rating: @overall</span>
        </div>
        <div>
                <span style="font-size: 12px;">Transfermarkt MV: @transfermarkt_mw{0,0}</span>
        </div>
        <div>
                <span style="font-size: 12px;">Predicted MV: @predicted_mw{0,0}</span>
        </div>
    </div>
"""

# toolbar: tool names handed to figure() as a single comma-separated entry

select_tools = ["pan,wheel_zoom,box_zoom,reset,save,crosshair"]

# define our basic plot
# Both axes share the same fixed range; the y axis is drawn on the right
# and the toolbar sits below the plot.
_figure_options = dict(
    title='',
    x_axis_label='Actual market value in €',
    y_axis_label='Predicted market value in €',
    x_range=(0, 26000000),
    y_range=(0, 26000000),
    y_axis_location="right",
    plot_height=600,
    plot_width=1400,
    tools=select_tools,
    toolbar_location="below",
    tooltips=TOOLTIPS,
)
plot = figure(**_figure_options)

# solid black frame, two units wide, around the plotting area
for _outline_property, _outline_value in (
    ("outline_line_width", 2),
    ("outline_line_alpha", 1),
    ("outline_line_color", "black"),
):
    setattr(plot, _outline_property, _outline_value)

plot.toolbar.autohide = True
    
# draw actual data as a circle glyph on the plot
# One renderer per valuation group (each restricted by its own view) so that
# every group has its own colour and legend entry.
_group_styles = [
    ('seagreen', 'Highly Undervalued by FIFA20', high_underval_view),
    ('yellowgreen', 'Undervalued by FIFA20', underval_view),
    ('cornflowerblue', 'Similar Valuation', accept_range_view),
    ('coral', 'Overvalued by FIFA20', overval_view),
    ('maroon', 'Highly Overvalued by FIFA20', high_overval_view),
]
for _group_color, _group_label, _group_view_obj in _group_styles:
    plot.circle("transfermarkt_mw", "predicted_mw", source=datasource, size=8,
                color=_group_color, legend=_group_label, view=_group_view_obj)

# legend configuration ('hide' click policy toggles a group when its entry is clicked)
for _legend_property, _legend_value in (
    ('location', 'top_left'),
    ('title', 'Click legend to hide/display data'),
    ('title_text_font_style', "bold"),
    ('title_text_font_size', "12pt"),
    ('label_text_font', "arial"),
    ('label_text_font_size', "10pt"),
    ('background_fill_color', "ghostwhite"),
    ('border_line_color', "black"),
    ('border_line_width', 4),
    ('border_line_alpha', 0.5),
    ('click_policy', 'hide'),
):
    setattr(plot.legend, _legend_property, _legend_value)

# convert axis to readable format (each axis gets its own formatter instance)
for _axis in (plot.yaxis, plot.xaxis):
    _axis.formatter = NumeralTickFormatter(format="0,0")

# configure axis labels and tick labels identically on both axes
_axis_text_style = (
    ('axis_label_text_font_size', "15pt"),
    ('major_label_text_font_size', "12pt"),
    ('axis_label_text_font_style', "normal"),
    ('axis_label_text_font', "arial"),
    ('axis_label_text_color', "black"),
)
for _axis in (plot.xaxis, plot.yaxis):
    for _text_property, _text_value in _axis_text_style:
        setattr(_axis, _text_property, _text_value)



###########

# add widgets to plot
# widgets update the plot via the update function defined below


# field players or goalkeepers? (index 0 = field players, index 1 = goalkeepers)
datasource_choice = RadioButtonGroup(labels=["Field players", "Goalkeepers",], active=0)

# age intervals: selectable 18-40, initially 18-30
age_slider = RangeSlider(start = 18, end = 40, step = 1, value = (18,30), title = 'Player Age')

# overall ratings: selectable 45-99, initially the full range
overall_slider = RangeSlider(start = 45, end = 99, step = 1, value = (45,99), title = 'Overall FIFA rating')

# position selection
all_positions = df_final.main_position.unique().tolist()
# Tick every position by default. The index list is derived from the data rather
# than hard-coded, so it stays valid whatever number of distinct positions exists.
positions_checkbox = CheckboxGroup(labels=all_positions, active=list(range(len(all_positions))))

# text input for player search
# (original note: player names were converted to lowercase for better usability)
player_textbox = TextInput(title="Player name contains")

# define update function, so that users can interact with the plot and data is updated after every input

# select function

def filter_players(players, age_range, overall_range, positions, name_query, identifier):
    """Return the rows of ``players`` that pass every dashboard filter.

    Parameters
    ----------
    players : pandas.DataFrame
        Must contain the columns ``player_age``, ``overall``, ``main_position``,
        ``long_name`` and ``identifier``.
    age_range, overall_range : (low, high)
        Inclusive bounds for ``player_age`` and ``overall``.
    positions : list
        Accepted values of ``main_position``.
    name_query : str
        Text that ``long_name`` must contain. It is matched literally (not as a
        regular expression) and case-insensitively. An empty string matches every name.
    identifier : str
        Required value of the ``identifier`` column ("fp" or "gk").

    Returns
    -------
    pandas.DataFrame
        The matching subset of ``players``.
    """
    min_age, max_age = age_range
    min_overall, max_overall = overall_range

    keep = (
        players["player_age"].between(min_age, max_age)
        & players["overall"].between(min_overall, max_overall)
        & players["main_position"].isin(positions)
        # regex=False: user input such as "(" or "." must not be treated as a pattern
        # (an unbalanced parenthesis would otherwise raise inside the callback).
        # na=False: entries that are not strings simply do not match.
        & players["long_name"].str.contains(name_query, case=False, regex=False, na=False)
        & (players["identifier"] == identifier)
    )
    return players[keep]


def select_df():
    """Return the rows of ``df_final`` matching the current state of all widgets.

    Reads the age and rating sliders, the ticked positions, the stripped search
    text and the field-player/goalkeeper toggle. Toggle index 0 selects field
    players ("fp"); any other index selects goalkeepers ("gk").
    """
    identifier = "fp" if datasource_choice.active == 0 else "gk"

    return filter_players(
        df_final,
        age_range=age_slider.value,
        overall_range=overall_slider.value,
        positions=[positions_checkbox.labels[i] for i in positions_checkbox.active],
        name_query=player_textbox.value.strip(),
        identifier=identifier,
    )
    



def update_plot():
    """Replace the contents of ``datasource`` with the rows picked by ``select_df()``."""
    selection = select_df()

    # data-source field -> column of the selected frame that feeds it
    field_to_column = {
        "name": "long_name",
        "age": "player_age",
        "overall": "overall",
        "position": "main_position",
        "continent": "geographical_continent",
        "transfermarkt_mw": "actual_market_value",
        "predicted_mw": "predicted_market_value",
        "difference": "difference",
        "valuation": "residual_valuation",
    }

    # Assign the whole dict at once so all columns are swapped together.
    datasource.data = {
        field: selection[column] for field, column in field_to_column.items()
    }
    

def _refresh_on_change(attr, old, new):
    """Bokeh ``on_change`` callback: ignore the event details and redraw the plot."""
    update_plot()


# Sliders and the text box report changes through 'value';
# the checkbox and radio groups report them through 'active'.
# NOTE(review): update_plot() only runs from these callbacks, so until the first
# widget event the data source still holds the unfiltered df_final — confirm
# that this initial view is intended.
value_changes = [age_slider, overall_slider, player_textbox]
active_changes = [positions_checkbox, datasource_choice]

for watched_property, watched_widgets in (('value', value_changes), ('active', active_changes)):
    for watched_widget in watched_widgets:
        watched_widget.on_change(watched_property, _refresh_on_change)



# add an HTML description to the layout
# Read the file inside a context manager so the handle is closed right away
# instead of being left open for the lifetime of the server session.
with open("___dashboard_description.html") as description_file:
    html_description = Div(text=description_file.read(), sizing_mode="stretch_width")

# Stack the widgets in a fixed-size column to be placed left of the plot
widgets=[datasource_choice, age_slider, overall_slider, positions_checkbox, player_textbox]
inputs = column(*widgets, width=400, height=1000)
inputs.sizing_mode = "fixed"

# Description on top, widgets and plot side by side below it.
# NOTE(review): this rebinds `layout`, shadowing the `layout` function imported
# from bokeh.layouts; the name is kept here so existing references still work.
layout = column(html_description, row(inputs, plot),sizing_mode='scale_width')

# Register the layout with the current document and set the page title
curdoc().add_root(layout)
curdoc().title = 'FC Python Visualization'