Fathan Askar(1301213374), Muhammad Antarez Qhadafi(1301210572), Hygea Saveria(1301210385)

In [224]:
import pandas as pd
from math import pi, cos, sin

# Bokeh Library
from bokeh.io import output_file, output_notebook, show
from bokeh.models import ColumnDataSource, HoverTool, Legend, TabPanel, Tabs, NumeralTickFormatter
from bokeh.layouts import row, column, gridplot
from bokeh.plotting import figure
from bokeh.palettes import Category20, Category20c, YlOrBr, Spectral10  # Import palettes
from bokeh.transform import cumsum, dodge

In [225]:
# Locations of the four source CSV files (absolute paths on the author's machine)
WorldCupMatches_path = r'C:\Kuliah Tel U\archive (16)\WorldCupMatches.csv'
WorldCupPlayers_path = r'C:\Kuliah Tel U\archive (16)\WorldCupPlayers.csv'
WorldCups_path = r'C:\Kuliah Tel U\archive (16)\WorldCups.csv'
worldCupFull_path = r'C:\Kuliah Tel U\archive (17)\FIFA World Cup All Goals 1930-2022.csv'

# Load every file independently: a failure is reported and the remaining files
# are still attempted. Malformed lines are skipped rather than raising.
# Note that when a read fails, the corresponding DataFrame name is never bound.

# Matches table (semicolon-delimited)
try:
    WorldCupMatches = pd.read_csv(WorldCupMatches_path, on_bad_lines='skip', sep=';')
except Exception as read_error:
    print(f"Error reading {WorldCupMatches_path}: {read_error}")
else:
    print("WorldCupMatches read successfully")

# Players table (comma-delimited)
try:
    WorldCupPlayers = pd.read_csv(WorldCupPlayers_path, on_bad_lines='skip', sep=',')
except Exception as read_error:
    print(f"Error reading {WorldCupPlayers_path}: {read_error}")
else:
    print("WorldCupPlayers read successfully")

# Tournament summary table (comma-delimited)
try:
    WorldCups = pd.read_csv(WorldCups_path, on_bad_lines='skip', sep=',')
except Exception as read_error:
    print(f"Error reading {WorldCups_path}: {read_error}")
else:
    print("WorldCups read successfully")

# All-goals table (semicolon-delimited, Latin-1 encoded)
try:
    worldCupFull = pd.read_csv(worldCupFull_path, on_bad_lines='skip', sep=';', encoding='latin1')
except Exception as read_error:
    print(f"Error reading {worldCupFull_path}: {read_error}")
else:
    print("worldCupFull read successfully")

WorldCupMatches read successfully
WorldCupPlayers read successfully
WorldCups read successfully
worldCupFull read successfully


In [226]:
# Set the HTML file name and page title Bokeh uses for file output
output_file('World Cup.html', title='World Cup Statistics')

# Count "Germany FR" titles under "Germany"
WorldCups['Winner'] = WorldCups['Winner'].replace(to_replace='Germany FR', value='Germany')

# One row per winning country: the list of years it won and how many times it won
titles_by_country = WorldCups.groupby('Winner').agg({'Year': lambda x: list(x), 'Winner': 'count'})
titles_by_country = titles_by_country.rename(columns={'Winner': 'Wins'}).reset_index()

# Wedge size in radians: the country's share of all wins scaled to a full circle
total_wins = titles_by_country['Wins'].sum()
titles_by_country['Angle'] = titles_by_country['Wins'] / total_wins * 2 * pi

# Order countries from most to fewest wins and renumber the rows from zero
winner_counts = titles_by_country.sort_values(by='Wins', ascending=False).reset_index(drop=True)

# Define a function to generate shades from yellow to blue
def generate_yellow_to_blue_shades(num_shades):
    """Return ``num_shades`` hex colors fading linearly from yellow to blue.

    The first color is pure yellow (``#ffff00``). When more than one shade is
    requested the last color is pure blue (``#0000ff``): the red and green
    channels fall together from 255 to 0 while the blue channel rises from
    0 to 255.

    Args:
        num_shades: Number of colors to generate.

    Returns:
        A list of ``'#rrggbb'`` strings. The list is empty when ``num_shades``
        is zero or negative, and ``['#ffff00']`` when it is 1.
    """
    # A single shade has no interval to interpolate across. Clamping the
    # divisor to 1 makes num_shades == 1 return yellow instead of raising
    # ZeroDivisionError, and leaves every other case unchanged.
    steps = max(num_shades - 1, 1)

    shades = []
    for i in range(num_shades):
        factor = i / steps  # 0.0 at the yellow end, 1.0 at the blue end
        red_green = int(255 * (1 - factor))  # red and green share one value
        blue = int(255 * factor)
        shades.append('#{0:02x}{0:02x}{1:02x}'.format(red_green, blue))
    return shades

# Generate shades from yellow to blue based on the number of unique countries
num_countries = len(winner_counts)
yellow_to_blue_shades = generate_yellow_to_blue_shades(num_countries)

# One color per row, in row order (winner_counts is sorted by wins, descending)
winner_counts['Color'] = yellow_to_blue_shades

# Prepare data for plotting
source = ColumnDataSource(winner_counts)

# Render in the notebook
output_notebook()

# Tooltip fields must name columns present in `source`. The figure used to be
# created with "@Country", which is not a column (the country lives in
# 'Winner'), and the tooltips were then overwritten afterwards. They are now
# defined once and passed straight to figure().
pie_tooltips = [("Country", "@Winner"), ("Wins", "@Wins"), ("Years", "@Year")]

# Create a pie chart
win = figure(height=400, width=640, title="World Cup Wins by Country",
             toolbar_location=None, tools="hover", tooltips=pie_tooltips,
             x_range=(-0.5, 1.0))

# Each wedge starts where the previous one ended (running sum of 'Angle')
win.wedge(x=0, y=1, radius=0.4,
          start_angle=cumsum('Angle', include_zero=True), end_angle=cumsum('Angle'),
          line_color="white", fill_color='Color', legend_field='Winner', source=source)

# A pie chart needs no axes or grid lines
win.axis.axis_label = None
win.axis.visible = False
win.grid.grid_line_color = None

# Hover tool created by tools="hover" above; it reacts to the mouse position
hover = win.select(dict(type=HoverTool))
hover.mode = 'mouse'

# Show the plot
show(win)

In [227]:
# Map each team_id code ('T-xx') to a continent name.
# The dictionary has no entries for 'T-14' and 'T-22'; Series.map() yields NaN
# for any team_id that is not a key here.
# NOTE(review): which country each 'T-xx' id stands for is not visible in this
# notebook, so the continent assigned to each id cannot be checked here —
# verify the entries against the dataset's team list.
continent_map = {
    'T-01': 'Asia',
    'T-02': 'Africa',
    'T-03': 'South America',
    'T-04': 'Asia',
    'T-05': 'Europe',
    'T-06': 'Europe',
    'T-07': 'South America',
    'T-08': 'Europe',
    'T-09': 'South America',
    'T-10': 'Europe',
    'T-11': 'Africa',
    'T-12': 'North America',
    'T-13': 'South America',
    'T-15': 'South America',
    'T-16': 'North America',
    'T-17': 'Europe',
    'T-18': 'North America',
    'T-19': 'Europe',
    'T-20': 'Europe',
    'T-21': 'Europe',
    'T-23': 'Europe',
    'T-24': 'South America',
    'T-25': 'Africa',
    'T-26': 'South America',
    'T-27': 'Europe',
    'T-28': 'Europe',
    'T-29': 'Europe',
    'T-30': 'Africa',
    'T-31': 'Europe',
    'T-32': 'North America',
    'T-33': 'North America',
    'T-34': 'Europe',
    'T-35': 'Europe',
    'T-36': 'Asia',
    'T-37': 'Asia',
    'T-38': 'Asia',
    'T-39': 'Europe',
    'T-40': 'Africa',
    'T-41': 'North America',
    'T-42': 'Asia',
    'T-43': 'Asia',
    'T-44': 'North America',
    'T-45': 'Africa',
    'T-46': 'Europe',
    'T-47': 'Asia',
    'T-48': 'Africa',
    'T-49': 'Asia',
    'T-50': 'Europe',
    'T-51': 'Europe',
    'T-52': 'North America',
    'T-53': 'South America',
    'T-54': 'South America',
    'T-55': 'Europe',
    'T-56': 'Europe',
    'T-57': 'Asia',
    'T-58': 'Europe',
    'T-59': 'Europe',
    'T-60': 'Europe',
    'T-61': 'Asia',
    'T-62': 'Europe',
    'T-63': 'Africa',
    'T-64': 'Europe',
    'T-65': 'Europe',
    'T-66': 'Europe',
    'T-67': 'Europe',
    'T-68': 'Africa',
    'T-69': 'Asia',
    'T-70': 'Europe',
    'T-71': 'Europe',
    'T-72': 'Europe',
    'T-73': 'Europe',
    'T-74': 'Africa',
    'T-75': 'South America',
    'T-76': 'Africa',
    'T-77': 'Asia',
    'T-78': 'Europe',
    'T-79': 'Asia',
    'T-80': 'North America',
    'T-81': 'South America',
    'T-82': 'Europe',
    'T-83': 'Europe',
    'T-84': 'Europe'
}

# Add a 'continent' column by looking up each row's team_id in continent_map
worldCupFull['continent'] = worldCupFull['team_id'].map(continent_map)

In [228]:
# Parse 'match_date' once and store the parsed values back on the frame
parsed_match_dates = pd.to_datetime(worldCupFull['match_date'])
worldCupFull['match_date'] = parsed_match_dates

# Calendar year of each row, taken from the parsed dates
worldCupFull['year'] = parsed_match_dates.dt.year

In [229]:
# Teams treated as World Cup winners in this analysis
winning_teams = ['Brazil', 'Germany', 'Italy', 'Argentina', 'Uruguay', 'France', 'England', 'Spain']

# Keep only the rows whose 'team_name' is one of the winning teams
is_winning_team_row = worldCupFull['team_name'].isin(winning_teams)
worldCupWinners = worldCupFull.loc[is_winning_team_row]

# Display the filtered frame to inspect its structure
worldCupWinners

Unnamed: 0,key_id,goal_id,tournament_id,tournament_name,match_id,match_name,match_date,stage_name,group_name,team_id,...,player_team_name,player_team_code,minute_label,minute_regulation,minute_stoppage,match_period,own_goal,penalty,continent,year
0,1,G-0001,WC-1930,1930 FIFA World Cup,M-1930-01,France v Mexico,1930-07-13,group stage,Group 1,T-28,...,France,FRA,19',19,0,first half,0,0,Europe,1930
1,2,G-0002,WC-1930,1930 FIFA World Cup,M-1930-01,France v Mexico,1930-07-13,group stage,Group 1,T-28,...,France,FRA,40',40,0,first half,0,0,Europe,1930
2,3,G-0003,WC-1930,1930 FIFA World Cup,M-1930-01,France v Mexico,1930-07-13,group stage,Group 1,T-28,...,France,FRA,43',43,0,first half,0,0,Europe,1930
4,5,G-0005,WC-1930,1930 FIFA World Cup,M-1930-01,France v Mexico,1930-07-13,group stage,Group 1,T-28,...,France,FRA,87',87,0,second half,0,0,Europe,1930
10,11,G-0011,WC-1930,1930 FIFA World Cup,M-1930-03,Yugoslavia v Brazil,1930-07-14,group stage,Group 2,T-09,...,Brazil,BRA,62',62,0,second half,0,0,South America,1930
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2715,2716,G-2716,WC-2022,2022 FIFA World Cup,M-2022-64,Argentina v France,2022-12-18,final,not applicable,T-03,...,Argentina,ARG,36',36,0,first half,0,0,South America,2022
2716,2717,G-2717,WC-2022,2022 FIFA World Cup,M-2022-64,Argentina v France,2022-12-18,final,not applicable,T-28,...,France,FRA,80',80,0,second half,0,1,Europe,2022
2717,2718,G-2718,WC-2022,2022 FIFA World Cup,M-2022-64,Argentina v France,2022-12-18,final,not applicable,T-28,...,France,FRA,81',81,0,second half,0,0,Europe,2022
2718,2719,G-2719,WC-2022,2022 FIFA World Cup,M-2022-64,Argentina v France,2022-12-18,final,not applicable,T-03,...,Argentina,ARG,108',108,0,"extra time, second half",0,0,South America,2022


In [230]:
# Number of rows for every (year, team, continent) combination, stored as 'goals'
rows_by_year_team_continent = worldCupWinners.groupby(['year', 'team_name', 'continent'])
goals_per_team = rows_by_year_team_continent.size().reset_index(name='goals')

# Display the aggregated table
goals_per_team

Unnamed: 0,year,team_name,continent,goals
0,1930,Argentina,South America,18
1,1930,Brazil,South America,5
2,1930,France,Europe,4
3,1930,Uruguay,South America,15
4,1934,Argentina,South America,2
...,...,...,...,...
124,2022,England,Europe,13
125,2022,France,Europe,16
126,2022,Germany,Europe,6
127,2022,Spain,Europe,9


In [231]:
output_notebook()

# Data source over the whole table (each line below gets its own per-team source)
source = ColumnDataSource(goals_per_team)

# Empty figure: year on the x-axis, goals on the y-axis
winner_goals_figure = figure(
    title="World Cup Goals by Team",
    x_axis_label='Year',
    y_axis_label='Goals',
    width=800,
    height=400,
    tools="pan,wheel_zoom,box_zoom,reset",
)

# One palette color per distinct team
teams = goals_per_team['team_name'].unique()
colors = Category20[len(teams)]

# Draw one line per team, each backed by a source holding only that team's rows
renderers = [
    winner_goals_figure.line(
        x='year',
        y='goals',
        source=ColumnDataSource(goals_per_team[goals_per_team['team_name'] == team]),
        line_width=2,
        color=color,
    )
    for team, color in zip(teams, colors)
]

# Legend placed to the right of the plot; clicking an entry hides its line
legend = Legend(
    items=[(team, [renderer]) for team, renderer in zip(teams, renderers)],
    location="top_right",
    click_policy="hide",
)
winner_goals_figure.add_layout(legend, 'right')

# Hover tool showing the fields of the point under the cursor
hover = HoverTool(
    tooltips=[("Team", "@team_name"), ("Year", "@year"), ("Goals", "@goals"), ("Continent", "@continent")]
)
winner_goals_figure.add_tools(hover)

# Show the plot
show(winner_goals_figure)


In [232]:
output_notebook()

# Running total of goals within each team, following the table's row order
goals_per_team['cumulative_goals'] = goals_per_team.groupby('team_name')['goals'].cumsum()

# Data source over the whole table (each line below gets its own per-team source)
source = ColumnDataSource(goals_per_team)

# Empty figure: year on the x-axis, cumulative goals on the y-axis
winner_goals_cumulative_figure = figure(
    title="Cumulative World Cup Goals by Team",
    x_axis_label='Year',
    y_axis_label='Cumulative Goals',
    width=800,
    height=400,
    tools="pan,wheel_zoom,box_zoom,reset",
)

# One palette color per distinct team
teams = goals_per_team['team_name'].unique()
colors = Category20[len(teams)]

# Draw one line per team, each backed by a source holding only that team's rows
renderers = [
    winner_goals_cumulative_figure.line(
        x='year',
        y='cumulative_goals',
        source=ColumnDataSource(goals_per_team[goals_per_team['team_name'] == team]),
        line_width=2,
        color=color,
    )
    for team, color in zip(teams, colors)
]

# Legend placed to the right of the plot; clicking an entry hides its line
legend = Legend(
    items=[(team, [renderer]) for team, renderer in zip(teams, renderers)],
    location="top_right",
    click_policy="hide",
)
winner_goals_cumulative_figure.add_layout(legend, 'right')

# Hover tool showing the fields of the point under the cursor
hover = HoverTool(
    tooltips=[("Team", "@team_name"), ("Year", "@year"), ("Cumulative Goals", "@cumulative_goals"), ("Continent", "@continent")]
)
winner_goals_cumulative_figure.add_tools(hover)

# Show the plot
show(winner_goals_cumulative_figure)


In [233]:
# Set the HTML file name and page title used for file output
output_file('cumulative_and_independent_winner_goals.html',
            title='World Cup Winners Goals Every Year and Cumulatively')

# Give both line charts the same 640x400 size
for goals_chart in (winner_goals_figure, winner_goals_cumulative_figure):
    goals_chart.width = 640
    goals_chart.height = 400

# One tab panel per chart
independent = TabPanel(title='World Cup Goals Every World Cup by Team', child=winner_goals_figure)
cumulative = TabPanel(title='Cumulative World Cup Goals by Winners', child=winner_goals_cumulative_figure)

# Combine the panels into a tabbed layout and display it
tabs = Tabs(tabs=[independent, cumulative])
show(tabs)

In [234]:
# Enable Bokeh output in the notebook
output_notebook()

# Convert 'match_date' to datetime and derive the year
# (an earlier cell performs the same conversion; repeated here unchanged)
worldCupFull['match_date'] = pd.to_datetime(worldCupFull['match_date'], format='%m/%d/%Y')
worldCupFull['year'] = worldCupFull['match_date'].dt.year

# Every row of worldCupFull is one goal, so counting round-of-16 rows per team
# would count goals scored in that round, not how often the team got there.
# Keep a single row per (tournament, team) so each appearance is counted once.
# NOTE(review): a team that reached the round of 16 without being credited a
# goal there has no row in this table and is therefore not counted; tournaments
# whose first knockout round carries a different stage_name are also excluded.
round_of_16_goals = worldCupFull[worldCupFull['stage_name'] == 'round of 16']
teams_past_group_stage = round_of_16_goals.drop_duplicates(subset=['tournament_id', 'team_name'])

# Count how many tournaments each team appears in at the round of 16
team_counts = teams_past_group_stage['team_name'].value_counts().reset_index()
team_counts.columns = ['team_name', 'count']

# Create a ColumnDataSource
source = ColumnDataSource(team_counts)

# Create the bar chart figure; the x-axis categories are the team names,
# ordered from most to fewest appearances
bar_chart = figure(title="Teams That Made It Past the Group Stage in World Cup",
                   x_axis_label='Teams',
                   y_axis_label='Number of Times Past Group Stage',
                   x_range=team_counts['team_name'].tolist(),
                   width=800, height=600,
                   tools="pan,wheel_zoom,box_zoom,reset")

# Add bar glyphs centered on their category. A dodge offset is only needed to
# place several bar series side by side; with one series it pushed every bar
# away from its tick label.
bar_chart.vbar(x='team_name', top='count', width=0.5, source=source)

# Add hover tool
hover = HoverTool()
hover.tooltips = [("Team", "@team_name"), ("Count", "@count")]
bar_chart.add_tools(hover)

# Rotate x-axis labels (angle in radians) for better readability
bar_chart.xaxis.major_label_orientation = 1.2

# Show the plot
show(bar_chart)


In [235]:
# Enable Bokeh output in the notebook
output_notebook()

# Map the continent column based on the 'T-' values
worldCupFull['continent'] = worldCupFull['team_id'].map(continent_map)

# Every row of worldCupFull is one goal, so counting round-of-16 rows per
# continent would count goals, not teams. Keep a single row per
# (tournament, team) so that each team appearance contributes exactly once.
# NOTE(review): teams that reached the round of 16 without being credited a
# goal there have no row in this table and are therefore not counted.
round_of_16_goals = worldCupFull[worldCupFull['stage_name'] == 'round of 16']
teams_past_group_stage = round_of_16_goals.drop_duplicates(subset=['tournament_id', 'team_name'])

# Count team appearances per continent (rows with no mapped continent are skipped)
continent_counts = teams_past_group_stage['continent'].value_counts().reset_index()
continent_counts.columns = ['continent', 'count']

# Create a ColumnDataSource
source = ColumnDataSource(continent_counts)

# Create the bar chart figure; the x-axis categories are the continent names,
# ordered from most to fewest appearances
bar_chart2 = figure(title="Teams That Made It Past the Group Stage by Continent in World Cup",
                    x_axis_label='Continent',
                    y_axis_label='Number of Teams Past Group Stage',
                    x_range=continent_counts['continent'].tolist(),
                    width=900, height=600,
                    tools="pan,wheel_zoom,box_zoom,reset")

# Add bar glyphs
bar_chart2.vbar(x='continent', top='count', width=0.5, source=source)

# Add hover tool
hover = HoverTool()
hover.tooltips = [("Continent", "@continent"), ("Count", "@count")]
bar_chart2.add_tools(hover)

# Show the plot
show(bar_chart2)

In [236]:
# Set the HTML file name and page title used for file output
output_file('teams_and_continents_past_ro16.html',
            title='Teams and Continents that make it past the group stage')

# Give both bar charts the same 640x400 size
for stage_chart in (bar_chart, bar_chart2):
    stage_chart.width = 640
    stage_chart.height = 400

# One tab panel per chart
country = TabPanel(title='Teams That Made It Past the Group Stage in World Cup', child=bar_chart)
continent = TabPanel(title='Teams That Made It Past the Group Stage by Continent', child=bar_chart2)

# Combine the panels into a tabbed layout and display it
tabs2 = Tabs(tabs=[country, continent])
show(tabs2)

In [237]:
# Store 'Year' as integers
WorldCupMatches['Year'] = WorldCupMatches['Year'].astype(int)

# Mean attendance for every (stadium, city) pair, largest first
std = (
    WorldCupMatches.groupby(['Stadium', 'City'])['Attendance']
    .mean()
    .reset_index()
    .sort_values(by='Attendance', ascending=False)
)

# The ten pairs with the highest mean attendance (fewer if the data has fewer)
top10 = std[:10]

source = ColumnDataSource(data=dict(
    stadium=top10['Stadium'],
    attendance=top10['Attendance'],
    city=top10['City'],
    # Exactly one palette color per row: passing the whole 10-color palette
    # gives a column of the wrong length whenever top10 has fewer than 10 rows.
    color=list(Spectral10[:len(top10)])
))

output_file("stadion.html")

# Horizontal bar chart with one category per stadium on the y-axis
penonton = figure(y_range=top10['Stadium'], width=640, height=400,
                  title="Stadion dengan Jumlah Penonton Tertinggi", tools="pan,wheel_zoom,box_zoom,reset")

# Bar color comes from the 'color' column of the source
penonton.hbar(right='attendance', y='stadium', height=0.4, source=source, color='color')

# Hover tool; attendance is formatted with thousands separators
hover = HoverTool()
hover.tooltips = [("Stadium", "@stadium"), ("Attendance", "@attendance{0,0}"), ("City", "@city")]
penonton.add_tools(hover)

# Plain integer tick labels on the attendance axis
penonton.xaxis[0].formatter = NumeralTickFormatter(format="0")

penonton.xaxis.axis_label = 'Attendance'
penonton.yaxis.axis_label = 'Stadium'

show(penonton)


In [238]:
# Set the HTML file name and page title used for file output
output_file('world-cup-gridplot.html', title='World Cup Data 1930-2022')

# Dashboard layout: pie chart and stadium chart on the first row,
# the two tabbed layouts on the second row
dashboard_rows = [
    [win, penonton],
    [tabs2, tabs],
]
world_cup_gridplot = gridplot(dashboard_rows, sizing_mode='fixed', toolbar_location='right')

# Show the gridplot
show(world_cup_gridplot)

# # Add CSS styles for centering the visualization
# css_style = """
# <style>
# .container {
#     display: flex;
#     justify-content: center;
#     align-items: center;
#     height: 100vh;
# }
# </style>
# """

# # Append the CSS style to the output HTML file
# with open('world-cup-gridplot.html', 'a') as file:
#     file.write(css_style)






In [239]:
# reset_output is imported here, separately from the Bokeh imports in the first cell
from bokeh.plotting import reset_output

# Reset Bokeh's output configuration (set by the output_file() / output_notebook()
# calls in the cells above) so that later show() calls start from a clean state
reset_output()