In [2]:
import pandas as pd 
import plotly.graph_objects as go
import numpy as np

In [4]:
# Remote directory that holds the prepared women's-football CSV files.
# NOTE(review): 'final_proejct' looks misspelled — presumably it mirrors the
# actual directory name on the server; confirm before "fixing" it.
base_url = 'http://115.78.93.252/TTU/pool/donghuynh0/data_visualization_2025/final_proejct/ready/women/'

# Match-level results table; every chart below is derived from this frame.
df_results = pd.read_csv(f"{base_url}women_results.csv")

In [5]:
# Preview the loaded results; the rendered output below shows only the first
# and last rows, with the middle elided.
df_results

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,home_continent,away_continent,goal_difference,winner,year
0,1969-11-01,Italy,France,1,0,Euro,Europe,Europe,1,Italy,1969
1,1969-11-01,Denmark,England,4,3,Euro,Europe,Europe,1,Denmark,1969
2,1969-11-02,England,France,2,0,Euro,Europe,Europe,2,England,1969
3,1969-11-02,Italy,Denmark,3,1,Euro,Europe,Europe,2,Italy,1969
4,1975-08-25,Thailand,Australia,3,2,AFC Championship,Asia,Oceania,1,Thailand,1975
...,...,...,...,...,...,...,...,...,...,...,...
4879,2022-07-22,Sweden,Belgium,1,0,UEFA Euro,Europe,Europe,1,Sweden,2022
4880,2022-07-23,France,Netherlands,1,0,UEFA Euro,Europe,Europe,1,France,2022
4881,2022-07-26,England,Sweden,4,0,UEFA Euro,Europe,Europe,4,England,2022
4882,2022-07-27,Germany,France,2,1,UEFA Euro,Europe,Europe,1,Germany,2022


In [16]:
# Average home and away goals per match, bucketed by decade.
# Floor-dividing the year by 10 and multiplying back maps e.g. 1969 -> 1960.
decade = (df_results['year'] // 10 * 10).rename('decade')
df_score = df_results.groupby(decade)[['home_score', 'away_score']].mean()

# One line per side: (source column, legend label, hover text).
series_specs = [
    (
        'home_score',
        'Home',
        'Decade: <b>%{x}</b> <br> Home Avg: <b>%{y:.2f}</b> <extra></extra>',
    ),
    (
        'away_score',
        'Away',
        'Decade: <b>%{x}</b> <br> Away Avg: <b>%{y:.2f}</b> <extra></extra>',
    ),
]

fig = go.Figure(data=[
    go.Scatter(
        x=df_score.index,
        y=df_score[column],
        name=label,
        mode='lines+markers',
        hovertemplate=hover,
    )
    for column, label, hover in series_specs
])

fig.update_layout(
    template='plotly_white',
    title='Average Goals per Match by Decade',
    xaxis_title='Decade',
    yaxis_title='Average Goals',
    width=900,
    height=500,
)

fig.show()


In [17]:
# Mean goal difference per match, grouped by decade.
# (year // 10) * 10 buckets each year into its decade, e.g. 1975 -> 1970.
df_diff = df_results.groupby((df_results['year'] // 10) * 10)['goal_difference'].mean()

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df_diff.index,
    y=df_diff.values,
    mode='lines+markers',
    hovertemplate=' Decade: <b>%{x}</b> <br> Diff Avg: <b>%{y:.2f}</b><extra></extra> '
))

fig.update_layout(
    title='Average Goal Difference by Decade',
    # The x values are decade buckets, not individual years, so the axis label
    # must agree with the title and the hover text.
    xaxis_title='Decade',
    yaxis_title='Average Goal Difference',
    template='plotly_white',
    width=800,
    height=500
)

fig.show()

In [18]:
# Number of matches recorded for each tournament.
tournament_counts = df_results['tournament'].value_counts()

# Keep only the five tournaments with the most matches.
top_tournaments = tournament_counts.nlargest(5)

# Two-column frame (tournament, match_count) that feeds the pie chart.
pie_data = (
    top_tournaments
    .rename_axis('tournament')
    .reset_index(name='match_count')
)

fig = go.Figure()
fig.add_trace(
    go.Pie(
        labels=pie_data['tournament'],
        values=pie_data['match_count'],
        hovertemplate="<b>%{label}</b><br>Matches: %{value}<extra></extra>",
    )
)
fig.update_layout(title_text='Match Distribution Among Top 5 Tournaments')

fig.show()


In [19]:
# Matches played in each calendar year, ordered chronologically.
matches_per_year = df_results['year'].value_counts().sort_index()

# Derive the covered year span from the data instead of hard-coding it: the
# previous hard-coded "(1872-2022)" did not match the loaded results, whose
# first rows are from 1969.
chart_title = 'Total Football Matches Played per Year'
if not matches_per_year.empty:
    # The index is sorted, so its first/last entries are the earliest/latest year.
    first_year = int(matches_per_year.index[0])
    last_year = int(matches_per_year.index[-1])
    chart_title = f'{chart_title} ({first_year}-{last_year})'

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=matches_per_year.index,
    y=matches_per_year.values,
    mode='lines',
    hovertemplate=" Year: <b>%{x}</b> <br> Matches: <b>%{y}</b> <extra></extra>"
))

fig.update_layout(
    title=chart_title,
    xaxis_title='Year',
    yaxis_title='Number of Matches',
    template='plotly_white',
    width=900,
    height=500,)

fig.show()
