In [None]:
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=Warning)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw data: one DataFrame per CSV file under ./data.
# NOTE: the "team_mangers" spelling mirrors the CSV file name used in the path below.
awards = pd.read_csv("./data/awards.csv")
body_builders = pd.read_csv("./data/body_builders.csv")
coaches = pd.read_csv("./data/coaches.csv")
countries = pd.read_csv("./data/countries.csv")
matches = pd.read_csv("./data/matches.csv")
physiotherapists = pd.read_csv("./data/physiotherapists.csv")
players = pd.read_csv("./data/players.csv")
presidents = pd.read_csv("./data/presidents.csv")
press_offices = pd.read_csv("./data/press_offices.csv")
sport_directors = pd.read_csv("./data/sport_directors.csv")
stadiums = pd.read_csv("./data/stadiums.csv")
statisticians = pd.read_csv("./data/statisticians.csv")
team_mangers = pd.read_csv("./data/team_mangers.csv")
teams = pd.read_csv("./data/teams.csv")
transfers = pd.read_csv("./data/transfers.csv")

In [None]:
# Polish subsets: keep only the rows whose country_code equals "PL".
polish_teams = teams.loc[teams["country_code"].eq("PL")]
polish_players = players.loc[players["country_code"].eq("PL")]

In [None]:
# Merge matches with team metadata so every match carries the country code and
# name of both teams, then plot who wins when a Polish team meets a foreign one.
matches_have_id = matches["first_team_id"].notna() & matches["second_team_id"].notna()
teams_slice = teams[["country_code", "team_id", "team_name"]]
mt = matches[matches_have_id]
# Default inner merges: matches whose team id is not present in `teams` are dropped.
mt = mt.merge(teams_slice, left_on="first_team_id", right_on="team_id").rename(
    columns={"country_code": "first_team_country_code", "team_name": "first_team_name"}
)
mt = mt.merge(teams_slice, left_on="second_team_id", right_on="team_id").rename(
    columns={
        "country_code": "second_team_country_code",
        "team_name": "second_team_name",
    }
)

# A winner can only be derived when both set counts are present and differ.
# Without this filter, np.where below would credit every such match to the
# second team, because a comparison with NaN (or an equal score) is False.
has_winner = (
    mt["n_set_team1"].notna()
    & mt["n_set_team2"].notna()
    & (mt["n_set_team1"] != mt["n_set_team2"])
)
mt = mt[has_winner].copy()
mt["origin_winning_country"] = np.where(
    mt["n_set_team1"] > mt["n_set_team2"],
    mt["first_team_country_code"],
    mt["second_team_country_code"],
)

# Exactly one of the two sides is Polish (XOR): Poland vs. a foreign team.
is_polish_team_against_other = (mt["first_team_country_code"] == "PL") ^ (
    mt["second_team_country_code"] == "PL"
)
# .copy() so the new "year" column is written to an independent frame rather
# than to a slice of `mt` (avoids pandas' chained-assignment pitfall).
polish_vs_other = mt[is_polish_team_against_other].copy()
polish_vs_other["year"] = pd.to_datetime(polish_vs_other["date"], format="%m/%d/%Y").dt.year

# displot is figure-level and creates its own figure, so the size is passed via
# `height` instead of a preceding plt.figure() call (which would only emit an
# extra empty figure).
sns.displot(
    data=polish_vs_other,
    x="year",
    hue="origin_winning_country",
    multiple="stack",
    height=10,
)
plt.show()

In [None]:
# Line chart: number of Poland-vs-foreign matches won by the Polish side per year.
polish_wins = polish_vs_other.loc[polish_vs_other['origin_winning_country'].eq('PL')]
polish_wins_by_year = polish_wins['year'].value_counts().sort_index()

wins_fig, wins_ax = plt.subplots(figsize=(10, 6))
polish_wins_by_year.plot(kind='line', marker='o', ax=wins_ax)
wins_ax.set_xlabel('Year')
wins_ax.set_ylabel('Number of Wins')
wins_ax.set_title('Number of Wins by Polish Team Over the Years')
plt.show()

In [None]:

# Per-country mean player ranking divided by the country's player count;
# the bar chart shows the ten smallest strictly positive values.
# NOTE(review): the mean is taken over ranked players only, while the divisor
# counts every player of the country (ranked or not) - confirm this is intended.
players_nona = players[players['ranking'].notna()]
average_ranking = players_nona.groupby('country_code')['ranking'].mean()
player_count = players['country_code'].value_counts()
normalized_ranking = average_ranking.div(player_count)

# Ascending sort; the `> 0` mask also removes NaN produced by index alignment.
top_teams = normalized_ranking.sort_values()
top_teams = top_teams.loc[top_teams.gt(0)].iloc[:10]

ranking_fig, ranking_ax = plt.subplots(figsize=(12, 8))
sns.barplot(x=top_teams.index, y=top_teams.values, ax=ranking_ax)
ranking_ax.set_title('Top 10 Nations - Normalized Average Rankings')
ranking_ax.set_xlabel('Country')
ranking_ax.set_ylabel('Normalized Average Ranking')
plt.xticks(rotation=45)
plt.show()


In [None]:
# Butterfly chart of the 20 countries with the most transfers overall:
# outgoing transfers on the left panel, incoming transfers on the right.

# Attach team metadata for the old and the new team. The second merge collides
# with the columns added by the first one, so pandas applies its default
# suffixes: `_x` = old team, `_y` = new team.
transfers_country = transfers.merge(teams, left_on='old_team_id', right_on='team_id', how='left')
transfers_country = transfers_country.merge(teams, left_on='new_team_id', right_on='team_id', how='left')

transfer_countries_old = transfers_country['country_code_x'].value_counts()
transfer_countries_new = transfers_country['country_code_y'].value_counts()

# Rank countries by outgoing + incoming transfers and keep the 20 busiest
# (ascending order, so the busiest country is the last category).
transfer_countries_sum = transfer_countries_old.add(transfer_countries_new, fill_value=0)
transfer_countries_sum = transfer_countries_sum.sort_values(ascending=True)
top_countries = transfer_countries_sum.index[-20:]

# fill_value=0: a country present on only one side must still appear on both
# panels; leaving NaN there can give the two shared-y panels different
# category sets.
transfer_countries_old = transfer_countries_old.reindex(top_countries, fill_value=0)
transfer_countries_new = transfer_countries_new.reindex(top_countries, fill_value=0)

# Symmetric x range for both panels: at least 2100, widened if the data needs
# it so that no bar is clipped.
axis_max = max(2100, transfer_countries_old.max(), transfer_countries_new.max())

fig, ax = plt.subplots(1, 2, figsize=(12, 12), sharey='all')

# Plot first, style afterwards, so seaborn's own axis setup cannot override the
# labels and tick styling applied below.
sns.barplot(y=transfer_countries_old.index, x=transfer_countries_old.values, ax=ax[0])
sns.barplot(y=transfer_countries_new.index, x=transfer_countries_new.values, ax=ax[1])

# Reversed limits on the left panel make its bars grow leftwards.
ax[0].set_xlim([axis_max, 0])
ax[1].set_xlim([0, axis_max])

ax[0].set_ylabel('Country', fontsize=20)
ax[1].set_xlabel('Number of Transfers', fontsize=20)
ax[0].set_xlabel('Number of Transfers', fontsize=20)
ax[0].set_title('Outgoing Transfers', fontsize=20)
ax[1].set_title('Incoming Transfers', fontsize=20)

# Draw the left panel's country ticks on its right edge, between the panels.
ax[0].yaxis.tick_right()
ax[0].tick_params(axis='y', labelsize=20)
ax[1].tick_params(axis='y', labelsize=14)

# tight_layout computes the panel spacing itself.
plt.tight_layout()
plt.show()


In [None]:
# Awards won in international competitions, split by the awarded player's country.
leagues_global = [
    'World Championships', 'World League', 'World Cup', 'The Olympics',
    'The Olympic Qualification', 'European Championships',
    'European Supercup', 'Volleyball Nations League',
    'Champions League', 'CEV Olympic Qualification'
]

# One vectorised filter keeps the awards of every listed league.
awards_league_global = awards[awards['league'].isin(leagues_global)]

# Left merge: awards without a matching player keep a NaN country_code and are
# therefore left out of the groupby below.
awards_with_players = awards_league_global.merge(players, on='player_id', how='left')
award_counts = (
    awards_with_players
    .groupby(["award_date", "country_code"])
    .size()
    .reset_index(name="count")
)
# Keep only the 40 most frequent (award_date, country_code) combinations so the
# stacked histogram stays readable.
sorted_awards = award_counts.sort_values("count", ascending=False).head(40)
filtered_awards = awards_with_players.merge(sorted_awards, on=["award_date", "country_code"], how="inner")
sns.displot(data=filtered_awards, x="award_date", hue="country_code", multiple="stack")
plt.show()

In [None]:
# Collapse repeated (player, old team, new team) transfers to their first
# occurrence, then display only the rows that have a destination team.
transfers_collapsed = transfers.drop_duplicates(
    subset=['player_name', 'old_team_id', 'new_team_id']
)
not_nan = transfers_collapsed['new_team_id'].notna()
transfers_collapsed.loc[not_nan]

In [None]:
# Preview the first rows of the awards table.
awards.head()

In [None]:
# Preview the first rows of the body_builders table.
body_builders.head()

In [None]:
# Preview the first rows of the coaches table.
coaches.head()

In [None]:
# Preview the first rows of the countries table.
countries.head()

In [None]:
# Preview the first rows of the matches table.
matches.head()

In [None]:
# Preview the first rows of the physiotherapists table.
physiotherapists.head()

In [None]:
# Preview the first rows of the players table.
players.head()

In [None]:
# Preview the first rows of the presidents table.
presidents.head()

In [None]:
# Preview the first rows of the press_offices table.
press_offices.head()

In [None]:
# Preview the first rows of the sport_directors table.
sport_directors.head()

In [None]:
# Preview the first rows of the stadiums table.
stadiums.head()

In [None]:
# Preview the first rows of the statisticians table.
statisticians.head()

In [None]:
# Preview the first rows of the team_mangers table (variable name as spelled at load time).
team_mangers.head()

In [None]:
# Preview the first rows of the teams table.
teams.head()

In [None]:
# Preview the first rows of the transfers table, consistent with the other
# preview cells, instead of displaying the whole frame.
transfers.head()

In [None]:
# List the distinct league names that occur in the matches table.
print(pd.unique(matches['league']))