# Statistics

In [None]:
import pandas as pd
# Load the two CSV inputs, in this order, from the working directory.
# Later cells read `username` and `final_score` from games_df, and `word`,
# `game_id`, `username` and `victory` from rounds_df.
games_df, rounds_df = (
    pd.read_csv(csv_path) for csv_path in ("games.csv", "rounds_in_games.csv")
)

## Words


Distribution of words by number of letters.

In [None]:
# Count how many rows of rounds_df use a word of each length.
#
# Rows with a missing word are dropped first: pandas' read_csv turns tokens
# such as "NA" or "null" into NaN by default, and calling len() on a NaN
# float raises TypeError. NOTE(review): if such tokens are legitimate words,
# load the file with keep_default_na=False instead - confirm with the data.
words_df = rounds_df[["word"]].dropna().copy()

# Vectorized string length; stays an integer column because no NaN remains.
words_df["word_length"] = words_df["word"].str.len()

# One output row per distinct length, with the number of rows in `count`.
word_length_distribution = (
    words_df.groupby("word_length").size().reset_index(name="count")
)
print(word_length_distribution)

## Rounds

Number of distinct users who have played.

In [None]:
# Collect every value of the `username` column of games_df once, then print
# how many such values there are.
distinct_usernames = games_df.loc[:, "username"].unique()
print(distinct_usernames.size)

Top 5 players by average number of rounds won per game.

In [None]:
# Step 1: for every (game, player) pair, sum the `victory` column
# (presumably a 0/1 or boolean win flag - verify against the CSV).
rounds_won_per_game = (
    rounds_df.groupby(["game_id", "username"])["victory"].sum().reset_index()
)

# Step 2: average those per-game totals for each player.
average_rounds_won = rounds_won_per_game.groupby("username", as_index=False)[
    "victory"
].mean()

# Step 3: keep the five players with the highest average and show them.
top_players = average_rounds_won.nlargest(n=5, columns="victory")
print(top_players)

## Games

Top 5 games by final score, with the player of each game (a player can appear more than once).

In [None]:
# Rank the rows of games_df by `final_score`, highest first, and keep the
# first five. Only the player name and the score are shown; the same
# username can appear on several rows.
top_players = (
    games_df[["username", "final_score"]]
    .sort_values("final_score", ascending=False)
    .head(5)
)
print(top_players)

Top 5 players by mean final score.

In [None]:
# Mean `final_score` over all rows of games_df for each username.
average_score_per_user = games_df.groupby("username", as_index=False)[
    "final_score"
].mean()

# Five usernames with the highest mean score.
top_users = average_score_per_user.nlargest(n=5, columns="final_score")
print(top_users)

In [None]:
import matplotlib.pyplot as plt

# Histogram of `final_score` over all rows of games_df, drawn through the
# Axes object instead of the implicit pyplot state.
fig, ax = plt.subplots(figsize=(10, 6))

# Unit-wide bins with edges 0, 1, 2, 3, 4.
# NOTE(review): the edges are hard-coded; scores outside [0, 4] would not be
# counted by hist - confirm the possible score range.
ax.hist(
    games_df["final_score"],
    bins=[0, 1, 2, 3, 4],
    color="skyblue",
    edgecolor="black",
)

ax.set_title("Games distribution by score")
ax.set_xlabel("Final score")
ax.set_ylabel("Number of games")

# Ticks at 0, 1, 2, 3, i.e. on the left edge of each bin.
ax.set_xticks(range(4))

ax.grid(True)
plt.show()