In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the three cleaned CSV exports (same order as before) into DataFrames.
player_info, org_wins, player_wins = map(
    pd.read_csv,
    (
        'clean_data/player_info.csv',
        'clean_data/org_wins.csv',
        'clean_data/player_wins.csv',
    ),
)

In [None]:
# Player info: exploratory data analysis (EDA)

In [None]:
# Preview the first five rows of the player_info table.
player_info.head(n=5)

In [None]:
# List the distinct values found in the player_status column.
player_info['player_status'].unique()

In [None]:
def _count_nationalities(players: pd.DataFrame, statuses) -> pd.DataFrame:
    """Tally players per nationality for the given status label(s).

    Parameters
    ----------
    players : pd.DataFrame
        Frame with 'player_status' and 'nationality' columns.
    statuses : str or iterable of str
        One status label, or several acceptable spellings of it.

    Returns
    -------
    pd.DataFrame
        Two columns, 'nationality' and 'count', most frequent first.
    """
    wanted = [statuses] if isinstance(statuses, str) else list(statuses)
    in_status = players['player_status'].isin(wanted)
    return (
        players.loc[in_status, 'nationality']
        .value_counts()
        # Name the index and the values explicitly instead of renaming the
        # defaults: pandas >= 2.0 calls the value_counts() result 'count' and
        # its index 'nationality', so the old rename mapping produced two
        # columns both called 'count'. This form works on 1.x and 2.x alike.
        .rename_axis('nationality')
        .reset_index(name='count')
    )


active_players = _count_nationalities(player_info, 'Active')

retired_players = _count_nationalities(player_info, 'Retired')

# The original filter used the spelling 'Unkown'. The correctly spelled label
# is accepted as well so the frame is not silently empty if the CSV uses it.
# TODO: confirm the actual label against player_info['player_status'].unique().
unkown_players = _count_nationalities(player_info, ['Unkown', 'Unknown'])

banned_players = _count_nationalities(player_info, 'Banned')

inactive_players = _count_nationalities(player_info, 'Inactive')

In [None]:
# Display the per-nationality counts built from players whose status is 'Active'.
active_players

In [None]:
# Display the per-nationality counts built from players whose status is 'Retired'.
retired_players

In [None]:
# Bar chart with one bar per nationality, showing how many player_info rows
# carry that nationality.
ax = sns.countplot(x='nationality', data=player_info)
ax.set_xlabel('Nationality')
ax.set_ylabel('Count')
ax.set_title('Distribution of Player Nationalities')
# Turn the tick labels vertical.
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# Pie chart: the three most common nationalities, plus one 'Other' wedge that
# lumps together every remaining nationality.
nationality_counts = player_info['nationality'].value_counts()

# value_counts() sorts most-frequent first, so positions 0-2 are the top three
# and everything from position 3 onward belongs in 'Other'.
other_count = nationality_counts.iloc[3:].sum()
top_3 = nationality_counts.iloc[:3].copy()
top_3.loc['Other'] = other_count

ax = plt.gca()
ax.pie(top_3, labels=top_3.index, autopct='%1.1f%%')
ax.set_title('Top 3 Highest Player Base Nations')
ax.axis('equal')  # equal aspect ratio keeps the pie circular
plt.show()

In [None]:
# Player-status breakdown for United States players only: one bar per status.
us_players = player_info[player_info['nationality'] == 'United States']

ax = sns.countplot(data=us_players, x='nationality', hue='player_status')
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_xlabel('Nationality')
ax.set_ylabel('Count')
ax.set_title('Player Status of United States Players')
# Anchor the legend just outside the right edge so it does not cover the bars.
ax.legend(title='Player Status', bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0)
# Render explicitly, like the other plotting cells, instead of leaving the
# Legend object's repr as the cell output.
plt.show()

In [None]:
# Organisation wins: exploratory data analysis (EDA)

In [None]:
# Preview the first five rows of the org_wins table.
org_wins.head(n=5)

In [None]:
# Sum earnings per team and keep the five largest totals.
top_earning_teams = (
    org_wins
    .groupby('team')['earnings']
    .sum()
    .nlargest(5)
)

# One bar per team, bar height = summed earnings.
ax = sns.barplot(x=top_earning_teams.index, y=top_earning_teams.to_numpy())
ax.set_xlabel('Team')
ax.set_ylabel('Total Earnings')
ax.set_title('Top Earning Teams')
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# Line chart of earnings against year for a single team; change team_name to
# look at a different organisation.
team_name = 'TSM'
team_data = org_wins.loc[org_wins['team'] == team_name]

ax = sns.lineplot(data=team_data, x='year', y='earnings')
ax.set_xlabel('Year')
ax.set_ylabel('Earnings')
ax.set_title(f'Earnings Over the Years for {team_name}')
plt.show()

In [None]:
# Scatter of earnings against earnings rank, one colour per year.
#
# A list palette must contain exactly one colour per hue level: seaborn 0.11
# raises "The palette list has the wrong number of colors" on a mismatch and
# newer releases warn. So size the Set1 palette to the number of distinct
# years rather than relying on its default length (colours repeat if there
# are more years than Set1 has colours).
n_years = org_wins['year'].nunique()
custom_palette = sns.color_palette('Set1', n_colors=n_years)

ax = sns.scatterplot(data=org_wins, x='earnings_rank', y='earnings', hue='year', palette=custom_palette)
ax.set_xlabel('Earnings Rank')
ax.set_ylabel('Earnings')
ax.set_title('Earnings vs. Earnings Rank with Year Hue')
ax.legend(title='Year')
plt.show()

In [None]:
# Player wins: exploratory data analysis (EDA)

In [None]:
# Preview the first five rows of the player_wins table.
player_wins.head(n=5)

In [None]:
# Sum earnings per player and keep the five largest totals.
top_earning_players = (
    player_wins
    .groupby('player_name')['earnings']
    .sum()
    .nlargest(5)
)

# One bar per player, bar height = summed earnings.
ax = sns.barplot(x=top_earning_players.index, y=top_earning_players.to_numpy())
ax.set_xlabel('Player')
ax.set_ylabel('Total Earnings')
ax.set_title('Top Earning Players')
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# Ten-bin histogram of the earnings column in player_wins, with black bar edges.
ax = plt.gca()
ax.hist(player_wins['earnings'], bins=10, edgecolor='black')
ax.set_xlabel('Earnings')
ax.set_ylabel('Count')
ax.set_title('Distribution of Player Earnings')
plt.show()

In [None]:
# How many nationalities (ranked by mean earnings) to show. The original code
# selected 10 while its comment, variable name and plot title all said "3";
# the number now lives in one place and the labels are derived from it.
TOP_N_NATIONALITIES = 10

# Mean earnings per nationality across all rows of player_wins.
mean_earnings_by_nationality = player_wins.groupby('nationality')['earnings'].mean()

# Rank once and reuse the result for both the printout and the plot.
top_mean_earnings = mean_earnings_by_nationality.nlargest(TOP_N_NATIONALITIES)
print(top_mean_earnings)

# Nationalities with the highest mean earnings, best first.
top_nationalities = top_mean_earnings.index
# Legacy alias: the original (misleading) name is kept in case later cells use it.
top_3_nationalities = top_nationalities

# Keep only the rows belonging to the selected nationalities.
filtered_data = player_wins[player_wins['nationality'].isin(top_nationalities)]

# Box plot of earnings per nationality, ordered to match the printed ranking.
ax = sns.boxplot(
    data=filtered_data,
    x='nationality',
    y='earnings',
    order=list(top_nationalities),
)
ax.set_xlabel('Nationality')
ax.set_ylabel('Earnings')
ax.set_title(f'Box Plot of Earnings by Nationality (Top {TOP_N_NATIONALITIES} Mean Earnings)')
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()