In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

In [None]:
# Country lookup table. Its "country" column is renamed to "country_code" so it
# can serve as the join key when this table is merged with the per-country
# user counts later in the notebook.
#
# pandas' default NA handling parses the literal token "NA" as a missing value.
# If this column holds ISO alpha-2 codes, that silently erases Namibia ("NA"),
# so only truly empty fields are treated as missing here.
location_countries = pd.read_csv(
    "Data/countries.csv",
    keep_default_na=False,
    na_values=[""],
).rename(columns={"country": "country_code"})
location_countries.head(5)

In [None]:
# One row per user with an upper-cased country code, ordered by user_id.
#
# - keep_default_na=False / na_values=[""]: pandas would otherwise parse the
#   literal token "NA" as missing, which would drop a valid "NA" country code
#   (ISO alpha-2 for Namibia). Only empty fields are treated as missing.
# - The steps are chained and the column is rewritten with .assign() so the
#   upper-casing never writes into a slice of another frame
#   (avoids SettingWithCopyWarning).
user_location_df = (
    pd.read_csv(
        "Data/clean-locations.csv",
        keep_default_na=False,
        na_values=[""],
    )
    # Keep only the first location row recorded for each user.
    .drop_duplicates(subset=["user_id"], keep="first")
    .loc[:, ["user_id", "country_code"]]
    .assign(country_code=lambda frame: frame["country_code"].str.upper())
    .sort_values(by=["user_id"])
)
user_location_df

In [None]:
# Number of users per country: count the non-null user_id entries in each
# country_code group, expose the result as a two-column table
# (country_code, num_users) and order it from the most to the least
# represented country.
num_users_per_country = (
    user_location_df
    .groupby("country_code")["user_id"]
    .count()
    .rename("num_users")   # name of the count column
    .reset_index()          # country_code becomes a regular column again
    .sort_values(by="num_users", ascending=False)
)

In [None]:
# Attach the country metadata from location_countries (including the "name"
# column used by the plots) to the per-country user counts.
#
# Only the two count columns are taken from the left-hand side. This keeps the
# cell idempotent: because the result is stored back under the same name,
# re-running the cell would otherwise merge the already-merged columns again
# and produce suffixed duplicates (e.g. name_x / name_y) instead of "name".
#
# NOTE: with how='inner', country codes that have no match in
# location_countries are dropped from the result.
num_users_per_country = pd.merge(
    num_users_per_country[["country_code", "num_users"]],
    location_countries,
    on='country_code',
    how='inner',
)
num_users_per_country

In [None]:
# Bar chart of the countries with the most users.
# Only the first TOP_N_COUNTRIES rows are plotted; this relies on the table
# having been sorted by num_users in descending order before this cell runs.
TOP_N_COUNTRIES = 50
num_users_per_country_shortened = num_users_per_country.head(TOP_N_COUNTRIES)

keys = list(num_users_per_country_shortened["name"])
values = list(num_users_per_country_shortened["num_users"])
# Number of countries actually plotted (fewer than TOP_N_COUNTRIES if the
# table itself has fewer rows).
print(len(values))

bar_fig, bar_ax = plt.subplots(figsize=(12, 10))
bar_ax.bar(keys, values, color='skyblue')
# The x axis shows country names (the "name" column), not country codes.
bar_ax.set_xlabel('Country')
bar_ax.set_ylabel('Number of users watching anime')
bar_ax.set_title('Distribution Plot')
# Vertical tick labels so the country names do not overlap.
bar_ax.tick_params(axis='x', labelrotation=90)
bar_fig.tight_layout()
plt.show()

In [None]:
# Show the per-country table again for reference before it is drawn on the map.
num_users_per_country

In [None]:
# Geospatial view: a choropleth world map in which each country, matched by
# its name, is shaded according to its number of users.
fig = (
    px.choropleth(
        num_users_per_country,
        locations="name",
        locationmode="country names",
        color="num_users",
        hover_name="name",
        projection="natural earth",
        color_continuous_scale="Viridis",
    )
    # Base-map styling: white land on a light-blue ocean, with coastlines
    # and lakes hidden.
    .update_geos(
        showcoastlines=False,
        coastlinecolor="Black",
        showland=True,
        landcolor="white",
        showocean=True,
        oceancolor="#add8e6",
        showlakes=False,
    )
    .update_layout(title_text="Number of Users by Country")
)
fig.show()