In [None]:
# Loading in required libraries
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np

df = pd.read_csv("../data/nobel.csv")

#1 What is the most commonly awarded gender and birth country?
#Store your answers as string variables top_gender and top_country.
# mode() returns every value tied for most frequent; [0] keeps the first one.
top_gender = df["sex"].mode()[0]
top_country = df["birth_country"].mode()[0]
# Values are interpolated in the order the label names them: gender, then country.
print(f"1) The most commonly awarded gender and birth country: {top_gender}, {top_country}")

# Graphic: one bar per value of the `sex` column.
plt.figure(figsize=(6, 4))
sns.countplot(data=df, x='sex')
plt.title('Count of Laureates by Gender')
plt.xlabel('Gender')
plt.ylabel('Number of Laureates')
plt.tight_layout()
plt.show()

#2 Which decade had the highest ratio of US-born Nobel Prize winners to total winners in all categories?
# Boolean flag per laureate; its mean within a group is the US-born share.
df["US_born_winners"] = df["birth_country"].eq('United States of America')
# Round each award year down to the first year of its decade (e.g. 1987 -> 1980).
decade_floor = np.floor(df["year"] / 10)
df["decade"] = (decade_floor * 10).astype(int)
ratio_of_USwinners = (
    df.groupby("decade", as_index=False)["US_born_winners"].mean()
)
# Row label of the decade with the largest US-born share.
top_ratio_idx = ratio_of_USwinners["US_born_winners"].idxmax()
max_decade_usa = ratio_of_USwinners.loc[top_ratio_idx, 'decade']
print(f"2) The decade with the highest ratio of US-born winners is: {max_decade_usa}")

# Graphic: US-born flag against decade, drawn as a line plot.
sns.relplot(data=df, x="decade", y="US_born_winners", kind="line")
plt.show()

#3 Which decade and category had the highest proportion of female laureates?
# Rows whose `sex` is missing compare unequal to 'Female', so they are
# flagged False and still count in each group's denominator.
df["female_winners"] = df["sex"] == 'Female'
# Mean of the boolean flag per (decade, category) group is the female share.
ratio_of_female = df.groupby(["decade", "category"], as_index=False)["female_winners"].mean()
# idxmax() gives the row label of the first occurrence of the largest share.
max_row = ratio_of_female.loc[ratio_of_female['female_winners'].idxmax()]
best_decade = int(max_row['decade'])
best_category = max_row['category']
print(f"3) The highest female proportion was in {best_decade}, category: {best_category}")

# Graphic:
# One line per category, so the chart shows the same decade/category
# breakdown that the answer above is computed from.
sns.relplot(x='decade', y='female_winners', hue='category', kind='line', data=df)
plt.show()

#4 Who was the first woman to receive a Nobel Prize, and in what category?
# Keep only the rows flagged as female laureates.
new_df = df.loc[df['female_winners']]
# Row label of the earliest award year among those rows.
earliest_idx = new_df['year'].idxmin()
min_row = new_df.loc[earliest_idx]
print(f"4) First woman laureate: {min_row['full_name']} in {min_row['category']}")

#5 Which individuals or organizations have won more than one Nobel Prize?
# Count how many rows each full_name appears in, then keep names seen more than once.
name_counts = df['full_name'].value_counts()
repeat_counts = name_counts[name_counts > 1]
print("5) Multiple laureates:")
for name, count in repeat_counts.items():
    print(f"- {name}: {count} prizes")

# Graphic:
# Skip the chart when no name repeats: there would be nothing to draw.
if not repeat_counts.empty:
    plt.figure()
    repeat_counts.plot(kind='bar')
    plt.title('Laureates with More Than One Prize')
    plt.xlabel('Laureate')
    plt.ylabel('Number of Prizes')
    # Laureate names are long tick labels; tight_layout keeps them inside the figure.
    plt.tight_layout()
    plt.show()
