## Import Statements


In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sna
import plotly.express as px

### Reading .csv


In [7]:
# Load the dataset from a CSV file addressed relative to the notebook's
# working directory.
nobel_csv_path = "nobel_prize_data.csv"
df_data = pd.read_csv(nobel_csv_path)

### Formatting Notebook


In [8]:
# Render every float with a thousands separator and two decimal places
# (e.g. 1234.5 -> "1,234.50"). This is a display option only; stored values
# are not rounded.
two_decimal_format = "{:,.2f}".format
pd.set_option("display.float_format", two_decimal_format)

##### Shape of DataFrame


In [9]:
# Dimensions of the loaded dataset as a (rows, columns) tuple.
df_data.shape

(962, 16)

##### Column Names


In [14]:
# List the column labels available in the dataset.
df_data.columns

Index(['year', 'category', 'prize', 'motivation', 'prize_share',
       'laureate_type', 'full_name', 'birth_date', 'birth_city',
       'birth_country', 'birth_country_current', 'sex', 'organization_name',
       'organization_city', 'organization_country', 'ISO'],
      dtype='object')

##### First Nobel Prize


In [15]:
# Row with the smallest `year`. Asking for a single row means only the first
# matching record is shown, even if other rows share that year.
earliest_prize = df_data.nsmallest(1, "year")
display(earliest_prize)

Unnamed: 0,year,category,prize,motivation,prize_share,laureate_type,full_name,birth_date,birth_city,birth_country,birth_country_current,sex,organization_name,organization_city,organization_country,ISO
0,1901,Chemistry,The Nobel Prize in Chemistry 1901,"""in recognition of the extraordinary services ...",1/1,Individual,Jacobus Henricus van 't Hoff,1852-08-30,Rotterdam,Netherlands,Netherlands,Male,Berlin University,Berlin,Germany,NLD


##### Last Nobel Prize


In [16]:
# Row with the largest `year`. Asking for a single row means only the first
# matching record is shown, even if other rows share that year.
latest_prize = df_data.nlargest(1, "year")
display(latest_prize)

Unnamed: 0,year,category,prize,motivation,prize_share,laureate_type,full_name,birth_date,birth_city,birth_country,birth_country_current,sex,organization_name,organization_city,organization_country,ISO
950,2020,Chemistry,The Nobel Prize in Chemistry 2020,“for the development of a method for genome ed...,1/2,Individual,Emmanuelle Charpentier,1968-12-11,Juvisy-sur-Orge,France,France,Female,Max-Planck-Institut,Berlin,Germany,FRA


##### Convert the `birth_date` column to Pandas `Datetime` objects


In [19]:
# Parse the `birth_date` column into pandas datetimes and write the parsed
# values back over the original column.
parsed_birth_dates = pd.to_datetime(df_data["birth_date"])
df_data["birth_date"] = parsed_birth_dates

##### Add a column called `share_pct` which holds each laureate's share of the prize as a floating-point fraction (e.g. `1/2` becomes `0.50`, i.e. 50%).


In [29]:
# Each `prize_share` value is a fraction written as text (e.g. "1/2").
# Split it on "/" into two columns: 0 = numerator, 1 = denominator.
share_parts = df_data["prize_share"].str.split("/", expand=True)
numerator = pd.to_numeric(share_parts[0])
denominator = pd.to_numeric(share_parts[1])

# Stored as a plain fraction (0.5 for "1/2"); it is NOT multiplied by 100.
df_data["share_pct"] = numerator / denominator

### Male & Female Total Share


In [44]:
# Number of rows per value of `sex`, most frequent first. value_counts()
# leaves out missing values by default.
gender_shares = df_data["sex"].value_counts()


# Setting Coordinates:
DATAFRAME = gender_shares
LABEL = gender_shares.index
VALUES = gender_shares.values

# Look colours up by label rather than by position, so a slice keeps its
# colour even if the ordering of the counts changes. Labels without an entry
# fall back to a neutral grey.
GENDER_COLORS = {"Male": "dodgerblue", "Female": "crimson"}
slice_colors = [GENDER_COLORS.get(label, "lightgray") for label in LABEL]

# `names` supplies the slice labels and `values` the slice sizes. The plotly
# express `labels` argument is a dict for renaming columns in the figure, so it
# is not passed here.
pie = px.pie(
    data_frame=DATAFRAME,
    title="Male VS Female Nobel Share",
    values=VALUES,
    names=LABEL,
    hole=0.2,
)

pie.update_traces(
    textposition="inside",
    textinfo="label+percent",
    marker=dict(colors=slice_colors),
)

pie.show()

In [37]:
# Inspect the index of the value_counts() result; these labels are what the
# pie chart uses as slice names.
gender_shares.index

Index(['Male', 'Female'], dtype='object', name='sex')