# Lab | APIS

In this lab, you will collect historical data about the Nobel Prize winners using [this free and non-authenticated API](https://www.nobelprize.org/organization/developer-zone-2/). According to the documentation available [here](https://app.swaggerhub.com/apis/NobelMedia/NobelMasterData/2.1#/default/get_nobelPrizes). The base url is: "http://api.nobelprize.org/2.1/" followed by a string to specify what kind of information do you want to retrieve. The acceptable options are:

* nobelPrices
* nobelPrice/category/year
* laureates
* laureate/laureateID

# Getting the information using requests

Use the Python `requests`, and `json` libraries to obtain the information of ALL the Nobel Prizes. Make sure to verify that you get the proper status code (200).

The json outputs are simple plain text that need to be converted into the corresponding nested dictionary. Use the `.json()` method to cast the output into a Python dictionary.

Use the Pandas library to collect all the information into a Panda's DataFrame.

In [72]:
import requests
import json
import pandas as pd

url = "http://api.nobelprize.org/2.1/nobelPrizes?limit=100000"

response = requests.get(url)
prizes = []
if response.status_code == 200:
        print("All good!")
        print("==============")
        print("\n")
        data = response.json()['nobelPrizes']
        prizes.append(data) 
        prizes_df = pd.json_normalize(data)

prizes_df

# Your code here

All good!




Unnamed: 0,awardYear,dateAwarded,prizeAmount,prizeAmountAdjusted,links,laureates,category.en,category.no,category.se,categoryFullName.en,categoryFullName.no,categoryFullName.se,topMotivation.en,topMotivation.se
0,1901,1901-11-12,150782,10531894,"[{'rel': 'nobelPrize', 'href': 'https://api.no...","[{'id': '160', 'knownName': {'en': 'Jacobus H....",Chemistry,Kjemi,Kemi,The Nobel Prize in Chemistry,Nobelprisen i kjemi,Nobelpriset i kemi,,
1,1901,1901-11-14,150782,10531894,"[{'rel': 'nobelPrize', 'href': 'https://api.no...","[{'id': '569', 'knownName': {'en': 'Sully Prud...",Literature,Litteratur,Litteratur,The Nobel Prize in Literature,Nobelprisen i litteratur,Nobelpriset i litteratur,,
2,1901,1901-12-10,150782,10531894,"[{'rel': 'nobelPrize', 'href': 'https://api.no...","[{'id': '462', 'knownName': {'en': 'Henry Duna...",Peace,Fred,Fred,The Nobel Peace Prize,Nobels fredspris,Nobels fredspris,,
3,1901,1901-11-12,150782,10531894,"[{'rel': 'nobelPrize', 'href': 'https://api.no...","[{'id': '1', 'knownName': {'en': 'Wilhelm Conr...",Physics,Fysikk,Fysik,The Nobel Prize in Physics,Nobelprisen i fysikk,Nobelpriset i fysik,,
4,1901,1901-10-30,150782,10531894,"[{'rel': 'nobelPrize', 'href': 'https://api.no...","[{'id': '293', 'knownName': {'en': 'Emil von B...",Physiology or Medicine,Fysiologi eller medisin,Fysiologi eller medicin,The Nobel Prize in Physiology or Medicine,Nobelprisen i fysiologi eller medisin,Nobelpriset i fysiologi eller medicin,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
665,2023,2023-10-09,11000000,11000000,"[{'rel': 'nobelPrize', 'href': 'https://api.no...","[{'id': '1034', 'knownName': {'en': 'Claudia G...",Economic Sciences,Økonomi,Ekonomi,The Sveriges Riksbank Prize in Economic Scienc...,Sveriges Riksbanks pris i økonomisk vitenskap ...,Sveriges Riksbanks pris i ekonomisk vetenskap ...,,
666,2023,2023-10-05,11000000,11000000,"[{'rel': 'nobelPrize', 'href': 'https://api.no...","[{'id': '1032', 'knownName': {'en': 'Jon Fosse...",Literature,Litteratur,Litteratur,The Nobel Prize in Literature,Nobelprisen i litteratur,Nobelpriset i litteratur,,
667,2023,2023-10-06,11000000,11000000,"[{'rel': 'nobelPrize', 'href': 'https://api.no...","[{'id': '1033', 'knownName': {'en': 'Narges Mo...",Peace,Fred,Fred,The Nobel Peace Prize,Nobels fredspris,Nobels fredspris,,
668,2023,2023-10-03,11000000,11000000,"[{'rel': 'nobelPrize', 'href': 'https://api.no...","[{'id': '1026', 'knownName': {'en': 'Pierre Ag...",Physics,Fysikk,Fysik,The Nobel Prize in Physics,Nobelprisen i fysikk,Nobelpriset i fysik,,


# Processing the output

Process the Pandas DataFrame in order to have only the following columns:

- category
- dateAwarded (as DateTime in "yyyy-mm-dd" format)
- prizeAmount
- prizeAmountAdjusted
- Number_of_laureates
- motivation
- laureate_ids (as a list)

In [151]:
import requests
import pandas as pd

# URL for the API
url = "http://api.nobelprize.org/2.1/nobelPrizes?limit=100000"

# Fetch data from the API
response = requests.get(url)

if response.status_code == 200:
    data = response.json()['nobelPrizes']
    
    # Convert JSON data to DataFrame directly
    prizes_df = pd.json_normalize(data)
    
    # Required columns (with corresponding nested names in JSON)
    required_columns = {
        'category': 'category.en',
        'dateAwarded': 'awardYear',
        'prizeAmount': 'prizeAmount',
        'prizeAmountAdjusted': 'prizeAmountAdjusted',
        'Number_of_laureates': 'laureates',  # This will be the list of laureates
        'laureate_ids': 'laureates'  # The entire list of laureates will be processed later
    }

    # Extract the necessary columns based on the keys
    filtered_df = prizes_df[list(required_columns.values())].copy()
    filtered_df.columns = list(required_columns.keys())

      # Convert 'dateAwarded' to DateTime in "yyyy-mm-dd" format
    filtered_df['dateAwarded'] = filtered_df['dateAwarded'].astype(str) + '-01-01'
    filtered_df['dateAwarded'] = pd.to_datetime(filtered_df['dateAwarded'], format='%Y-%m-%d')

    # Convert 'Number_of_laureates' to a count of laureates
    filtered_df['Number_of_laureates'] = filtered_df['Number_of_laureates'].count()

    # Convert 'laureate_ids' to a list of ids
    #filtered_df['laureate_ids'] = filtered_df['laureate_ids'].apply(lambda laureates: [laureate['id'] for laureate in laureates] if pd.notnull(laureates) and isinstance(laureates, list) else [])



    # Print the resulting DataFrame
    print(filtered_df)
else:
    print("Failed to retrieve data.")

                   category dateAwarded  prizeAmount  prizeAmountAdjusted  \
0                 Chemistry  1901-01-01       150782             10531894   
1                Literature  1901-01-01       150782             10531894   
2                     Peace  1901-01-01       150782             10531894   
3                   Physics  1901-01-01       150782             10531894   
4    Physiology or Medicine  1901-01-01       150782             10531894   
..                      ...         ...          ...                  ...   
665       Economic Sciences  2023-01-01     11000000             11000000   
666              Literature  2023-01-01     11000000             11000000   
667                   Peace  2023-01-01     11000000             11000000   
668                 Physics  2023-01-01     11000000             11000000   
669  Physiology or Medicine  2023-01-01     11000000             11000000   

     Number_of_laureates                                       laureate_ids

# Getting a Pandas DataFrame with the details of awarded authors/institutions

If you dive deeper and use the API to retrieve the details of some laureate_ids, you will notice that not allways the Nobel Prize was awarded to individuals. In some cases, the awards were given to institutions.

Get the unique ids from the previous datasets and prepare the following functions:

- get_name(laureate) ( it should return the english name 'fullName' of the individual or 'orgName' of the institution )

- get_gender(laureate) ( it should return the gender or 'Unknown' for individuals, and 'None' for institutions )

- get_birthdate(laureate) ( it should return the birthdate when it's avaialble or 'Unknown' otherwise )

- get_age(laureate) ( it should return the age of the awarded individual or 'Unknown' when it's not avaialble or for institutions )

- get_city(laureate) ( it should return the english name of the city when it's available or 'Unknown' otherwise )

- get_country(laureate) ( it should return the english name of the country when it's available or 'Unknown' otherwise )

- get_continent(laureate) ( it should return the english name of the continent when it's available or 'Unknown' otherwise )

- get_latitude(laureate) ( it should return the city's latitude when it's available or 'Unknown' otherwise )

- get_longitude(laureate) ( it should return the city's longitude
 when it's available or 'Unknown' otherwise )

Create the following dictionaries:

```python
laureates_dict = {"ID": [], "Name": [], "Gender": [], \
                  "Birth_date": [], "Age": [], \
                  "City": [], "Country": [], "Continent": [], \
                  "Latitude": [], "Longitude": []}                        

functions_dict = {"ID": None, "Name": get_name, "Gender": get_gender, \
                  "Birth_date": get_birthdate, "Age": get_age, \
                  "City": get_city, "Country": get_country, "Continent": get_continent, \
                  "Latitude": get_latitude, "Longitude": get_longitude}
```

For each unique `laureate_id` of the previous DataFrame make an API call to get the details of the awarded individual/intitution and iterate of the previous dictionaries keys in order to add the corresponding information of each `laureate_id` in the empty lists of `laureates_dict`.

Finally, create a Pandas DataFrame named `laureates_df` using the `laureates_dict`.

In [154]:
import time
from tqdm import tqdm


ids = [int(item) for l in prizes_df['laureate_ids'].values for item in l]
unique_ids = set(ids)


def get_name(laureate):
    return laureate.get('fullName', {}).get('en') or laureate.get('orgName', {}).get('en', 'Unknown')

def get_gender(laureate):
    return laureate.get('gender', 'Unknown') if 'gender' in laureate else 'None'

def get_birthdate(laureate):
    return laureate.get('birth', {}).get('date', 'Unknown')

def get_age(laureate):
    birthdate = get_birthdate(laureate)
    if birthdate == 'Unknown':
        return 'Unknown'
    birth_date = datetime.strptime(birthdate, "%Y-%m-%d")
    award_year = datetime.strptime(laureate['awardYear'], '%Y')
    return award_year.year - birth_date.year

def get_city(laureate):
    return laureate.get('birth', {}).get('place', {}).get('city', {}).get('en', 'Unknown')

def get_country(laureate):
    return laureate.get('birth', {}).get('place', {}).get('country', {}).get('en', 'Unknown')

def get_continent(laureate):
    return laureate.get('birth', {}).get('place', {}).get('continent', {}).get('en', 'Unknown')

def get_latitude(laureate):
    return laureate.get('birth', {}).get('place', {}).get('location', {}).get('latitude', 'Unknown')

def get_longitude(laureate):
    return laureate.get('birth', {}).get('place', {}).get('location', {}).get('longitude', 'Unknown')

laureates_dict = {"ID": [], "Name": [], "Gender": [], \
                  "Birth_date": [], "Age": [], \
                  "City": [], "Country": [], "Continent": [], \
                  "Latitude": [], "Longitude": []}

functions_dict = {"ID": None, "Name": get_name, "Gender": get_gender, \
                  "Birth_date": get_birthdate, "Age": get_age, \
                  "City": get_city, "Country": get_country, "Continent": get_continent, \
                  "Latitude": get_latitude, "Longitude": get_longitude}

for index, id in enumerate(tqdm(unique_ids)):

    url = "https://api.nobelprize.org/2/laureate/" + str(id)
    response = requests.get(url)

    if response.status_code == 200:

        laureate = response.json()

        # Your code here

laureates_df = pd.DataFrame(laureates_dict)

laureates_df



KeyError: 'laureate_ids'

# Country ranking

Get a ranking countries by the number of times that they had been awarded in any category.

In [None]:
# Your code here



Unnamed: 0_level_0,ID
Country,Unnamed: 1_level_1
USA,296
United Kingdom,91
Germany,84
France,63
Russia,30
...,...
Greece,1
Ghana,1
Faroe Islands (Denmark),1
Ethiopia,1
