In [4]:
import json
from datetime import datetime

In [5]:
# Path to the raw laureates JSON file that feeds the rest of the notebook.
file_path = "data/laureates-1000.json"

# JSON text is UTF-8 by specification. Decode it explicitly rather than relying
# on the platform's default encoding, which can fail on (or garble) non-ASCII
# characters in laureate names on systems whose locale is not UTF-8.
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# The per-laureate records live under the top-level 'laureates' key.
laureates = data['laureates']

In [6]:
"""
Calculate the age of a Nobel Prize laureate when they were awarded the prize.
"""
def calculate_nobel_age(birth_date: str, award_year: str) -> int:
    # Check if the birth_date includes month and day
    if "-00-00" in birth_date:
        # Extract only the year and assume the earliest possible birth date (January 1st)
        birth_year = int(birth_date[:4])
        birth_date = datetime(year=birth_year, month=1, day=1)
        # We'll have to assume that they were at least born in that year,
        # so the calculated age will be the maximum possible given the information.
        incomplete_date = True
    else:
        # Convert the birth_date string to a datetime object
        birth_date = datetime.strptime(birth_date, "%Y-%m-%d")
        incomplete_date = False
    
    # Create a datetime object for December 10th of the award year
    award_date = datetime(year=int(award_year), month=12, day=10)
    
    # Calculate the difference in years
    age = award_date.year - birth_date.year
    
    if not incomplete_date:
        # If the award date is before the laureate's birthday in that year, subtract 1 from the age
        if (award_date.month, award_date.day) < (birth_date.month, birth_date.day):
            age -= 1
    
    return age

In [36]:
# Prize categories that are left out of the exported dataset.
excluded_categories = ('Literature', 'Peace')


def _city_coordinates(city):
    """Return the raw ``(latitude, longitude)`` pair of a city record, or ``None``.

    ``None`` is returned when the record itself is missing/empty or when either
    coordinate is missing or empty. The values are returned unconverted; the
    caller turns them into floats.
    """
    if not city:
        return None
    latitude = city.get('latitude')
    longitude = city.get('longitude')
    # NOTE(review): truthiness is used as the "missing" test, which assumes the
    # coordinates are stored as strings (a numeric 0 would count as missing) —
    # confirm against the input schema.
    if not latitude or not longitude:
        return None
    return latitude, longitude


data = []

for laureate in laureates:
    # NAME
    name = laureate.get('knownName', {}).get('en', 'Unknown')

    # BIRTH DATE: skip entries with a missing or placeholder ("0000...") date.
    birth_date = laureate.get('birth', {}).get('date')
    if not birth_date or birth_date.startswith('0000'):
        continue

    prizes = laureate.get('nobelPrizes', [])
    if not prizes:  # Skip entries with missing prizes
        continue

    # Only the earliest prize is exported. min() returns the first of several
    # prizes sharing the earliest year, exactly as a stable sort would.
    first_prize = min(prizes, key=lambda prize: int(prize['awardYear']))

    # CATEGORY of the first prize
    category = first_prize.get('category', {}).get('en')
    if category in excluded_categories:
        continue

    # AGE at the time of the first prize. Both inputs are known to be present
    # here (birth_date was checked above and awardYear was already parsed by
    # min()), so the age is always computed for this laureate instead of
    # possibly being carried over from a previous iteration.
    award_year = first_prize['awardYear']
    age_at_award = calculate_nobel_age(birth_date, award_year)

    # AFFILIATION: only the first listed affiliation of the prize is used.
    affiliations = first_prize.get('affiliations', [])
    if not affiliations:  # Skip entries with missing affiliations
        continue
    affil_coords = _city_coordinates(affiliations[0].get('cityNow', {}))
    if affil_coords is None:  # Skip entries with missing city or coordinates
        continue

    # GENDER
    gender = laureate.get('gender', 'Unknown')

    # BIRTH PLACE
    birth_place = laureate.get('birth', {}).get('place', {})
    birth_coords = _city_coordinates(birth_place.get('cityNow', {}))
    if birth_coords is None:  # Skip entries with missing city or coordinates
        continue

    affil_lat, affil_lon = affil_coords
    birth_lat, birth_lon = birth_coords

    award = {
        'category': category,
        # NOTE(review): the key is spelled 'data' although it holds the award
        # date; it is kept as-is because consumers of the exported JSON may
        # read it under this name — confirm before renaming.
        'data': f"{award_year}-12-10",
        'lat': float(affil_lat),
        'lon': float(affil_lon),
        'age': age_at_award
    }

    birth = {
        'date': birth_date,
        'lat': float(birth_lat),
        'lon': float(birth_lon)
    }

    data.append({
        'name': name,
        'gender': gender,
        'award': award,
        'birth': birth
    })


In [37]:
# Pretty-print the first exported record as a quick sanity check of its structure.
first_record_json = json.dumps(data[0], indent=2)
print(first_record_json)

{
  "name": "A. Michael Spence",
  "gender": "male",
  "award": {
    "category": "Economic Sciences",
    "data": "2001-12-10",
    "lat": 37.424734,
    "lon": -122.163858,
    "age": 58
  },
  "birth": {
    "date": "1943-00-00",
    "lat": 40.82593,
    "lon": -74.20903
  }
}


In [38]:
# Destination path for the exported laureate records (the file is written in the next cell).
# NOTE(review): this rebinds `file_path`, which earlier held the input file's path.
file_path = './data/laureates-data.json'

In [39]:
# Serialise the collected records to disk as a single JSON document.
with open(file_path, mode='w') as output_file:
    json.dump(data, fp=output_file)