# Number of laureates who received their award in the US but were born elsewhere

In [1]:
import json
import pandas as pd

In [5]:
# Location of the laureates JSON snapshot, relative to the notebook's working directory.
file_path = "data/laureates-2024.json"

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform's locale encoding, which can fail or mis-decode any non-ASCII text
# stored in the file (the data includes Norwegian/Swedish translations).
with open(file_path, 'r', encoding='utf-8') as file:
    laureates = json.load(file)

In [9]:
# Question 1: how many laureate records does the dataset hold?
# For reference, 992 laureates were listed before the 2024 awards.
laureate_count = len(laureates)
print(f"Number of laureates: {laureate_count}")

Number of laureates: 1004


In [None]:
# English name of the birth country recorded for the first laureate
first_birth_place = laureates[0]["birth"]["place"]
print(first_birth_place["country"]["en"])

USA


In [17]:
# English name of the first laureate's birth country under its `countryNow` entry
first_laureate = laureates[0]
print(first_laureate["birth"]["place"]["countryNow"]["en"])

USA


In [19]:
# Pretty-print the prize records of the first laureate to inspect their structure.
first_laureate_prizes = laureates[0]["nobelPrizes"]
print(json.dumps(first_laureate_prizes, indent=2))

[
  {
    "awardYear": "2001",
    "category": {
      "en": "Economic Sciences",
      "no": "\u00d8konomi",
      "se": "Ekonomi"
    },
    "categoryFullName": {
      "en": "The Sveriges Riksbank Prize in Economic Sciences in Memory of Alfred Nobel",
      "no": "Sveriges Riksbanks pris i \u00f8konomisk vitenskap til minne om Alfred Nobel",
      "se": "Sveriges Riksbanks pris i ekonomisk vetenskap till Alfred Nobels minne"
    },
    "sortOrder": "2",
    "portion": "1/3",
    "dateAwarded": "2001-10-10",
    "prizeStatus": "received",
    "motivation": {
      "en": "for their analyses of markets with asymmetric information",
      "se": "f\u00f6r deras analys av marknader med assymetrisk informations"
    },
    "prizeAmount": 10000000,
    "prizeAmountAdjusted": 13927869,
    "affiliations": [
      {
        "name": {
          "en": "Stanford University",
          "no": "Stanford University",
          "se": "Stanford University"
        },
        "nameNow": {
          "en

In [24]:
# Print the country of every affiliation attached to the first laureate's prizes.
# Keys are read with .get() and an "Unknown" fallback, matching the extraction
# loop later in this notebook: an affiliation can lack a "country" entry (for
# example a collaboration with no location), and direct indexing would raise
# KeyError on such a record.
for prize in laureates[0].get("nobelPrizes", []):
    for affiliation in prize.get("affiliations", []):
        print(affiliation.get("country", {}).get("en", "Unknown"))

USA


In [61]:
# Find the raw record(s) whose English known name matches, then dump the
# affiliations of the first prize of the first match.
target_name = 'Barry C. Barish'
barish_raw = [
    record
    for record in laureates
    if record.get('knownName', {}).get('en') == target_name
]
first_prize_affiliations = barish_raw[0]["nobelPrizes"][0]['affiliations']
print(json.dumps(first_prize_affiliations, indent=2))

[
  {
    "name": {
      "en": "LIGO/VIRGO Collaboration",
      "no": "LIGO/VIRGO Collaboration",
      "se": "LIGO/VIRGO Collaboration"
    },
    "nameNow": {
      "en": "LIGO/VIRGO Collaboration"
    },
    "locationString": {
      "en": "",
      "no": "",
      "se": ""
    }
  },
  {
    "name": {
      "en": "California Institute of Technology (Caltech)",
      "no": "California Institute of Technology (Caltech)",
      "se": "California Institute of Technology (Caltech)"
    },
    "nameNow": {
      "en": "California Institute of Technology (Caltech)"
    },
    "city": {
      "en": "Pasadena, CA",
      "no": "Pasadena, CA",
      "se": "Pasadena, CA"
    },
    "country": {
      "en": "USA",
      "no": "USA",
      "se": "USA"
    },
    "cityNow": {
      "en": "Pasadena, CA",
      "no": "Pasadena, CA",
      "se": "Pasadena, CA",
      "sameAs": [
        "https://www.wikidata.org/wiki/Q485176",
        "https://www.wikipedia.org/wiki/Pasadena,_California"
      ],

In [None]:
def _us_affiliated_record(laureate):
    """Flatten one laureate into a summary dict, or return None if filtered out.

    A laureate is filtered out when any of their prize categories is
    'Literature', 'Peace' or 'Economic Sciences', or when none of their prize
    affiliations is in the 'USA'. Missing keys fall back to "Unknown".

    NOTE(review): the category filter is applied per laureate, not per prize,
    so someone holding both a retained-category prize and an excluded-category
    prize is dropped entirely — confirm this is intended.
    NOTE(review): affiliations without a country are kept as "Unknown" entries
    in `affiliation_countries`.
    """
    prizes = laureate.get('nobelPrizes', [])

    # English category label of every prize held by this laureate.
    categories = [
        prize.get('category', {}).get('en', "Unknown")
        for prize in prizes
    ]
    # Countries of all affiliations, flattened across prizes in their original order.
    affiliation_countries = [
        institution.get('country', {}).get('en', "Unknown")
        for prize in prizes
        for institution in prize.get('affiliations', [])
    ]

    excluded_categories = ('Literature', 'Peace', 'Economic Sciences')
    if any(excluded in categories for excluded in excluded_categories):
        return None
    if 'USA' not in affiliation_countries:
        return None

    place_of_birth = laureate.get('birth', {}).get('place', {})
    return {
        'name': laureate.get('knownName', {}).get('en', 'Unknown'),
        'birth_country': place_of_birth.get('country', {}).get('en', "Unknown"),
        'birth_country_now': place_of_birth.get('countryNow', {}).get('en', "Unknown"),
        'category': categories,
        'affiliation_countries': affiliation_countries,
    }


# One dict per retained laureate, in the same order as the source dataset.
candidate_records = (_us_affiliated_record(laureate) for laureate in laureates)
data = [record for record in candidate_records if record is not None]

In [65]:
# Spot check: show the flattened record built for one known laureate.
barish_data = [row for row in data if row['name'] == 'Barry C. Barish']
first_match = barish_data[0]
print(json.dumps(first_match, indent=2))

{
  "name": "Barry C. Barish",
  "birth_country": "USA",
  "birth_country_now": "USA",
  "categories": "Physics",
  "affiliation_countries": [
    "Unknown",
    "USA"
  ]
}


In [67]:
# Turn the list of per-laureate dicts into a table; the dict keys become the columns.
df = pd.DataFrame(data)

In [69]:
# Preview the first rows to confirm the expected columns are present.
df.head()

Unnamed: 0,name,birth_country,birth_country_now,category,affiliation_countries
0,Adam G. Riess,USA,USA,[Physics],"[USA, USA]"
1,Ahmed Zewail,Egypt,Egypt,[Chemistry],[USA]
2,Alan Heeger,USA,USA,[Chemistry],[USA]
3,Alan MacDiarmid,New Zealand,New Zealand,[Chemistry],[USA]
4,Albert A. Michelson,Prussia,Poland,[Physics],[USA]


In [73]:
# Keep only the rows whose birth_country_now is anything other than 'USA'.
# NOTE(review): rows whose birthplace fell back to "Unknown" during extraction
# also pass this filter and are therefore counted as non-USA — confirm that is
# acceptable for the proportion computed below.
born_outside_usa_mask = df['birth_country_now'].ne('USA')
non_usa_birth_country_now = df.loc[born_outside_usa_mask]

In [75]:
# Number of retained laureates whose birth_country_now is not 'USA'.
total_non_usa_born_laureates = len(non_usa_birth_country_now.index)
total_non_usa_born_laureates

113

In [74]:
# Number of all retained (US-affiliated) laureates; this is the denominator of the proportion.
total_usa_laureates = len(df.index)
total_usa_laureates

318

In [76]:
# Share of the retained laureates whose birth_country_now is not 'USA',
# reported to two decimal places.
proportion_non_usa = total_non_usa_born_laureates / total_usa_laureates
print(
    f"Proportion of entries with birth_country_now not 'USA': {proportion_non_usa:.2f}"
)

Proportion of entries with birth_country_now not 'USA': 0.36


In [71]:
# Summary statistics (count / unique / top / freq) for each column.
df.describe()

Unnamed: 0,name,birth_country,birth_country_now,category,affiliation_countries
count,318,318,318,318,318
unique,318,38,36,5,13
top,Adam G. Riess,USA,USA,[Physiology or Medicine],[USA]
freq,1,205,205,121,276


In [72]:
# Save the filtered table for use outside the notebook; index=False omits the row index.
# NOTE(review): 'category' and 'affiliation_countries' hold Python lists, so
# they are written as their string representation — confirm downstream
# consumers expect that.
output_csv_path = 'data/laureates-awarded-in-usa-data.csv'
df.to_csv(output_csv_path, index=False)