In [3]:
import pandas as pd

noc_regions = pd.read_csv("noc_regions.csv")

In [4]:
# ci-dessous : on importe le dataframe du monde (en vue d'afficher la carte)

import geopandas as gpd
import matplotlib.pyplot as plt

url = "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"
world = gpd.read_file(url)

In [5]:
# dictionnaire pour faire matcher "region" de noc_regions avec "SOVEREIGNT" de world 

overrides = { 
    "USA" : "United States of America",
    "Trinidad" : "Trinidad and Tobago",
    "Serbia": "Republic of Serbia",
    "Tanzania" : "United Republic of Tanzania", 
    "UK" : "United Kingdom",
    "Bahamas" : "The Bahamas", 
    "Macedonia" : "North Macedonia",
    "Czech Republic" : "Czechia" 
}

In [6]:
# je souhaite créer un dataframe avec 4 colonnes :
# region et NOC de noc_regions
# SOV_A3 et SOVEREIGNT de world


import pandas as pd


df = (pd.DataFrame
    .from_dict(overrides,
    orient = 'index',
    columns = ["SOVEREIGNT"])
.reset_index()
.rename(columns = {"index" : "short_names"})
)

df = df.merge(
    world[["SOVEREIGNT", "SOV_A3"]],
    on = "SOVEREIGNT", 
    how = 'left'
)

df = df.merge(
    noc_regions[['NOC','region']],
    left_on = "short_names",
    right_on = "region",
    how = 'left'
)

result = df[['region','NOC', 'SOV_A3', 'SOVEREIGNT']]
result

Unnamed: 0,region,NOC,SOV_A3,SOVEREIGNT
0,USA,USA,US1,United States of America
1,USA,USA,US1,United States of America
2,Trinidad,TTO,TTO,Trinidad and Tobago
3,Trinidad,WIF,TTO,Trinidad and Tobago
4,Serbia,SCG,SRB,Republic of Serbia
5,Serbia,SRB,SRB,Republic of Serbia
6,Serbia,YUG,SRB,Republic of Serbia
7,Tanzania,TAN,TZA,United Republic of Tanzania
8,UK,GBR,GB1,United Kingdom
9,UK,GBR,GB1,United Kingdom


In [7]:
# Maintenant l'objectif est de créer un dictionnaire
# correspondance SOV_A3 -> NOCs
# à partir du dataframe précédent


# on sélectionne les colonnes SOV_A3 et NOC, onlève les N/A
df_full = df[['SOV_A3', 'NOC']].dropna(subset=['SOV_A3','NOC'])

mapping_sov3_to_nocs = (
    df_full
    .groupby('SOV_A3')['NOC']
    .apply(lambda ser: sorted(set(ser)))
    .to_dict()
    )

# print pour vérifier
for sov_a3, nocs in mapping_sov3_to_nocs.items():
    print(f"{sov_a3!r} -> {nocs}")


'BHS' -> ['BAH']
'CZE' -> ['BOH', 'CZE', 'TCH']
'GB1' -> ['GBR']
'MKD' -> ['MKD']
'SRB' -> ['SCG', 'SRB', 'YUG']
'TTO' -> ['TTO', 'WIF']
'TZA' -> ['TAN']
'US1' -> ['USA']
