In [1]:
import json
import pandas as pd
import unicodedata
import re
from fuzzywuzzy import fuzz

# Lookup of composerID -> composerName, built once from the interim composer
# key file (tab-separated, with 'composerID' and 'composerName' columns).
COMP_MAP = dict(
    pd.read_table('data/interim/composer_key.txt', sep='\t')
    [['composerID', 'composerName']]
    .itertuples(index=False, name=None)
)

# Keys that flatten_artist() pre-fills with None, so that every flattened
# artist record carries the same set of keys even when a field is absent
# from the MusicBrainz artist dict.
ARTIST_NULLKEYS = [
    'mbzid', 'country', 'gender', 'sortname', 
    'area_name', 'beginarea_name', 'endarea_name', 'area_id', 'beginarea_id', 'endarea_id',
    'lifespan_begin', 'lifespan_end', 'lifespan_ended'
]


def normalize(string):
    """Return `string` in Unicode NFKD (compatibility-decomposed) form.

    Accented characters are split into a base character followed by
    combining marks, and compatibility characters (e.g. ligatures) are
    expanded.
    """
    decomposed = unicodedata.normalize('NFKD', string)
    return str(decomposed)


def flatten_artist(artist):
    """Flatten one MusicBrainz artist dict into a single-level record.

    Parameters
    ----------
    artist : dict
        Artist entry as read from the search results. Must contain 'id';
        every other field is optional.

    Returns
    -------
    dict
        One key per entry in ARTIST_NULLKEYS (None when the source field is
        absent) plus 'n_aliases', 'n_tags', 'disambig_composer' and
        'tag_composer'.

    Raises
    ------
    KeyError
        If `artist` has no 'id', or a tag entry has no 'name'.
    """
    # start from an all-None record, then the counters / flags, in this order
    flat = dict.fromkeys(ARTIST_NULLKEYS)
    flat.update(n_aliases=0, n_tags=0, disambig_composer=False, tag_composer=False)

    # basics
    flat['mbzid'] = artist['id']
    for out_key, src_key in (('country', 'country'),
                             ('gender', 'gender'),
                             ('sortname', 'sort-name')):
        flat[out_key] = artist.get(src_key)

    # the three area sub-dicts share the same shape: copy name and id of each
    for prefix, src_key in (('area', 'area'),
                            ('beginarea', 'begin-area'),
                            ('endarea', 'end-area')):
        area = artist.get(src_key)
        if area:
            flat[prefix + '_name'] = area.get('name')
            flat[prefix + '_id'] = area.get('id')

    life_span = artist.get('life-span')
    if life_span:
        for field in ('begin', 'end', 'ended'):
            flat['lifespan_' + field] = life_span.get(field)

    aliases = artist.get('aliases')
    if aliases:
        flat['n_aliases'] = len(aliases)

    tags = artist.get('tags')
    if tags:
        flat['n_tags'] = len(tags)
        # exact (case-sensitive) match on the tag name
        flat['tag_composer'] = 'composer' in [tag['name'] for tag in tags]

    disambiguation = artist.get('disambiguation')
    if disambiguation:
        # substring (case-sensitive) match on the disambiguation text
        flat['disambig_composer'] = 'composer' in disambiguation

    return flat


def first_pass(data, min_mbz_score=95, min_avg_score=80, min_part_score=70):
    """Filter raw search results down to plausible composer/artist matches.

    Parameters
    ----------
    data : list
        One entry per searched composer. Each entry is either None (nothing
        found) or a dict with a 'search' dict (keys 'name' and 'id') and an
        'artists' list of candidate artist dicts (each with 'score',
        'sort-name' and the fields read by flatten_artist).
    min_mbz_score : int, optional
        Candidates whose search 'score' is below this are dropped.
    min_avg_score : float, optional
        Candidates whose mean of the two fuzzy scores is below this are dropped.
    min_part_score : float, optional
        Candidates whose token-sort ratio or partial ratio is below this are
        dropped.

    Returns
    -------
    pandas.DataFrame
        One row per surviving candidate: the flattened artist fields plus
        'index' (position in `data`), 'searchname', 'composerid',
        'foundname', 'cleanname', 'tsr_score', 'pr_score' and 'avg_score'.
    """
    non_alpha = re.compile('[^A-Za-z ]')
    matches = []

    for i, d in enumerate(data):
        # skip if nothing found by mbz
        if d is None:
            continue

        search = d.get('search')

        # Lowercase BEFORE stripping ' sir ': the searched names are not
        # lowercase yet, so replacing the lowercase token first never matched.
        cleanname = (search.get('name').lower()
                     .replace('  ', ' ')
                     .replace(' sir ', ' ')
                     .replace(',', ''))
        # Decompose accents, then drop everything but ASCII letters and
        # spaces. Done once per search rather than once per candidate.
        cleanname = non_alpha.sub('', normalize(cleanname))

        # a missing / null 'artists' entry is treated as "no candidates"
        for a in d.get('artists') or []:

            # cut based off of mbz search 'score'
            if int(a['score']) < min_mbz_score:
                continue

            # cut based off of fuzzy match score of names
            foundname = normalize(a['sort-name'].replace(', ', ' ').lower())
            foundname = non_alpha.sub('', foundname)

            tsr_score = fuzz.token_sort_ratio(cleanname, foundname)
            pr_score = fuzz.partial_ratio(cleanname, foundname)
            avg_score = (tsr_score + pr_score) / 2

            if (avg_score < min_avg_score
                    or tsr_score < min_part_score
                    or pr_score < min_part_score):
                continue

            # save: flattened artist fields first, then the match metadata
            this_match = flatten_artist(a)
            this_match.update({
                'index': i,
                'searchname': search.get('name'),
                'composerid': search.get('id'),
                'foundname': foundname, 'cleanname': cleanname,
                'tsr_score': tsr_score, 'pr_score': pr_score,
                'avg_score': avg_score,
            })
            matches.append(this_match)

    return pd.DataFrame(matches)


def choose_best_match(data):
    """Pick a single best candidate row from one composer's candidate matches.

    Written to be applied per group of a groupby on 'composerid': `data`
    holds all candidate rows for one composer.

    Tie-breakers are tried in order: tagged as composer, disambiguation
    mentions composer, most aliases, most tags. Each step either returns a
    unique winner, narrows the candidates, or leaves them unchanged.

    Returns
    -------
    pandas.DataFrame or None
        A one-row frame holding the winner, or None when no step singles
        out a candidate.
    """
    # if single word name, this is hopeless
    # NOTE(review): the separator is a comma followed by TWO spaces - confirm
    # this matches how names are stored in the composer key file.
    composer_name = COMP_MAP[data.composerid.iloc[0]]
    if len(composer_name.split(',  ')) == 1:
        return None

    # boolean evidence first: tag says composer, then disambiguation says composer
    for flag in ('tag_composer', 'disambig_composer'):
        n_flagged = sum(data[flag])
        flagged = data.loc[data[flag] == True, :]
        if n_flagged == 1:
            return flagged
        if n_flagged > 1:
            data = flagged.copy()

    # take most aliases
    most_aliases = max(data.n_aliases)
    top_by_aliases = data.loc[data.n_aliases == most_aliases, :]
    n_top_by_aliases = top_by_aliases.shape[0]
    if most_aliases > 0 and n_top_by_aliases == 1:
        return top_by_aliases
    if n_top_by_aliases > 1:
        data = top_by_aliases.copy()

    # take most tags
    most_tags = max(data.n_tags)
    top_by_tags = data.loc[data.n_tags == most_tags, :]
    if most_tags > 0 and top_by_tags.shape[0] == 1:
        return top_by_tags

    # give up
    return None



In [21]:
# load the saved search results (JSON) and run them through the first pass,
# which scores every candidate artist and keeps those above the cut-offs;
# df ends up with one row per surviving (composer, candidate) pair
with open('data/interim/mbz_composer_search.json', 'r') as f:
    data = json.load(f)

df = first_pass(data)

In [25]:
df

Unnamed: 0,area_id,area_name,avg_score,beginarea_id,beginarea_name,cleanname,composerid,country,disambig_composer,endarea_id,...,lifespan_end,lifespan_ended,mbzid,n_aliases,n_tags,pr_score,searchname,sortname,tag_composer,tsr_score
0,85752fda-13c4-31a3-bee5-0e5cb1f51dad,Germany,100.0,b86b7e97-c4e2-4ec2-942b-5a6cd8eea1da,Bonn,beethoven ludwig van,0,DE,False,afff1a94-a98b-4322-8874-3148139ab6da,...,1827-03-26,True,1f9df192-a621-4f54-8850-2c5373b7eac9,28,20,100,"Beethoven, Ludwig van","Beethoven, Ludwig van",True,100
1,85752fda-13c4-31a3-bee5-0e5cb1f51dad,Germany,100.0,d1a17bfe-b392-4aae-bf67-e6c6987363fe,Eutin,weber carl maria von,1,DE,False,f03d09b3-39dc-4083-afd6-159e3f0d462f,...,1826-06-05,True,c2d17829-1424-435b-9386-c77d3a920abe,9,5,100,"Weber, Carl Maria Von","Weber, Carl Maria von",True,100
2,caac77d1-a5c8-3e6e-8e27-90b44dcc1446,Austria,88.0,ed091849-62dd-4d10-9d40-50594afb1b15,Bratislava,hummel johann,2,AT,False,c7644e45-dec4-43fd-aad6-35036b8e911d,...,1837-10-17,True,f7ef501b-2bc0-4083-ac52-3518255883a2,4,3,100,"Hummel, Johann","Hummel, Johann Nepomuk",True,76
3,c6500277-9a3d-349b-bf30-41afdbf42add,Italy,100.0,b3c4f4b8-1a4c-48e6-93db-e278dce26270,Pesaro,rossini gioachino,4,IT,True,dc10c22b-e510-4006-8b7f-fecb4f36436e,...,1868-11-13,True,846be3c9-5f94-46ab-97b9-531335dd3658,13,7,100,"Rossini, Gioachino","Rossini, Gioachino",True,100
4,caac77d1-a5c8-3e6e-8e27-90b44dcc1446,Austria,100.0,f0590317-8b42-4498-a2e4-34cc5562fcf8,Salzburg,mozart wolfgang amadeus,5,AT,True,afff1a94-a98b-4322-8874-3148139ab6da,...,1791-12-05,True,b972f589-fb0e-474e-b64a-803b0364fa75,24,16,100,"Mozart, Wolfgang Amadeus","Mozart, Wolfgang Amadeus",True,100
5,c6500277-9a3d-349b-bf30-41afdbf42add,Italy,100.0,0ebe2c1b-f49e-4144-be01-1f146ee6e8b3,Catania,bellini vincenzo,7,IT,True,c65960ba-69b5-494f-b7d6-8dea87a3986c,...,1835-09-23,True,6f5bfd20-84cc-4879-8a40-05631ad576c7,6,6,100,"Bellini, Vincenzo","Bellini, Vincenzo",True,100
6,85752fda-13c4-31a3-bee5-0e5cb1f51dad,Germany,100.0,74601bdf-08ff-4efa-a1ac-d6020853ae53,Dinklage,romberg bernhard,8,DE,False,11a44e18-a2e5-43a9-bee9-aa4f7c83f967,...,1841-08-13,True,0040c89b-f2e6-4bc3-b75d-5152fb0c890e,0,2,100,"Romberg, Bernhard","Romberg, Bernhard",True,100
7,c6500277-9a3d-349b-bf30-41afdbf42add,Italy,100.0,0ebe2c1b-f49e-4144-be01-1f146ee6e8b3,Catania,pacini giovanni,9,IT,False,36d9876d-38e9-49b4-9024-16055a9e1778,...,1867-12-06,True,9ee75435-3d4a-4455-bab6-aed7f7f8b2bc,0,0,100,"Pacini, Giovanni","Pacini, Giovanni",False,100
8,08310658-51eb-3801-80de-5a0739207115,France,100.0,31eea06c-a794-4138-879d-e633336275a2,Clermont-Ferrand,onslow george,10,FR,False,31eea06c-a794-4138-879d-e633336275a2,...,1853-10-03,True,d862c725-08b8-45bd-881b-3e6a9f295366,1,3,100,"Onslow, George","Onslow, George",True,100
9,caac77d1-a5c8-3e6e-8e27-90b44dcc1446,Austria,100.0,28d44913-d1b1-4cc8-8c51-bf80a2a210cf,Geneva,thalberg sigismond,11,AT,False,86fbdd3c-aaac-4aed-916a-b361f7e1a8de,...,1871-04-27,True,480aa5f2-97e6-4e79-a364-6629d7b63b55,0,0,100,"Thalberg, Sigismond","Thalberg, Sigismond",False,100


In [2]:
# split off a dataframe of dupes: composers with two or more candidate rows
compcounts = df.composerid.value_counts()
idx_multicomps = df.composerid.isin(compcounts[compcounts >= 2].index)

finaldf = df.loc[~idx_multicomps].copy()
dupeddf = df.loc[idx_multicomps].copy()

# evaluate each set of potential composer matches; groups for which
# choose_best_match returns None contribute no row
best_matches = dupeddf.groupby('composerid').apply(choose_best_match)

# and rejoin to main dataset
# (pd.concat instead of DataFrame.append, which was removed in pandas 2.0;
#  ignore_index discards the group-level index produced by apply)
finaldf = pd.concat([finaldf, best_matches], ignore_index=True)
finaldf = finaldf.set_index('composerid')

In [6]:
# Columns kept when exporting the matched composers; the name-matching
# diagnostics (scores, clean/found names) and the *_name columns other than
# area_name are left out.
final_cols = [
    'mbzid',
    'sortname', 'gender',
    'country', 'area_id', 'area_name', 'beginarea_id', 'endarea_id',
    'lifespan_begin', 'lifespan_end', 'lifespan_ended',
    'n_aliases', 'n_tags', 'tag_composer'
]

In [7]:
# write the selected columns as tab-separated text; the composerid index is
# written as the first column
finaldf[final_cols].to_csv('data/tmp/mbz_partial.txt', sep='\t')

In [33]:
finaldf.head()

Unnamed: 0_level_0,area_id,area_name,avg_score,beginarea_id,beginarea_name,cleanname,country,disambig_composer,endarea_id,endarea_name,...,lifespan_end,lifespan_ended,mbzid,n_aliases,n_tags,pr_score,searchname,sortname,tag_composer,tsr_score
composerid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,85752fda-13c4-31a3-bee5-0e5cb1f51dad,Germany,100.0,b86b7e97-c4e2-4ec2-942b-5a6cd8eea1da,Bonn,beethoven ludwig van,DE,False,afff1a94-a98b-4322-8874-3148139ab6da,Wien,...,1827-03-26,True,1f9df192-a621-4f54-8850-2c5373b7eac9,28,20,100,"Beethoven, Ludwig van","Beethoven, Ludwig van",True,100
1,85752fda-13c4-31a3-bee5-0e5cb1f51dad,Germany,100.0,d1a17bfe-b392-4aae-bf67-e6c6987363fe,Eutin,weber carl maria von,DE,False,f03d09b3-39dc-4083-afd6-159e3f0d462f,London,...,1826-06-05,True,c2d17829-1424-435b-9386-c77d3a920abe,9,5,100,"Weber, Carl Maria Von","Weber, Carl Maria von",True,100
2,caac77d1-a5c8-3e6e-8e27-90b44dcc1446,Austria,88.0,ed091849-62dd-4d10-9d40-50594afb1b15,Bratislava,hummel johann,AT,False,c7644e45-dec4-43fd-aad6-35036b8e911d,Weimar,...,1837-10-17,True,f7ef501b-2bc0-4083-ac52-3518255883a2,4,3,100,"Hummel, Johann","Hummel, Johann Nepomuk",True,76
4,c6500277-9a3d-349b-bf30-41afdbf42add,Italy,100.0,b3c4f4b8-1a4c-48e6-93db-e278dce26270,Pesaro,rossini gioachino,IT,True,dc10c22b-e510-4006-8b7f-fecb4f36436e,Paris,...,1868-11-13,True,846be3c9-5f94-46ab-97b9-531335dd3658,13,7,100,"Rossini, Gioachino","Rossini, Gioachino",True,100
5,caac77d1-a5c8-3e6e-8e27-90b44dcc1446,Austria,100.0,f0590317-8b42-4498-a2e4-34cc5562fcf8,Salzburg,mozart wolfgang amadeus,AT,True,afff1a94-a98b-4322-8874-3148139ab6da,Wien,...,1791-12-05,True,b972f589-fb0e-474e-b64a-803b0364fa75,24,16,100,"Mozart, Wolfgang Amadeus","Mozart, Wolfgang Amadeus",True,100


### things to get from mbz
- fill out country where only area is available
- number of works
- number of recordings (but watch out for conductors?)
- main area? is that a thing?
- era of birth
- date of death [for comparison with first performance]

In [21]:
# for rows without a country, tabulate what area_name holds instead; the
# str() concatenation makes missing values show up literally as 'None'
finaldf.loc[finaldf.country.isnull()].apply(lambda x: str(x.country) + str(x.area_name), axis=1).value_counts()

NoneNone             132
NoneEngland           12
NoneQuebec             3
NoneLondon             1
NoneOhio               1
NoneMassachusetts      1
NoneSanto Domingo      1
NoneVentura            1
NoneSanta Barbara      1
NoneNew York           1
NoneInnsbruck          1
NoneFlanders           1
NoneEast Sussex        1
NoneJerusalem          1
NoneDayton             1
NoneBoston             1
dtype: int64

In [26]:
finaldf[finaldf.country.isnull() & finaldf.area_id.notnull()]

Unnamed: 0_level_0,area_id,area_name,avg_score,beginarea_id,beginarea_name,cleanname,country,disambig_composer,endarea_id,endarea_name,...,lifespan_end,lifespan_ended,mbzid,n_aliases,n_tags,pr_score,searchname,sortname,tag_composer,tsr_score
composerid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
404,2ea52ae1-00ee-406e-a8f1-50fb88554d24,Flanders,100.0,7f24b77b-abe7-42d9-b79c-ee09bbf5c83c,Liège,arcadelt jacques,,True,dc10c22b-e510-4006-8b7f-fecb4f36436e,Paris,...,1568,True,673a374b-c873-45cc-af91-aefc76304d59,3,2,100,"Arcadelt, Jacques","Arcadelt, Jacques",True,100
411,9d5dd675-3cf4-4296-9e39-67865ebee758,England,100.0,aff4808a-7e67-49ab-8d51-af0864c824e0,Somerset,bull john,,True,c02ebe2f-ecf0-4671-9f09-d997c0effb9a,Antwerp,...,1628-03-12,True,ca013e83-6a0a-4ce1-9b35-baaad4e697e0,1,1,100,"Bull, John","Bull, John",True,100
412,9d5dd675-3cf4-4296-9e39-67865ebee758,England,100.0,,,farnaby giles,,False,f03d09b3-39dc-4083-afd6-159e3f0d462f,London,...,1640-11,True,5a59cfb0-45e4-4fa8-8604-2682aad19356,1,2,100,"Farnaby, Giles","Farnaby, Giles",True,100
528,9d5dd675-3cf4-4296-9e39-67865ebee758,England,100.0,24404231-7564-4b11-9168-95dd148049a7,Kent,hinton arthur,,True,9d5dd675-3cf4-4296-9e39-67865ebee758,England,...,1941-08-11,True,9a564dfe-a4ed-4daa-9323-9c101c4c23bc,0,0,100,"Hinton, Arthur","Hinton, Arthur",False,100
549,9d5dd675-3cf4-4296-9e39-67865ebee758,England,100.0,f03d09b3-39dc-4083-afd6-159e3f0d462f,London,horn charles edward,,False,e331bfdf-b908-429c-a79b-710cf9c06abb,Boston,...,1849-10-21,True,904acb1c-dc70-49b5-84e8-101195182782,1,0,100,"Horn, Charles Edward","Horn, Charles Edward",False,100
556,9d5dd675-3cf4-4296-9e39-67865ebee758,England,100.0,,,wilbye john,,False,,,...,1638,True,f04b1bdd-d3cf-49d0-a860-f6c8554eac25,0,1,100,"Wilbye, John","Wilbye, John",True,100
980,9d5dd675-3cf4-4296-9e39-67865ebee758,England,94.0,f03d09b3-39dc-4083-afd6-159e3f0d462f,London,barbirolli john,,False,f03d09b3-39dc-4083-afd6-159e3f0d462f,London,...,1970-07-29,True,6af449ad-010c-4dc4-afcb-9866cfe619df,2,3,100,"Barbirolli, John","Barbirolli, John, Sir",False,88
1030,9d5dd675-3cf4-4296-9e39-67865ebee758,England,100.0,f03d09b3-39dc-4083-afd6-159e3f0d462f,London,glover charles william,,False,,,...,1863-03-23,True,573b7449-5d04-4d59-8ee5-c839898ee1b7,1,0,100,"Glover, Charles William","Glover, Charles William",False,100
1107,f03d09b3-39dc-4083-afd6-159e3f0d462f,London,100.0,12c9edfe-2c1f-48ef-9d38-82494a9350a9,Wirral,baxter phil,,False,,,...,,,87c4a3f6-79ea-46f5-b176-8b9cc902f839,0,0,100,"Baxter, Phil","Baxter, Phil",False,100
1121,a510b9b1-404d-4e23-8db8-0f6585909ed8,Quebec,100.0,c3cc624e-b963-49cf-ad0b-e318cb341963,Montreal,mathieu andre,,True,,,...,1968-06-02,True,2507a669-e4e8-4a22-aa8c-7d5e59d51a46,1,1,100,"Mathieu, Andre","Mathieu, André",False,100


In [28]:
get_mbz_area('61b6a12e-4751-4fe3-b40d-ecb84f29736c')

East Sussex


{'id': '61b6a12e-4751-4fe3-b40d-ecb84f29736c',
 'name': 'East Sussex',
 'disambiguation': '',
 'relations': [{'type-id': 'de7cc874-8b1b-3a05-8272-f3834c968fb7',
   'begin': None,
   'source-credit': '',
   'attribute-values': {},
   'area': {'disambiguation': '',
    'id': '08328566-1f0f-4d1b-a887-12014d333aa4',
    'sort-name': 'Fletching',
    'name': 'Fletching'},
   'direction': 'forward',
   'ended': False,
   'type': 'part of',
   'end': None,
   'attributes': [],
   'target-type': 'area',
   'target-credit': ''},
  {'attribute-values': {},
   'area': {'name': 'Battle',
    'id': '0ca8bb81-1a89-459c-8c0a-9e9bd831d8d9',
    'sort-name': 'Battle',
    'disambiguation': ''},
   'type-id': 'de7cc874-8b1b-3a05-8272-f3834c968fb7',
   'source-credit': '',
   'begin': None,
   'target-credit': '',
   'direction': 'forward',
   'ended': False,
   'type': 'part of',
   'end': None,
   'attributes': [],
   'target-type': 'area'},
  {'begin': None,
   'source-credit': '',
   'type-id': 'de7c

In [9]:
finaldf[final_cols].sample(5)

Unnamed: 0_level_0,mbzid,sortname,gender,country,area_id,area_name,beginarea_id,endarea_id,lifespan_begin,lifespan_end,lifespan_ended,n_aliases,n_tags,tag_composer
composerid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1958,3fe11713-6aa2-4124-8688-08b48ea03448,"Langgaard, Rued",male,DK,4757b525-2a60-324a-b060-578765d2c993,Denmark,e0e3c82a-aea8-48d3-beda-9e587db0b969,546a43db-1f9a-46b3-b1b4-54cb4836c3d3,1893-07-28,1952-07-10,True,1,3,True
2073,dd3c27db-7089-44cc-a5e8-6f4c435270bd,"Satoh, Somei",male,JP,2db42837-c832-3c27-b4a3-08198f75693c,Japan,6b14201f-d101-4609-9632-b610aac29173,,1947-01-19,,,3,4,True
1048,6db0064a-6b54-4ffc-9196-7cdb9d89ba3f,"Mignone, Francisco",male,BR,f45b47f8-5796-386e-b172-6c31b009a5d8,Brazil,,,1897-09-03,1986-02-02,True,1,0,False
1156,01d90b8d-78bf-4369-863f-17051f3d836b,"Kennan, Kent",male,US,489ce91b-6658-3307-9877-795b68554c98,United States,4dc3fa97-cf9b-43f0-bec9-fcc52d6215d5,58d2816b-daf9-4fc5-962c-06967f14a5e5,1913-04-18,2003-11-01,True,0,0,False
1860,c0f43106-2abf-426a-b950-cba2bda1bb21,"Amy, Gilbert",male,FR,08310658-51eb-3801-80de-5a0739207115,France,dc10c22b-e510-4006-8b7f-fecb4f36436e,,1936-08-29,,,0,4,True


In [10]:
# number of distinct begin/end area ids across all matched composers
# (a missing value, if present, is counted as one of the distinct entries)
len(set(
    finaldf.endarea_id.unique().tolist() + 
    finaldf.beginarea_id.unique().tolist()
))

1090

In [11]:
finaldf.iloc[0]

area_id              85752fda-13c4-31a3-bee5-0e5cb1f51dad
area_name                                         Germany
avg_score                                             100
beginarea_id         b86b7e97-c4e2-4ec2-942b-5a6cd8eea1da
beginarea_name                                       Bonn
cleanname                            beethoven ludwig van
country                                                DE
disambig_composer                                   False
endarea_id           afff1a94-a98b-4322-8874-3148139ab6da
endarea_name                                         Wien
foundname                            beethoven ludwig van
gender                                               male
index                                                   0
lifespan_begin                                 1770-12-17
lifespan_end                                   1827-03-26
lifespan_ended                                       True
mbzid                1f9df192-a621-4f54-8850-2c5373b7eac9
n_aliases     

In [14]:
finaldf.beginarea_id.sample().values

array(['74e50e58-5deb-4b99-93a2-decbb365c07f'], dtype=object)

In [15]:
import json
import requests
from time import sleep

import pandas as pd


# Area lookup endpoint; the placeholder is filled with an area id.
BASE_URL = 'https://musicbrainz.org/ws/2/area/{}'
# Sent as request headers on every lookup so the service can identify this client.
APP_HEADERS = {'User-Agent': 'NYPhil Concert Builder/0.01 (https://github.com/drewmcdonald/nyphil_concert_builder)'}


def get_mbz_area(id):
    """Look up an area and climb to the nearest ancestor with an ISO 3166-2 code.

    Fetches the area with its area relationships. If the area has no
    'iso-3166-2-codes' key, the first relation whose direction is 'backward'
    is followed recursively until an area carrying that key is found.

    Parameters
    ----------
    id : str
        Area id to look up (substituted into BASE_URL).

    Returns
    -------
    dict or None
        The JSON of the first area in the chain that has 'iso-3166-2-codes'.
        When the result was reached by following a relation, its 'relations'
        entry is replaced with None. Returns None when the request does not
        return HTTP 200 or when there is no backward relation to follow.
    """
    params = {'fmt': 'json', 'inc': 'area-rels'}
    url = BASE_URL.format(id)
    r = requests.get(url, params=params, headers=APP_HEADERS)

    if r.status_code == 200:
        data = json.loads(r.content)
        print(data['sort-name'])
        if 'iso-3166-2-codes' not in data:
            print('no iso code for ' + data['name'])
            # next() needs an explicit default: without it an area with no
            # backward relation raised StopIteration instead of reaching
            # the None check below.
            # NOTE(review): an area that only carries country-level codes
            # also ends up here and yields None - confirm that is intended.
            relations = data.get('relations') or []
            rels_back = next(
                (rel for rel in relations if rel['direction'] == 'backward'),
                None,
            )
            if rels_back is None:
                return None
            data = get_mbz_area(rels_back['area']['id'])
            # drop the (potentially very long) relation list of the ancestor
            if data and 'relations' in data:
                data['relations'] = None
        return data

    if r.status_code == 503:
        # the message has two placeholders, so both values must be supplied
        print('Failed on {} ({})!'.format(id, r.status_code))

    print("Status code {} for {}".format(r.status_code, id))
    return None


In [20]:
# look up one randomly sampled, non-null begin-area id; .values[0] unwraps
# the one-element array so that a plain id string is passed
get_mbz_area(finaldf.loc[finaldf.beginarea_id.notnull()].beginarea_id.sample().values[0])

Neubau
no iso code for Neubau
Wien


{'iso-3166-2-codes': ['AT-9'],
 'id': 'afff1a94-a98b-4322-8874-3148139ab6da',
 'life-span': {'ended': False, 'begin': None, 'end': None},
 'type': 'City',
 'sort-name': 'Wien',
 'relations': None,
 'name': 'Wien',
 'disambiguation': '',
 'type-id': '6fd8f29a-3d0a-32fc-980d-ea697b69da78'}