In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the two tab-separated composer tables in one pass: the composer key
# (comp_main) and the partial MusicBrainz lookup results (comp_mbz).
comp_main, comp_mbz = (
    pd.read_csv(table_path, sep='\t')
    for table_path in ('data/interim/composer_key.txt', 'data/tmp/mbz_partial.txt')
)

In [3]:
# Work -> composer lookup: read the work key and keep only the two id columns.
work_composer = pd.read_csv(
    'data/interim/work_key.txt',
    sep='\t',
).loc[:, ['ID', 'composerID']]

Build a list of programs, whether they were modelable concerts, and which composers were played on them

This will help us identify which composers are irrelevant for modeling purposes

In [4]:
# One row per (programID, eventType). Chaining drop_duplicates() instead of
# calling it with inplace=True avoids mutating a column slice of the raw frame.
programs = (
    pd.read_csv('data/split/prog_conc_split.txt', sep='\t')[['programID', 'eventType']]
    .drop_duplicates()
)

etypes = pd.read_csv('data/manual/event_types.txt', sep='\t')
prog_work = pd.read_csv('data/interim/prog_work_cond_map.txt', sep='\t')[['programID', 'workID']]

# Attach the event-type attributes (this is where `modelable` comes from),
# then the works on each program (inner join: programs without works drop out),
# then the composer of each work.
programs = pd.merge(programs, etypes, on='eventType', how='left')
programs = pd.merge(programs, prog_work, on='programID', how='inner')
programs = pd.merge(programs, work_composer, left_on='workID', right_on='ID', how='left').drop('ID', axis=1)

# Collapse to distinct (program, composer) pairs, keeping the modelable flag.
df = programs[['programID', 'modelable', 'composerID']].drop_duplicates()

# Keep only modelable programs. `axis` is passed by keyword: the positional
# form `drop('modelable', 1)` is rejected by pandas >= 2.0, and the keyword
# form matches the `.drop('ID', axis=1)` call above.
df = df.loc[df.modelable == 'Y', :].drop('modelable', axis=1).copy()

In [5]:
# Left-join the MusicBrainz id onto every (program, composer) row and flag
# the rows whose composer has one.
df = df.merge(
    comp_mbz[['composerid', 'mbzid']],
    how='left',
    left_on='composerID',
    right_on='composerid',
)
df['match'] = ~df['mbzid'].isnull()

In [6]:
# Number of distinct modelable programs per composer, most-programmed first.
# nunique() replaces agg(np.count_nonzero): count_nonzero skips any row whose
# programID is 0, and it would double-count a program if the earlier merge with
# comp_mbz produced duplicate rows for a composer.
compcounts = (
    df.groupby('composerID')['programID']
    .nunique()
    .sort_values(ascending=False)
    .reset_index(name='n_programs')
)

# Attach the MusicBrainz id (NaN where the composer has no match yet).
compcounts = pd.merge(
    compcounts,
    comp_mbz[['composerid', 'mbzid']],
    left_on='composerID',
    right_on='composerid',
    how='left',
)
compcounts = compcounts[['composerID', 'n_programs', 'mbzid']].copy()

In [7]:
# True where the composer already has a MusicBrainz id.
compcounts['match'] = ~compcounts['mbzid'].isnull()

In [8]:
# Bring in the columns of the composer key for each composerID.
compcounts = compcounts.merge(comp_main, how='left', on='composerID')

In [11]:
# Show the composers that still have no MusicBrainz id.
compcounts[~compcounts['match']]

Unnamed: 0,composerID,n_programs,mbzid,match,composerName
17,14.0,628,,False,"Haydn, Franz Joseph"
39,30.0,259,,False,"Traditional,"
43,253.0,233,,False,"Elgar, Sir Edward"
57,336.0,128,,False,"Hadley, Henry Kimball"
66,274.0,102,,False,"Glazunov, Alexander"
78,397.0,74,,False,"Unspecified,"
85,349.0,64,,False,"Lyadov, Anatoli"
93,246.0,58,,False,"MacDowell, Edward A."
134,886.0,34,,False,"Strauss, Johann, Jr. & Josef"
139,216.0,33,,False,"Gretry [Grétry], Andre [André] Ernest Modeste"


In [12]:
import json
import requests
from time import sleep

import pandas as pd


# Root of the MusicBrainz web service; endpoint names are appended to it.
BASE_URL = 'https://musicbrainz.org/ws/2/'

# Headers sent with every request; the User-Agent identifies this application.
APP_HEADERS = {
    'User-Agent': (
        'NYPhil Concert Builder/0.01 '
        '(https://github.com/drewmcdonald/nyphil_concert_builder)'
    ),
}


def _escape_lucene(text):
    """Backslash-escape Lucene query-syntax characters in ``text``."""
    special = set('+-!(){}[]^"~*?:\\/&|')
    return ''.join('\\' + ch if ch in special else ch for ch in str(text))


def search_composer(id, name, timeout=30):
    """Search the MusicBrainz ``artist`` endpoint for a person called ``name``.

    Parameters
    ----------
    id
        Caller's identifier for the composer. It is not sent to the service;
        it is echoed back in the result and in failure messages.
    name
        Composer name to search for. Characters with special meaning in the
        Lucene query syntax (e.g. ``[``, ``]``, ``:``) are escaped so that
        names such as "Gretry [Grétry], Andre" are sent as literal text.
    timeout
        Seconds to wait for the server before ``requests`` raises a timeout
        error, so a stalled connection cannot hang the notebook.

    Returns
    -------
    dict or None
        The decoded JSON response with an added ``'search'`` key holding
        ``{'id': id, 'name': name}`` on HTTP 200; ``None`` for any other
        status code (a message is printed).
    """
    endpoint = 'artist'
    params = {
        'query': '{} AND type:person'.format(_escape_lucene(name)),
        'fmt': 'json',
        'inc': 'aliases url-rels'
    }
    url = BASE_URL + endpoint
    r = requests.get(url, params=params, headers=APP_HEADERS, timeout=timeout)
    if r.status_code == 200:
        data = r.json()
        # Record what was searched so results can be tied back to the input.
        data['search'] = {'id': id, 'name': name}
        return data
    if r.status_code == 503:
        # NOTE(review): 503 is presumably the service's rate-limit response;
        # the caller may want to sleep and retry — confirm.
        print('Failed on {} ({})!'.format(id, name))

    print("Status code {} for {} ({})".format(r.status_code, id, name))
    return None


In [16]:
# NOTE(review): `json.re` looks like an unfinished expression left over from
# editing (the `json` package does not appear to expose an `re` attribute), and
# the table rendered below this cell could not have come from it. Confirm the
# intended call or delete the cell.
json.re

Unnamed: 0,composerid,mbzid,sortname,gender,country,area_id,area_name,beginarea_id,endarea_id,lifespan_begin,lifespan_end,lifespan_ended,n_aliases,n_tags,tag_composer
0,0,1f9df192-a621-4f54-8850-2c5373b7eac9,"Beethoven, Ludwig van",male,DE,85752fda-13c4-31a3-bee5-0e5cb1f51dad,Germany,b86b7e97-c4e2-4ec2-942b-5a6cd8eea1da,afff1a94-a98b-4322-8874-3148139ab6da,1770-12-17,1827-03-26,True,28,20,True
1,1,c2d17829-1424-435b-9386-c77d3a920abe,"Weber, Carl Maria von",male,DE,85752fda-13c4-31a3-bee5-0e5cb1f51dad,Germany,d1a17bfe-b392-4aae-bf67-e6c6987363fe,f03d09b3-39dc-4083-afd6-159e3f0d462f,1786-11-19,1826-06-05,True,9,5,True
2,2,f7ef501b-2bc0-4083-ac52-3518255883a2,"Hummel, Johann Nepomuk",male,AT,caac77d1-a5c8-3e6e-8e27-90b44dcc1446,Austria,ed091849-62dd-4d10-9d40-50594afb1b15,c7644e45-dec4-43fd-aad6-35036b8e911d,1778-11-14,1837-10-17,True,4,3,True
3,4,846be3c9-5f94-46ab-97b9-531335dd3658,"Rossini, Gioachino",male,IT,c6500277-9a3d-349b-bf30-41afdbf42add,Italy,b3c4f4b8-1a4c-48e6-93db-e278dce26270,dc10c22b-e510-4006-8b7f-fecb4f36436e,1792-02-29,1868-11-13,True,13,7,True
4,5,b972f589-fb0e-474e-b64a-803b0364fa75,"Mozart, Wolfgang Amadeus",male,AT,caac77d1-a5c8-3e6e-8e27-90b44dcc1446,Austria,f0590317-8b42-4498-a2e4-34cc5562fcf8,afff1a94-a98b-4322-8874-3148139ab6da,1756-01-27,1791-12-05,True,24,16,True
5,7,6f5bfd20-84cc-4879-8a40-05631ad576c7,"Bellini, Vincenzo",male,IT,c6500277-9a3d-349b-bf30-41afdbf42add,Italy,0ebe2c1b-f49e-4144-be01-1f146ee6e8b3,c65960ba-69b5-494f-b7d6-8dea87a3986c,1801-11-03,1835-09-23,True,6,6,True
6,8,0040c89b-f2e6-4bc3-b75d-5152fb0c890e,"Romberg, Bernhard",male,DE,85752fda-13c4-31a3-bee5-0e5cb1f51dad,Germany,74601bdf-08ff-4efa-a1ac-d6020853ae53,11a44e18-a2e5-43a9-bee9-aa4f7c83f967,1767-11-13,1841-08-13,True,0,2,True
7,9,9ee75435-3d4a-4455-bab6-aed7f7f8b2bc,"Pacini, Giovanni",male,IT,c6500277-9a3d-349b-bf30-41afdbf42add,Italy,0ebe2c1b-f49e-4144-be01-1f146ee6e8b3,36d9876d-38e9-49b4-9024-16055a9e1778,1796-02-02,1867-12-06,True,0,0,False
8,10,d862c725-08b8-45bd-881b-3e6a9f295366,"Onslow, George",male,FR,08310658-51eb-3801-80de-5a0739207115,France,31eea06c-a794-4138-879d-e633336275a2,31eea06c-a794-4138-879d-e633336275a2,1784-07-27,1853-10-03,True,1,3,True
9,11,480aa5f2-97e6-4e79-a364-6629d7b63b55,"Thalberg, Sigismond",male,AT,caac77d1-a5c8-3e6e-8e27-90b44dcc1446,Austria,28d44913-d1b1-4cc8-8c51-bf80a2a210cf,86fbdd3c-aaac-4aed-916a-b361f7e1a8de,1812-01-08,1871-04-27,True,0,0,False
