In [1]:
import pandas as pd
%pylab inline

Populating the interactive namespace from numpy and matplotlib


## Code Bank as CSV

In [128]:
# Read the code bank; entries are separated by blank lines.
# The file holds non-ASCII names (e.g. "Björk Guðmundsdóttir"), so decode it explicitly
# instead of relying on the platform default encoding.
# NOTE(review): assumes the file is saved as UTF-8 — confirm.
with open('../data/2021Codebanks.txt', encoding='utf-8') as f:
    # Drop chunks that are empty or whitespace-only after stripping.
    codes = [i.strip() for i in f.read().split('\n\n') if i.strip()]

In [324]:
# Build {composer: {field: value}} from the code-bank entries.
# A usable entry has a dotted header line followed by a Python expression that yields the
# composers sharing that header (presumably a list literal — verify against the data file).
# The header's second dot-separated segment (lower-cased) is the field name and its last
# segment is the value.
d = dict()
for line in codes:
    if '\n' not in line:
        continue  # header-only entry: no composers listed under it
    # Split on the first newline only, so a composer list wrapped across several lines
    # does not break the two-way unpacking.
    code, comps = line.split('\n', 1)
    code = code.split('.')
    k, v = code[1].lower(), code[-1]
    # SECURITY: eval() executes whatever the data file contains. Only run this on a
    # trusted code bank; if the entries are plain literals, ast.literal_eval is safer.
    for comp in eval(comps):
        d.setdefault(comp, dict())[k] = v

In [325]:
# One row per composer (the dict keys become the index), one column per code-bank field.
comps = pd.DataFrame.from_dict(d, orient='index')
comps.head()

Unnamed: 0,code,live
"Adolphe, Julia",2,1
"Auerbach, Lera",2,1
"Balch, Katie",2,1
Björk Guðmundsdóttir,2,1
"Borisova-Ollas, Victoria",2,1


In [326]:
# Count, per field, the composers that have no value recorded.
comps.isnull().sum()

code    268
live     58
dtype: int64

In [329]:
# Fill in defaults for composers that are missing one of the two fields.
comps = comps.assign(
    code=comps['code'].fillna(1).astype(int),  # white-male if not listed
    live=comps['live'].fillna(2).astype(int),  # dead if not listed
)
# Placeholder "composers" get their own live value of 0.
comps.loc[['Anon, Anon', 'TBD, TBD', 'Trad, Trad'], 'live'] = 0

# Move the composer names out of the index into a regular 'comp/arr' column.
comps = comps.rename_axis('comp/arr').reset_index()

## Code Bank Issues

In [331]:
# duos? composer/arranger?
# Flag entries naming more than one person: "&" pairs, slash-joined names, or "arr." credits
# (matched case-insensitively by lower-casing first).
multi_person = comps['comp/arr'].str.lower().str.contains(r'&|/|arr\.')
comps[multi_person]

Unnamed: 0,comp/arr,code,live
121,"Floyd, Charles (Arr.)",3,1
128,He/Chen,3,1
138,"Johnson, James Weldon/Johnson, John Rosamond",3,1
195,"Coleridge-Taylor, Samuel (arr. Dworkin, Aaron)",3,2
199,"Ellington, Duke & Strayhorn, Billie",3,2
484,"Westlake, Nigel & Attar, Lior",1,1


In [332]:
# duplicates? Kaoru (aka Kishi Bashi) Ishibashi vs. Kaoru (Kishi Bashi) Ishibashi, Goto vs. Goto, Yo
# Split "Last, First" on the first ", " only. `n` must be passed by keyword (pandas 2.x
# rejects it positionally), and expand=True returns the two columns directly instead of
# going through a slow row-wise .apply(pd.Series).
# Entries without a comma have a missing first name.
names = comps['comp/arr'].str.split(', ', n=1, expand=True)
names.columns = ['last_name', 'first_name']

# For every surname, collect the distinct first names; a missing first name counts as its
# own value, so "Goto" vs. "Goto, Yo" is still reported.
share = names.groupby('last_name')['first_name'].aggregate(firsts=lambda f: f.unique(), n=lambda f: len(f.unique()))
share[share['n'] > 1]

Unnamed: 0_level_0,firsts,n
last_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Adams,"[John, John Luther]",2
Adolphe,"[Julia, Bruce]",2
Anderson,"[David, Julian]",2
Carreño,"[Inocente, Teresa]",2
Coleridge-Taylor,"[Samuel, Samuel (arr. Dworkin, Aaron)]",2
Daugherty,"[Michael, Nathan]",2
Dawson,"[William Levi, Jay]",2
Ellington,"[Duke, Duke & Strayhorn, Billie]",2
Goto,"[nan, Yo]",2
Ishibashi,"[Kaoru (aka Kishi Bashi), Kaoru (Kishi Bashi)]",2


## Merging Code Bank with Datasets

In [307]:
# Load the '20-21' orchestra programme file and lower-case its column headers.
season = pd.read_csv('../data/20-21_orchestra.csv').rename(columns=str.lower)

In [308]:
def separate_comp_arr(comps):
    """Split a composer entry into composer and arranger names.

    Parameters
    ----------
    comps : str or missing value
        A composer entry, optionally carrying an arranger credit introduced by the
        lower-case marker ``'arr.'``, e.g. ``'Last, First (arr. Other, Name)'``.

    Returns
    -------
    list
        ``[composer, arranger]`` when the marker is present, otherwise ``[comps]``
        unchanged. Surrounding parentheses and spaces are stripped from both parts.

    Notes
    -----
    Matching is deliberately case-sensitive: entries written with ``'(Arr.)'`` are
    left intact.
    """
    # Missing composers (NaN/None) are not strings; pass them through instead of
    # raising TypeError on the membership test.
    if not isinstance(comps, str) or 'arr.' not in comps:
        return [comps]
    # Split at the first marker only, so a second 'arr.' further along stays in the
    # arranger part instead of breaking the two-way unpacking.
    comp, arr = comps.split('arr.', 1)
    return [comp.strip('() '), arr.strip('() ')]

In [309]:
# Break every composer entry into its composer / arranger parts, then spread the resulting
# lists over two columns (rows without an arranger get a missing value there).
split_names = season['composer'].apply(separate_comp_arr)
season[['composer', 'arranger']] = split_names.apply(pd.Series)

In [337]:
# Attach each composer's code-bank attributes to the season rows.
# The left join keeps every programmed work; composers absent from the code bank get NaN.
# validate='many_to_one' makes pandas raise instead of silently duplicating season rows
# should a composer ever appear more than once in the code bank.
season = (
    pd.merge(
        season,
        comps,
        how='left',
        left_on='composer',
        right_on='comp/arr',
        validate='many_to_one',
    )
    .drop('comp/arr', axis=1)
    .rename(columns=dict(code='comp.code', live='comp.live'))
)

Unnamed: 0,ensemble,composer,work,arranger,comp.code,comp.live
0,Oregon,"Abrahamsen, Hans","Left, alone",,1.0,1.0
1,Seattle,"Abrahamsen, Hans",Horn Concerto,,1.0,1.0
2,Amarillo,"Adams, John",Short Ride on a Fast Machine,,1.0,1.0
3,Arkansas,"Adams, John",Unknown Piece,,1.0,1.0
4,Cleveland,"Adams, John",Common Tones in Simple Times,,1.0,1.0
...,...,...,...,...,...,...
3669,San Francisco,Zhou Tian,Gift,,3.0,1.0
3670,Utah,Zhou Tian,Trace,,3.0,1.0
3671,Orlando,"Zhurbin, Lev","Pulse, a memorial (World premiere)",,1.0,1.0
3672,San Antonio,"Zisser, Na'ama",Island Mantras,,4.0,1.0
