Test Notebook for Visualization


In [1]:
%load_ext autoreload
%autoreload 2
# import os
# os.chdir(/path/to/git_folder)
from tools.ms3 import *
import itertools
import scipy as sp
from plotly.offline import iplot
import plotly.express as px
import cufflinks as cf # for creating plots from pandas on the fly
import plotly.io as IO
cf.go_offline()
cf.set_config_file(theme='ggplot') #{'solar', 'pearl', 'white', 'ggplot'}

# configuration 

In [2]:
# NOTE(review): `os` and `logging` are not imported explicitly in the import cell;
# presumably they are re-exported by `from tools.ms3 import *` — confirm.
compute_all = False                              # If True, every `compute_anew or compute_all` guard recomputes instead of loading the stored TSVs
logging.getLogger().setLevel(logging.WARNING)    # logging level of the root logger
data ='data'                                     # root data folder; the two sub-folders below are derived from it
data_ms3 = os.path.join(data,'MuseScore_3')
data_tsv = os.path.join(data,'tsv')
note_features = None                             # TODO: Pass selected features as kwargs to get_notes()
#plt.rcParams['figure.figsize'] = [15,10]        # Change the plotsize for matplotlib in the entire notebook
std_plotsize = (1100, 500)                       # Standard plotsize (width, height in px) for storing plotly images to disk 
                                                 # (the ones in the notebook adapt to windows size)

# helper functions

In [3]:
def plot(df, fname='test.png', kind='scatter', size=std_plotsize,  **kwargs):
    """Build a figure from `df`, write it to `fname` and display it inline.

    Parameters
    ----------
    df : object exposing `.iplot(asFigure=True, ...)`.
    fname : path of the image file to write.
    kind : plot kind forwarded to `df.iplot`.
    size : sequence whose first two items are the width and height (px) of
        the stored image.
    **kwargs : forwarded unchanged to `df.iplot`.
    """
    figure = df.iplot(asFigure=True, kind=kind, **kwargs)
    # Only the file on disk gets the fixed size; the inline figure is shown as built.
    IO.write_image(figure, fname, width=size[0], height=size[1])
    iplot(figure)

In [4]:
def bag_of_notes(df, tpc='tpc'):
    """Summarise how often and for how long each value of `df.tpc` occurs.

    Parameters
    ----------
    df : DataFrame including the columns ['tpc', 'duration'].
    tpc : str
        How the rows of the result are labelled: 'tpc' (default, the raw
        values), 'name', 'degree', 'pc', or several of these joined by '+'
        (e.g. 'name+degree+tpc'). Unknown parts are reported through
        `logging.warning` and skipped.

    Returns
    -------
    DataFrame with one row per occurring tpc and the columns
    'count_a' / 'duration_a' (absolute) and 'count_n' / 'duration_n'
    (divided by the respective column sum).
    """
    occurring = np.sort(df.tpc.unique())
    grouped = df.groupby('tpc')

    bag = pd.DataFrame(index=occurring, columns=['count_a', 'count_n', 'duration_a', 'duration_n'])
    bag['count_a'] = grouped.size()
    bag['count_n'] = bag['count_a'] / bag['count_a'].sum()
    bag['duration_a'] = grouped['duration'].sum().astype(float)
    bag['duration_n'] = (bag['duration_a'] / bag['duration_a'].sum()).astype(float)

    if tpc == 'tpc':
        return bag

    # Each label kind is resolved lazily so that only the requested converters run.
    converters = {
        'tpc': lambda: occurring,
        'name': lambda: tpc2name(occurring),
        'degree': lambda: tpc2degree(occurring),
        'pc': lambda: tpc2pc(occurring),
    }
    label_sets = []
    for part in tpc.split('+'):
        make_labels = converters.get(part)
        if make_labels is None:
            logging.warning("Parameter tpc can only be {'tpc', 'name', 'degree', 'pc'} or a combination such as 'tpc+pc' or 'name+degree+tpc'.")
        else:
            label_sets.append(make_labels())

    if not label_sets:
        # Nothing usable was requested: keep the existing index.
        label_sets.append(bag.index)

    if len(label_sets) == 1:
        bag.index = label_sets[0]
    else:
        # First label kind in front, the remaining ones in parentheses.
        bag.index = [f"{first} ({', '.join(str(extra) for extra in rest)})"
                     for first, *rest in zip(*label_sets)]
    return bag

# loading the data
The dataframe `merged_ids` maps the 435 IDs to the files in the `data_ms3` folder. The different types of trios (trio I, trio II, alternative trio) are all replaced by the term 'trio'.

In [5]:
merged_ids = os.path.join(data_ms3, 'merged_ids.tsv')
files = pd.read_csv(merged_ids, sep='\t', index_col=0)
files.loc[files.dance.str.contains('trio'), 'dance'] = 'trio'
files.head()

Unnamed: 0_level_0,D,no,dance,path
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,41,1,menuett,041/D041menuett01a.mscx
2,41,1,trio,041/D041trio01b.mscx
3,41,2,menuett,041/D041menuett02a.mscx
4,41,2,trio,041/D041trio02b.mscx
5,41,3,menuett,041/D041menuett03a.mscx


**Accessibility**
We check whether all files are accessible.

In [6]:
# Collect every listed score whose path does not exist below `data_ms3`.
missing = [path for path in files.path.values
           if not os.path.isfile(os.path.join(data_ms3, path))]
print("These files are missing:\n" + '\n'.join(missing) if missing else "All files found.")

All files found.


## computing or loading the dataframe representation of the music
We will be working on the data set in the form of note_lists. These can be newly computed from the scores or loaded from the precomputed TSV.

In [7]:
compute_anew = False

if compute_anew or compute_all:

    # Parse every score, derive the three tables and store them as TSV for later runs.
    parse_score = lambda path: Score(os.path.join(data_ms3,path))
    score_objects = pd.DataFrame(files.path.apply(parse_score)).rename(columns={'path': 'object'})
    note_list = score_objects.groupby('id').apply(lambda df: df.iloc[0,0].get_notes(volta_warning=False))
    note_list.to_csv(os.path.join(data_tsv, 'note_list_complete.tsv'), sep='\t')
    measure_list = score_objects.groupby('id').apply(lambda df: df.iloc[0,0].info)\
                                .astype({'section': int, 
                                         'keysig': int, 
                                         'voices': int, 
                                         'volta': 'Int64', 
                                         'numbering_offset': 'Int64', 
                                         'dont_count': 'Int64'})
    measure_list.to_csv(os.path.join(data_tsv, 'measure_list_complete.tsv'), sep='\t')
    section_order = score_objects.applymap(lambda x: x.section_order).rename(columns = {'path': 'sections'})
    section_order.to_csv(os.path.join(data_tsv, 'section_order_complete.tsv'), sep='\t')
    
else:
    # Load the stored tables; fraction-valued columns go through `frac`,
    # nullable integer columns are read as 'Int64'.
    note_list = pd.read_csv(os.path.join(data_tsv, 'note_list_complete.tsv'), sep='\t', index_col=[0,1,2], 
                            dtype={'tied': 'Int64', 
                                   'volta': 'Int64'}, 
                            converters={'onset':frac, 
                                        'duration':frac, 
                                        'nominal_duration':frac, 
                                        'scalar':frac})
    measure_list = pd.read_csv(os.path.join(data_tsv, 'measure_list_complete.tsv'), sep='\t', index_col=[0,1], 
                               dtype={'volta': 'Int64', 
                                      'numbering_offset': 'Int64', 
                                      'dont_count': 'Int64'}, 
                               converters={'duration': frac, 
                                           'act_dur': frac, 
                                           'offset': frac, 
                                           # the 'next' column is stored as the text of a list, e.g. "[1, 2]"
                                           'next': lambda l: [int(mc) for mc in l.strip('[]').split(', ') if mc != '']})
    section_order = pd.read_csv(os.path.join(data_tsv, 'section_order_complete.tsv'), sep='\t', index_col = [0])\
                      .rename(columns={'object': 'sections'})
    ## converters={'object': lambda l: [int(s) for s in l.strip('[]').split(', ')]}

# Preview the note list whichever branch produced it. This has to be the last
# top-level expression of the cell: inside the `else` block it displayed nothing.
note_list.head()

## loading ground truth data

### major vs minor ground truth


In [8]:
GT_ma_mi = pd.read_csv(os.path.join(data_tsv, 'GT_ma_mi.tsv'), sep='\t', index_col=[0])['mode']
GT_ma_mi.value_counts()

major    372
minor     63
Name: mode, dtype: int64

In [9]:
files['gt_mode'] = GT_ma_mi

# basic statistics

In [10]:
tpc_distribution = bag_of_notes(note_list, 'name')

### average note duration

The duration and count of tonal pitch-classes are analogously distributed over the corpus. This implies that the average duration of each note is roughly independent of its tonal pitch class.

In [11]:
average_duration = pd.DataFrame(tpc_distribution['duration_a']/tpc_distribution['count_a'], columns = ['Average duration'])

#average_duration.iplot('bar')

The common average duration is close to a quarter note, which is also the common tactus of all the dances (that are only written in 3/4 or 2/4 meter).

## distribution of transposed notes
All pitches are transposed to C major / A minor by subtracting the key signature from `tpc`.

*Example*: `keysig == 1` &rightarrow; 1 sharp &rightarrow; key of G major (or E minor) &rightarrow; tonic = G = `tpc == 1` &rightarrow; transposition to C major (or A minor) by subtracting 1

In [12]:
note_list_transposed = transpose_to_C(note_list, measure_list)                                                 
transposed_distribution = bag_of_notes(note_list_transposed, 'degree+name')

## entropy of the pitch class distribution

For each dance, we compute the entropy of the distribution of tonal pitch classes, normalised to the alphabet of all 35 tpcs up to 2 accidentals.

In [13]:
note_list_by_dance = note_list.groupby('id').apply(lambda x: bag_of_notes(x, 'tpc'))
entropies = note_list_by_dance.groupby('id').apply(lambda x: sp.stats.entropy(x.count_a)/np.log(35))

#entropies.iplot('hist', bins = 50, xTitle = 'Normalised entropy')



The entropy distribution seems to be independent of the key signature, with no significant (at 95% confidence and, if any, small in effect size) correlation between entropy and key signature or number of accidentals.

In [14]:
# Attach each note's key signature by matching the note's ('id', 'mc') against the
# index of `measure_list`, then keep the largest key signature per dance.
# `left_on=` replaces `on=`: pandas rejects `on` combined with `right_index=True`.
accidentals = note_list.merge(measure_list['keysig'], left_on=['id', 'mc'], right_index=True).groupby('id').keysig.max()
pd.DataFrame({'entropy': entropies, 'accidentals':  accidentals}).boxplot(column = 'entropy', by = 'accidentals');


#iplot('scatter', mode='markers', x = 'key_sig', y = 'entropy')

### Entropy of pitch distribution

In [15]:
files['entropy'] = entropies

In [16]:
#files.set_index('dance', append=True)\
#     .entropy\
#     .unstack(level=1)\
#     .iplot('box', yTitle = 'entropy' )

### Entropy of durations distribution

In [17]:
files['duration_entropy'] = note_list.groupby('id').duration.value_counts().groupby('id').apply(lambda x: sp.stats.entropy(x))

#files.set_index('dance', append=True)\
#     .duration_entropy\
#     .unstack(level=1)\
#     .iplot('box', yTitle = 'durations entropy' )

### Entropy of onsets distributions

In [18]:
files['onset_entropy'] = note_list.groupby('id').onset.value_counts().groupby('id').apply(lambda x: sp.stats.entropy(x))

#files.set_index('dance', append=True)\
#     .onset_entropy\
#     .unstack(level=1)\
#     .iplot('box', yTitle = 'onset entropy' )

## provisional mode classification

The historically influential Krumhansl & Kessler (1982) key profiles classify pitch classes according to their perceived fit into a major or minor tonal context. The correlation of the pitch-class distribution of each dance with a key profile provides a first estimate of the fit of that dance into a mode (cf. Krumhansl, 1990). It should be noted that these key profiles are based on behavioural responses of listeners rather than on corpus statistics. 

In [19]:
def KK(mode, transposition):
    """Krumhansl and Kessler (1982) key profiles for major and minor modes.

    Parameters
    ----------
    mode : 0 for the major profile, 1 for the minor profile.
    transposition : int
        Number of positions the fifths-ordered profile is rotated to the
        right (negative values rotate to the left). Taken modulo 12.

    Returns
    -------
    list of 12 floats ordered by fifths. With `transposition == 0` the major
    tonic weight sits at position 0 and the minor tonic weight at position 3.
    For any other `mode` a hint is printed and None is returned.
    """
    # Weights in chromatic order, starting from the tonic.
    # NOTE(review): values are kept exactly as found in the notebook — verify against the publication.
    major = [6.20, 2.55, 3.45, 2.85, 4.22, 4.57, 2.67, 5.25, 2.45, 3.35, 2.70, 2.70]
    minor = [6.03, 3.35, 3.67, 5.28, 2.58, 3.55, 2.87, 4.80, 4.35, 2.67, 2.50, 3.42]

    major_by_fifths = [None] * len(major)
    minor_by_fifths = [None] * len(minor)

    # Reorder by fifths: chromatic step i lands at position 7*i (mod 12);
    # the minor profile is additionally offset by 3 positions.
    for i in range(len(major)):
        major_by_fifths[(i*7)%12] = major[i]
        minor_by_fifths[((i*7)+3)%12] = minor[i]

    if mode == 0:
        profile = major_by_fifths
    elif mode == 1:
        profile = minor_by_fifths
    else:
        print('0 = major, 1 = minor')
        return None

    # Rotate the fifths-ordered list itself. The previous code appended a slice of
    # the chromatic-order list, which was only correct for transposition == 0.
    shift = transposition % 12
    if shift == 0:
        return list(profile)
    return profile[-shift:] + profile[:-shift]

### key profiles

In [20]:
# NOTE: `pc_list` is an alias of `note_list_transposed`, not a copy, so the
# modulo below also rewrites `note_list_transposed.tpc` in place.
# mode_slider/key_slider later bin `note_list_transposed` into a range(12) frame
# and therefore appear to rely on this side effect — check them before adding a `.copy()` here.
pc_list = note_list_transposed
# Fold every tpc into 0..11 (Python's % never returns a negative result for a positive modulus).
pc_list.tpc = pc_list.tpc.apply(lambda x: x%12)
# One bag of notes per dance, indexed by the folded tpc.
pc_list_distr = pc_list.groupby('id').apply(lambda x: bag_of_notes(x, 'tpc'))

def full_distr(bag, index):
    """Expand per-dance bags of notes to full 12-bin profiles.

    Parameters
    ----------
    bag : DataFrame with a column 'duration_n', indexed by (id, pitch class).
    index : iterable of ids; every id gets the bins 0..11.

    Returns
    -------
    DataFrame with the single float column 'profile', indexed by the product
    of `index` and range(12). Bins that do not occur in `bag` stay 0.0.
    """
    key_profile_index = pd.MultiIndex.from_product([index, list(range(12))])
    # Start from float zeros. An empty object frame followed by fillna(0) relied
    # on pandas silently downcasting to int and then upcasting again on assignment.
    key_profile = pd.DataFrame(0.0, index = key_profile_index, columns = ['profile'])

    for i in bag.index:
        key_profile.loc[i] = bag.loc[i].duration_n
    return key_profile

In [21]:
# Correlate every dance's 12-bin duration profile with the untransposed major and
# minor key profiles. `level=0` groups by dance id; the deprecated `axis=0`
# keyword is dropped (rows are the default).
glob_key_profile = full_distr(pc_list_distr, files.index)
major_corr = glob_key_profile.groupby(level = 0).profile.apply(lambda x, kk=KK(0,0): sp.stats.pearsonr(x, kk)[0]).to_frame('major')
minor_corr = glob_key_profile.groupby(level = 0).profile.apply(lambda x, kk=KK(1,0): sp.stats.pearsonr(x, kk)[0]).to_frame('minor')
key_corr = pd.concat([major_corr, minor_corr], axis = 1)

In [22]:
major_dances_prov = files.loc[key_corr[key_corr.minor<=key_corr.major].index]
minor_dances_prov = files.loc[key_corr[key_corr.minor>key_corr.major].index]

print(f'According to this preliminary classification, there are {len(major_dances_prov)} dances whose global pitch-class distribution matches the major key profile better than the minor one, and {len(minor_dances_prov)} whose global pitch-class distribution matches the minor key profile better than the major one. ')

According to this preliminary classification, there are 401 dances whose global pitch-class distribution matches the major key profile better than the minor one, and 34 whose global pitch-class distribution matches the minor key profile better than the major one. 


In [23]:
#minor_dances_prov.groupby('dance').size().iplot('bar', title = 'Distribution of minor mode dances (provisional)')

In [24]:
# Number of dances per (dance type, ground-truth mode) as a flat table with the
# columns 'dance', 'gt_mode' and 'count'.
dances_mode = (
    files.groupby('dance').gt_mode.value_counts()
         .rename('count')
         .reset_index()
)


In [25]:
#px.bar(dances_mode, x = 'dance', y = 'count', color = 'gt_mode')

### mode recognition based on first section alone

In [26]:
pc_list_distr_A = pc_list[pc_list.index.get_level_values('section') == 0].groupby('id').apply(lambda x: bag_of_notes(x, 'tpc'))

In [27]:
# Same correlation as for the whole dances, restricted to the first-section profiles.
# `level=0` groups by dance id; the deprecated `axis=0` keyword is dropped.
A_key_profile = full_distr(pc_list_distr_A, files.index)
major_corr_A = A_key_profile.groupby(level = 0).profile.apply(lambda x, kk=KK(0,0): sp.stats.pearsonr(x, kk)[0]).to_frame('major')
minor_corr_A = A_key_profile.groupby(level = 0).profile.apply(lambda x, kk=KK(1,0): sp.stats.pearsonr(x, kk)[0]).to_frame('minor')
key_corr_A = pd.concat([major_corr_A, minor_corr_A], axis = 1)


In [28]:
major_dances_A = files.loc[key_corr_A[key_corr_A.minor<=key_corr_A.major].index]
minor_dances_A = files.loc[key_corr_A[key_corr_A.minor>key_corr_A.major].index]

print(f'When looking at the pitch distribution of first sections only, there are {len(minor_dances_A)} dances whose pitch profile is closer to the minor KK key profile than to the major one.')

When looking at the pitch distribution of first sections only, there are 55 dances whose pitch profile is closer to the minor KK key profile than to the major one.


In [29]:
# Ids labelled 'minor' in the ground truth.
GT_minor = GT_ma_mi[GT_ma_mi == 'minor'].index

# Split the first-section predictions into hits, false alarms and misses.
detected_minor_A = minor_dances_A.loc[minor_dances_A.index.isin(GT_minor)]
falsely_detected_minor_A = minor_dances_A.loc[~minor_dances_A.index.isin(GT_minor)]
not_detected_minor_A = major_dances_A.loc[major_dances_A.index.isin(GT_minor)]

print(f'This simple model successfully recognises {len(detected_minor_A)} of the {len(GT_minor)} dances labeled as minor in our ground-truth. It wrongly labels {len(falsely_detected_minor_A)} dances as minor and fails to recognise {len(not_detected_minor_A)} minor dances.')

This simple model successfully recognises 51 of the 63 dances labeled as minor in our ground-truth. It wrongly labels 4 dances as minor and fails to recognise 12 minor dances.


#### distribution of false positives/negatives by dance type

In [30]:
falsely_detected_minor_A.merge(key_corr_A, left_index = True, right_index = True)

Unnamed: 0,D,no,dance,path,gt_mode,entropy,duration_entropy,onset_entropy,major,minor
338,783,1,deutscher,783/D783deutscher01.mscx,major,0.624948,0.750338,1.274454,0.475333,0.744253
346,783,9,deutscher,783/D783deutscher09.mscx,major,0.560845,0.808492,1.391723,0.0923,0.165038
369,816,3,ecossaise,816/D816ecossaise03.mscx,major,0.582274,0.868209,0.913351,0.677665,0.744747
415,972,2,deutscher,972/D972deutscher02.mscx,major,0.523915,0.693094,1.611656,0.0923,0.165038


- D783.1 shifts immediately to relative minor. 
- D783.9 is parsed so that section 0 is only the pick-up measure. 
- D816.3 shifts to relative minor after first phrase.
- D972.2 is parsed so that section 0 is only the pick-up measure.

Dances with ID = 60, 103, 196, 223, 267, 275, 307, 346, 349, 375, 394, 413, 415 have the pick-up section issue.

In [31]:
not_detected_minor_A.merge(key_corr_A, left_index = True, right_index = True)

Unnamed: 0,D,no,dance,path,gt_mode,entropy,duration_entropy,onset_entropy,major,minor
56,128,10,deutscher,128/D128deutscher10.mscx,minor,0.562569,1.307103,1.588871,0.577539,0.222567
63,145,3,walzer,145/D145walzer03.mscx,minor,0.614695,0.716351,1.366746,0.71839,0.605821
69,145,9,walzer,145/D145walzer09.mscx,minor,0.592727,1.226204,1.410805,0.848188,0.598276
194,366,9,ländler,366/D366ländler09.mscx,minor,0.604576,0.923784,1.41735,0.756567,-0.022396
198,366,13,ländler,366/D366ländler13.mscx,minor,0.598615,0.833152,1.460061,0.663618,0.315752
314,779,23,walzer,779/D779walzer23.mscx,minor,0.57587,0.819098,1.590646,0.713975,0.375713
342,783,5,deutscher,783/D783deutscher05.mscx,minor,0.540814,0.982174,1.643418,0.868694,0.299102
348,783,11,deutscher,783/D783deutscher11.mscx,minor,0.548716,0.85753,1.624273,0.706857,0.651459
351,783,14,deutscher,783/D783deutscher14.mscx,minor,0.633543,0.830951,1.493292,0.718854,0.709965
355,783,2,ecossaise,783/D783ecossaise02.mscx,minor,0.555444,0.978762,1.007479,0.835016,0.165022


- D128.10 starts ambiguously, suggesting F#min, but then moves to Dmaj. I'd say it is Dmaj.
- D145.3 starts Amin-->Cmaj, then Amaj
- D145.9 is F#min. Shift to the dominant might be confusing for the model.
- D366.9 I would say this is definitely Bmaj, although it starts in Bmin. The key signature is Bmaj (hence neg. corr with minor).
- D366.13 is Bbmin. Shift to relative major might be confusing for the model.
- D779.23 BEAUTIFUL! is Ebmaj, but starts with II/Gmin, and then moves to Eb through a deceptive cadence.
- D783.5 BEAUTIFUL! is Dmaj but starts with Bmin (just colouring).
- D783.11 starts Emin, then goes Gmaj till the end (correlation very close).
- D783.14 starts Fmin, then Abmaj, then second part in Fmaj (corr. very close).
- D783.2 (ecossaise) starts in Bmin (homophonic) but abruptly moves to Dmaj
- D924.11 starts in Emin, but just colouring. No cadence.
- D969.5 model is wrong. Strong Amin/Emin in the beginning.

It seems that deutscher and walzers are more likely to exhibit modal shifts in the beginning.

#### mode recognition based on first phrase

In [32]:
pc_list_distr_first = pc_list[(pc_list.index.get_level_values('section') == 0) & (pc_list.mc <= 4)].groupby('id').apply(lambda x: bag_of_notes(x, 'tpc'))

In [33]:
# Same correlation as before, restricted to the first-phrase profiles.
# `level=0` groups by dance id; the deprecated `axis=0` keyword is dropped.
first_key_profile = full_distr(pc_list_distr_first, files.index)
major_corr_first = first_key_profile.groupby(level = 0).profile.apply(lambda x, kk=KK(0,0): sp.stats.pearsonr(x, kk)[0]).to_frame('major')
minor_corr_first = first_key_profile.groupby(level = 0).profile.apply(lambda x, kk=KK(1,0): sp.stats.pearsonr(x, kk)[0]).to_frame('minor')
key_corr_first = pd.concat([major_corr_first, minor_corr_first], axis = 1)


In [34]:
major_dances_first = files.loc[key_corr_first[key_corr_first.minor<=key_corr_first.major].index]
minor_dances_first = files.loc[key_corr_first[key_corr_first.minor>key_corr_first.major].index]

print(f'When looking at the pitch distribution of first phrase only, there are {len(minor_dances_first)} dances whose pitch profile is closer to the minor KK key profile than to the major one.')

When looking at the pitch distribution of first phrase only, there are 68 dances whose pitch profile is closer to the minor KK key profile than to the major one.


In [35]:
detected_minor_first = minor_dances_first[minor_dances_first.index.isin(GT_minor)]
falsely_detected_minor_first = minor_dances_first[~minor_dances_first.index.isin(detected_minor_first.index)]
not_detected_minor_first = major_dances_first[major_dances_first.index.isin(GT_minor)]

print(f'This simple model successfully recognises {len(detected_minor_first)} of the {len(GT_minor)} dances labeled as minor in our ground-truth. It wrongly labels {len(falsely_detected_minor_first)} dances as minor and fails to recognise {len(not_detected_minor_first)} minor dances.')

This simple model successfully recognises 57 of the 63 dances labeled as minor in our ground-truth. It wrongly labels 11 dances as minor and fails to recognise 6 minor dances.


In [36]:
falsely_detected_minor_first.merge(key_corr_first, left_index = True, right_index = True)

Unnamed: 0,D,no,dance,path,gt_mode,entropy,duration_entropy,onset_entropy,major,minor
29,41,15,menuett,041/D041menuett15a.mscx,major,0.555932,1.390226,2.089522,0.509178,0.846547
37,41,19,menuett,041/D041menuett19a.mscx,major,0.599165,1.446722,1.944649,0.664435,0.830364
44,91,2,menuett,091/D091menuett02a.mscx,major,0.583645,1.356809,1.735498,0.756829,0.830454
51,128,5,deutscher,128/D128deutscher05.mscx,major,0.555251,1.145856,1.503694,0.703286,0.849986
125,146,16,walzer,146/D146walzer16.mscx,major,0.567782,0.87831,1.564401,0.437819,0.7703
220,420,3,deutscher,420/D420deutscher03.mscx,major,0.570527,0.861284,1.233569,0.576944,0.584717
229,420,12,deutscher,420/D420deutscher12.mscx,major,0.60764,1.437751,1.655534,0.680403,0.825352
231,421,3,ecossaise,421/D421ecossaise03.mscx,major,0.647691,0.987865,0.837939,0.715494,0.82697
299,779,8,walzer,779/D779walzer08.mscx,major,0.581138,0.943374,1.584548,0.518983,0.688841
346,783,9,deutscher,783/D783deutscher09.mscx,major,0.560845,0.808492,1.391723,0.0923,0.165038


- D041.15 beginning strongly minor
- D041.19 beginning has strong 'minor' feel, but it is just colour.
- D091.2  beginning has strong 'minor' feel, but it is just colour.
- D128.5 progression by thirds over the first phrase (which, however, should include bar 5)
- D146.16 minor start.
- D420.3 first phrase shifts to Bmin.
- D420.12 slightest minor colour in first phrase, within progression by thirds.
- D421.3 long Abmaj chord in bar 4
- D779.8 cadence in Bmin
- D783.9 section 0 is only pick-up measure
- D972.2 section 0 is only pick-up measure


In [37]:
not_detected_minor_first.merge(key_corr_first, left_index = True, right_index = True)

Unnamed: 0,D,no,dance,path,gt_mode,entropy,duration_entropy,onset_entropy,major,minor
194,366,9,ländler,366/D366ländler09.mscx,minor,0.604576,0.923784,1.41735,0.54902,-0.166303
245,643,1,deutscher,643/D643deutscher01.mscx,minor,0.67236,0.728341,1.690061,0.70975,0.69698
314,779,23,walzer,779/D779walzer23.mscx,minor,0.57587,0.819098,1.590646,0.226579,0.219348
342,783,5,deutscher,783/D783deutscher05.mscx,minor,0.540814,0.982174,1.643418,0.661921,0.576631
355,783,2,ecossaise,783/D783ecossaise02.mscx,minor,0.555444,0.978762,1.007479,0.838473,0.489397
389,924,11,walzer,924/D924walzer11.mscx,minor,0.554017,0.781686,1.310137,0.880939,0.642622


- D366.9 I would say this is definitely Bmaj, although it starts in Bmin. The key signature is Bmaj (hence neg. corr with minor).
- D643.1 cadence in Emaj
- D779.23 BEAUTIFUL! is Ebmaj, but starts with II/Gmin, and then moves to Eb through a deceptive cadence.
- D783.5 BEAUTIFUL! is Dmaj but starts with Bmin (just colouring).
- D783.2 (ecossaise) starts in Bmin (homophonic) but abruptly moves to Dmaj
- D924.11 starts in Emin, but just colouring. No cadence.

Deutscher and waltzes are definitely more likely than Ländler to exhibit a modal shift/colouring in the beginning.

In [38]:
# Small working subset (first 10 dances). `.copy()` makes it an independent frame,
# so the trajectory columns added to it later do not trigger SettingWithCopyWarning
# and never touch `files`.
test_files = files.head(10).copy()

# Mode tracking

In [39]:
def mode_slider(dance, m):
    '''Returns the correlation with major and minor key profiles in a 4-measures window starting at m

    Parameters
    ----------
    dance : id used to select the dance's rows from `note_list_transposed`.
    m : first measure number (`mn`) of the window; the window covers m .. m+3.

    Returns
    -------
    [major, minor] : Pearson correlations of the window's normalised duration
    profile with KK(0,0) and KK(1,0).

    The mask is computed on `note_list` and applied to `note_list_transposed`,
    so both frames must share the same index. The 12-bin profile assumes the
    tpc values of `note_list_transposed` already lie in 0..11.
    '''
    window = (note_list.mn >= m) & (note_list.mn < m+4)
    bag = bag_of_notes(note_list_transposed[window].loc[dance], 'tpc')

    # Float zeros from the start instead of an object frame plus fillna(0),
    # which relied on pandas' deprecated silent downcasting.
    key_profile = pd.DataFrame(0.0, index = list(range(12)), columns = ['profile'])
    for i in bag.index:
        key_profile.loc[i] = bag.duration_n.loc[i]

    major = sp.stats.pearsonr(key_profile.profile, KK(0,0))[0]
    minor = sp.stats.pearsonr(key_profile.profile, KK(1,0))[0]

    return [major, minor]

In [40]:
def _mode_trajectory(dance_id):
    """Return mode_slider(dance_id, i) for i in 0 .. (number of rows of the dance in `measure_list`) - 5."""
    n_windows = len(measure_list.loc[dance_id]) - 4
    return [mode_slider(dance_id, start) for start in range(n_windows)]


def maj_trajectory(dance):
    """Major-profile correlation of every 4-measure window of one dance.

    `dance` is a row of the files table (as passed by `DataFrame.apply(axis=1)`);
    its `.name` is the dance id. The id is printed as a progress indicator.
    Returns a list of floats, empty if the dance has four measures or fewer.
    """
    print(dance.name)
    return [float(major) for major, _ in _mode_trajectory(dance.name)]


def min_trajectory(dance):
    """Minor-profile correlation of every 4-measure window of one dance.

    Same contract as `maj_trajectory`, but returns the minor correlations.
    """
    print(dance.name)
    return [float(minor) for _, minor in _mode_trajectory(dance.name)]

Calculate the major/minor mode trajectory for every piece in `test_files` (the first ten dances).

In [41]:
test_files['maj_traj'] = test_files.apply(maj_trajectory, axis=1)

1
2
3
4
5
6
7
8
9
10


In [42]:
test_files['min_traj'] = test_files.apply(min_trajectory, axis=1)

1
2
3
4
5
6
7
8
9
10


# Key tracking

In [45]:
key_names = {0: 'C', 1: 'G', 2: 'D', 3: 'A', 4:'E', 5:'B', 6:'F#/Gb', 7:'C#/Db', 8:'G#/Ab', 9:'D#/Eb', 10:'A#/Bb', 11:'F'}
mode_names = {0: 'major', 1: 'minor'}

In [46]:
def key_slider(dance, m):
    '''Returns the correlation with all 24 key profiles in a 4-measures window starting at m

    Parameters
    ----------
    dance : id used to select the dance's rows from `note_list_transposed`.
    m : first measure number (`mn`) of the window; the window covers m .. m+3.

    Returns
    -------
    Series of Pearson correlations indexed by (mode name, key name), i.e.
    12 major keys followed by 12 minor keys in fifths order starting at C.

    The 12-bin profile assumes the tpc values of `note_list_transposed`
    already lie in 0..11.
    '''
    window = (note_list.mn >= m) & (note_list.mn < m+4)
    bag = bag_of_notes(note_list_transposed[window].loc[dance], 'tpc')

    # Float zeros from the start instead of an object frame plus fillna(0),
    # which relied on pandas' deprecated silent downcasting.
    key_profile = pd.DataFrame(0.0, index = list(range(12)), columns = ['profile'])
    for i in bag.index:
        key_profile.loc[i] = bag.duration_n.loc[i]

    key_labels = ['C', 'G',  'D',  'A', 'E','B', 'F#/Gb', 'C#/Db','G#/Ab', 'D#/Eb', 'A#/Bb', 'F']
    key_index = pd.MultiIndex.from_product([['major', 'minor'], key_labels])

    # itertools.product yields (mode, key) in the same order as `key_index`.
    # Minor profiles are shifted by -3 because KK(1, 0) has its tonic at position 3.
    correlations = [sp.stats.pearsonr(key_profile.profile, KK(mode, key - 3*mode))[0]
                    for mode, key in itertools.product([0, 1], range(12))]

    return pd.Series(correlations, index = key_index)

In [47]:
def key_trajectory(dance):
    """Key correlations of every 4-measure window of one dance.

    Parameters
    ----------
    dance : id of the dance (used to index `measure_list` and passed to `key_slider`).

    Returns
    -------
    [trajectory, best_keys] where `trajectory` is a DataFrame with one column
    per window (columns 0, 1, ...) and one row per key, and `best_keys` is the
    set of keys that reach the highest correlation in at least one window.
    """
    n_windows = len(measure_list.loc[dance]) - 4
    windows = [key_slider(dance, i) for i in range(n_windows)]

    # Concatenate once; growing a DataFrame with concat inside the loop copies
    # all previous columns on every iteration.
    if windows:
        trajectory = pd.concat(windows, axis = 1, ignore_index = True)
    else:
        trajectory = pd.DataFrame()

    local_max = {keys.idxmax() for keys in windows}

    return [trajectory, local_max]

In [48]:
def key_plot(dance):
    """Plot, window by window, the correlation of every key that is the best match in at least one window."""
    # Compute the trajectory once; it was previously recomputed three times per plot.
    trajectory, best_keys = key_trajectory(dance)
    trajectory[trajectory.index.isin(best_keys)].transpose().iplot('scatter')

In [49]:
key_plot(1)

### Clustering keys

### First section

In [50]:
all_corr = files.merge(key_corr_A, left_index = True, right_index = True)
px.scatter(all_corr, x = 'major', y = 'minor', color = 'dance')

In [56]:
all_corr['gt_mode'] = GT_ma_mi

px.scatter(all_corr, x = 'major', y = 'minor', color = 'gt_mode')

In [57]:
test_files.loc[test_files['gt_mode']=='minor'].head()

Unnamed: 0_level_0,D,no,dance,path,gt_mode,entropy,duration_entropy,onset_entropy,maj_traj,min_traj
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
7,41,4,menuett,041/D041menuett04a.mscx,minor,0.561346,1.098277,1.944786,"[0.3073222318928299, 0.2387442668085816, 0.170...","[0.8054444090049824, 0.7483545875906985, 0.555..."


In [58]:
a = [1, 2, 4]
len(a)

3

Initialize figure

In [65]:
# Position (0-based, used with .iloc) of the dance in `test_files` whose trajectory is animated.
index = 1
maj_traj = test_files.iloc[index]['maj_traj']
min_traj = test_files.iloc[index]['min_traj']

button_layer_1_height = 1.08
updatemenus=[
    # NOTE(review): these entries restyle an attribute called "index" on the traces;
    # they do not change the Python variable `index` above, so selecting a dance
    # here presumably has no visible effect — confirm the intent.
    go.layout.Updatemenu(
        buttons=list([
            dict(
                args=["index", 3],
                label="Menuett 1",
                method="restyle"
            ),
            dict(
                args=["index", 4],
                label="Menuett 2",
                method="restyle"
            ),
            dict(
                args=["index", 5],
                label="Waltz 1",
                method="restyle"
            )
        ]),
        direction="down",
        pad={"r": 10, "t": 10},
        showactive=True,
        x=0.1,
        xanchor="left",
        y=button_layer_1_height,
        yanchor="top"
    ),
    go.layout.Updatemenu(
        buttons=list([
            dict(
                args=[None, dict(frame=dict(duration=500, redraw=False),
                                 fromcurrent=True, transition=dict(duration=300, easing='quadratic-in-out'))],
                label="Play",
                method="animate"
            ),
            dict(
                # [None] (a list) interrupts the running animation; a bare None
                # would start playing all frames, i.e. behave like "Play".
                args=[[None], dict(frame=dict(duration=0, redraw=False),
                                   mode='immediate', transition=dict(duration=0))],
                label="Pause",
                method="animate"
            )
        ]),
        direction="left",
        pad={"r": 10, "t": 10},
        showactive=False,
        type = 'buttons',
        x=0.37,
        xanchor="left",
        y=button_layer_1_height,
        yanchor="top"
    )
]


# Static background: first-section correlations of all dances, coloured by
# ground-truth mode. The list is called `traces` so that the configuration
# variable `data` (the data folder) is not overwritten.
is_major = all_corr['gt_mode'] == 'major'
is_minor = all_corr['gt_mode'] == 'minor'
traces = [go.Scatter(x=all_corr.loc[is_major, 'major'].to_list(),
                     y=all_corr.loc[is_major, 'minor'].to_list(),
                     mode="markers",
                     marker=dict(color='blue')),
          go.Scatter(x=all_corr.loc[is_minor, 'major'].to_list(),
                     y=all_corr.loc[is_minor, 'minor'].to_list(),
                     mode="markers",
                     marker=dict(color='red'))]


def _marker(maj_corr, min_corr):
    """Green marker showing the (major, minor) correlation of one window."""
    return {"x": [maj_corr], "y": [min_corr],
            "mode": "markers",
            "marker": {"color": "green", "sizeref": 100, "size": 20}}


# The moving marker is the last trace; frames update this trace only, so the
# background scatters no longer need duplicates to survive the animation.
marker_trace = len(traces)
traces.append(_marker(maj_traj[0], min_traj[0]))

sliders_dict = dict(active=0,   # the figure starts on the first window
                    currentvalue=dict(font=dict(size=20),
                                                prefix='Bar ',
                                                visible=False),
                    transition=dict(duration=300, easing='cubic-in-out'),
                    steps=[])

frames = []
for bar, (maj_corr, min_corr) in enumerate(zip(maj_traj, min_traj)):
    # One named frame per window; the slider step below addresses it by that name.
    frame_name = str(bar)
    frames.append({"data": [_marker(maj_corr, min_corr)],
                   "name": frame_name,
                   "traces": [marker_trace]})

    slider_step = dict(args=[[frame_name], dict(frame=dict(duration=100, redraw=False),
                                                mode='immediate',
                                                transition=dict(duration=100))],
                       label=frame_name,
                       method='animate')
    sliders_dict['steps'].append(slider_step)

layout=go.Layout(width=1000, height=700,
                 title="Emotionmeter",
                 updatemenus=updatemenus,
                 sliders=[sliders_dict],
                )

fig = go.Figure(traces, layout, frames)
fig.show()
