# Introduction
This organises the linear data, calculates anomalies, and provides some overview statistics

## Setup

In [36]:
from music21 import *
import os
import csv
import itertools as it
import statistics
import numpy as np
import pandas as pd
import math

In [37]:
linear_units_url = 'https://raw.githubusercontent.com/anonymousmuso/Viennese-Trichord/main/Data/Linear%20Harmonies%20-%20Transposed%20Normal%20Order.csv'
linear_units = pd.read_csv(linear_units_url, error_bad_lines=False)
metadata_url = 'https://raw.githubusercontent.com/anonymousmuso/Viennese-Trichord/main/Data/Metadata.csv'
metadata = pd.read_csv(metadata_url, error_bad_lines=False)
verticalities_url = 'https://raw.githubusercontent.com/anonymousmuso/Viennese-Trichord/main/Data/Verticalities%20-%20Normal%20Order.csv'
verticalities = pd.read_csv(verticalities_url, error_bad_lines=False)
linear_units_pf_url = 'https://raw.githubusercontent.com/anonymousmuso/Viennese-Trichord/main/Data/Linear%20Harmonies%20-%20Prime%20Form.csv'
linear_units_new_pf = pd.read_csv(linear_units_pf_url, error_bad_lines=False)
linear_units_new_no = pd.read_csv(linear_units_url, error_bad_lines=False)


In [38]:
no_of_parts_data = []
duration_data = []
chronological_position_data = []

for i in linear_units.index.values:
    for j in range(0, 107):
        if i == metadata.iloc[j:j+1, :].index.values:
            position = j
            tempDf = metadata.iloc[j:j+1]
            tempDf = tempDf.drop(columns=['Filestring', 'Opus Number', 'Movement Number', 'Corpus Position', 'Vocal Or Instrumental', 'Date'])
            no_of_parts_data.append(float(tempDf['Number of Parts'].values))
            duration_data.append(float(tempDf['Duration'].values))
            chronological_position_data.append(float(tempDf['Chronological Position'].values))
no_of_parts_data.append('')
duration_data.append('')
chronological_position_data.append(108)
linear_units['Number of Parts'] = no_of_parts_data
linear_units['Duration'] = duration_data
linear_units['Chronological Position'] = chronological_position_data
linear_units = linear_units.sort_values(by=['Chronological Position'])

# Prime Form

In [49]:
## this compiles VT data from the list of all linear trichords

linear_units_cropped = linear_units.drop(columns=['Number of Parts', 'Duration', 'Chronological Position'])
linear_units_cropped = linear_units_cropped.transpose().drop(columns = ['Total']).transpose()
newVerticalities = linear_units_cropped
listOfColumns = list(newVerticalities.columns)
listOfTitles = list(newVerticalities.index.values.flat)
verticalityPosition1 = listOfColumns.index('[0, 1, 6]')
verticalityPosition2 = listOfColumns.index('[0, 5, 6]')

firstValues = newVerticalities.iloc[:, verticalityPosition1:verticalityPosition1+1]
secondValues = newVerticalities.iloc[:, verticalityPosition2:verticalityPosition2+1]

frames = [firstValues, secondValues]

combined = pd.concat(frames, axis=1)

totals = []
for i in range(0,len(combined.index.values)):
    tempDf = combined.iloc[i:i+1, :]
    totals.append(sum(tempDf.values.flat))
    
combined['Total'] = totals
        
combined = combined.reset_index()
combined = combined.rename(columns={"index": "Title"})
combined

Unnamed: 0,Title,"[0, 1, 6]","[0, 5, 6]",Total
0,Op. 1,1.719942,1.249392,2.969333
1,Op. 2,2.402957,0.000000,2.402957
2,Op. 3 i,3.816794,5.343511,9.160305
3,Op. 3 ii,7.981221,13.145540,21.126761
4,Op. 3 iii,6.122449,4.081633,10.204082
...,...,...,...,...
102,Op. 31 v,0.367647,0.000000,0.367647
103,Op. 31 vi,0.000000,0.000000,0.000000
104,Op. 31 i,4.938272,0.000000,4.938272
105,Op. 31 ii,0.000000,0.000000,0.000000


In [41]:
## this provides some overview statistics

firstCount = []
for i in combined.iloc[:, 1:2].values.flat:
    if i == 0:
        pass
    else:
        firstCount.append(i)
print('[0, 1, 6] appears in', len(firstCount), 'movements')
print('The median value of these proportions is', statistics.median(firstCount))
print('The mean value of these proportions is', statistics.mean(firstCount))

secondCount = []
for i in combined.iloc[:, 2:3].values.flat:
    if i == 0:
        pass
    else:
        secondCount.append(i)
print('[0, 5, 6] appears in', len(secondCount), 'movements')
print('The median value of these proportions is', statistics.median(secondCount))
print('The mean value of these proportions is', statistics.mean(secondCount))

[0, 1, 6] appears in 87 movements
The median value of these proportions is 4.3478260869565215
The mean value of these proportions is 4.545833743264162
[0, 5, 6] appears in 85 movements
The median value of these proportions is 4.25531914893617
The mean value of these proportions is 5.023494484979272


In [42]:
organisedMetadata = metadata.reset_index()
organisedMetadata = organisedMetadata.drop(columns=['Title'])
orderedData = combined.set_index("Title")
frames = [organisedMetadata, combined]

combinedWithMetadata = pd.concat(frames, axis=1)
combinedWithMetadata = combinedWithMetadata.set_index('Title')
combinedWithMetadata = combinedWithMetadata.drop(columns=['Filestring', 'Opus Number', 'Movement Number', 'Corpus Position', 'Vocal Or Instrumental']) 

In [45]:
## this provides some correlations with other variables

corr = combinedWithMetadata.corr(method = 'spearman')
corr

Unnamed: 0,Number of Parts,Duration,Date,Chronological Position,"[0, 1, 6]","[0, 5, 6]",Total
Number of Parts,1.0,0.282848,0.119598,0.16328,-0.297561,-0.326383,-0.306356
Duration,0.282848,1.0,0.263383,0.281398,-0.203304,-0.29589,-0.287453
Date,0.119598,0.263383,1.0,0.995109,-0.182577,-0.178367,-0.201712
Chronological Position,0.16328,0.281398,0.995109,1.0,-0.206262,-0.204459,-0.225347
"[0, 1, 6]",-0.297561,-0.203304,-0.182577,-0.206262,1.0,0.723464,0.908885
"[0, 5, 6]",-0.326383,-0.29589,-0.178367,-0.204459,0.723464,1.0,0.932863
Total,-0.306356,-0.287453,-0.201712,-0.225347,0.908885,0.932863,1.0


In [46]:
## this calculates anomalous VT values (Prime Form)

verticalities_pf = linear_units_new_pf
for i in range(0, len(verticalities_pf)-1):
    temp_df = verticalities_pf.iloc[i:i+1, :]
    
    newData = temp_df.loc[:, (temp_df != 0).any(axis=0)]
    anomalyDataValues = newData.values
    listOfValues= []
    for el in anomalyDataValues.flat:
        listOfValues.append(el)
    firstQuartile = np.percentile(listOfValues, 25)
    thirdQuartile = np.percentile(listOfValues, 75)
    IQR = thirdQuartile-firstQuartile

    topQuartileCutoff = thirdQuartile+1.5*IQR
    bottomQuartileCutoff = firstQuartile-1.5*IQR
    listOfAnomalies = []
    for el in listOfValues:
        if el > topQuartileCutoff:
            temp = []
            temp.append(newData.columns.values[listOfValues.index(el)])
            temp.append(el)
            listOfAnomalies.append(temp)
        if el < bottomQuartileCutoff:
            temp = []
            temp.append(newData.columns.values[listOfValues.index(el)])
            temp.append(el)
            listOfAnomalies.append(temp)
    #print(listOfAnomalies)
    for j in listOfAnomalies:
        if '[0, 1, 6]' in j:
            print(newData.index)



Index(['Op. 3 ii'], dtype='object')
Index(['Op. 7 iii'], dtype='object')


# Normal Order

In [48]:
## this calculates anomalous VT values (Normal Order)

verticalities_pf = linear_units_new_no
for i in range(0, len(verticalities_pf)-1):
    temp_df = verticalities_pf.iloc[i:i+1, :]
    
    newData = temp_df.loc[:, (temp_df != 0).any(axis=0)]
    anomalyDataValues = newData.values
    listOfValues= []
    for el in anomalyDataValues.flat:
        listOfValues.append(el)
    firstQuartile = np.percentile(listOfValues, 25)
    thirdQuartile = np.percentile(listOfValues, 75)
    IQR = thirdQuartile-firstQuartile

    topQuartileCutoff = thirdQuartile+1.5*IQR
    bottomQuartileCutoff = firstQuartile-1.5*IQR
    listOfAnomalies = []
    for el in listOfValues:
        if el > topQuartileCutoff:
            temp = []
            temp.append(newData.columns.values[listOfValues.index(el)])
            temp.append(el)
            listOfAnomalies.append(temp)
        if el < bottomQuartileCutoff:
            temp = []
            temp.append(newData.columns.values[listOfValues.index(el)])
            temp.append(el)
            listOfAnomalies.append(temp)
    #print(listOfAnomalies)
    for j in listOfAnomalies:
        if '[0, 1, 6]' in j:
            print(newData.index, '1')
        if '[0, 5, 6]' in j:
            print(newData.index, '5')