In [16]:
import librosa
import numpy as np
import math
import librosa.display as display
import matplotlib.pyplot as plt
from collections import Counter

In [17]:
audio_path = 'data/Vibe - Odhora (with lyrics).wav'

In [18]:
y, sr = librosa.load(audio_path)

In [19]:
print(y)

[0. 0. 0. ... 0. 0. 0.]


In [20]:
tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)

In [21]:
print(tempo)
print(beat_frames)

129.19921875
[   55    76    94   111   128   148   167   187   207   227   248   268
   288   308   328   348   369   390   410   430   450   471   491   511
   530   550   571   591   611   631   651   671   691   712   733   753
   773   794   814   834   854   874   894   914   932   950   968   986
  1006  1025  1045  1066  1086  1106  1126  1146  1166  1186  1207  1227
  1246  1266  1286  1307  1328  1348  1368  1389  1409  1429  1450  1470
  1490  1510  1531  1551  1571  1591  1611  1631  1651  1671  1692  1712
  1732  1753  1772  1793  1813  1834  1854  1873  1893  1914  1934  1954
  1974  1994  2014  2034  2054  2075  2095  2115  2135  2156  2176  2197
  2216  2236  2256  2277  2297  2317  2337  2358  2379  2399  2419  2439
  2459  2479  2499  2519  2539  2559  2580  2600  2620  2641  2661  2681
  2702  2722  2741  2762  2781  2803  2823  2842  2862  2883  2904  2923
  2944  2964  2984  3004  3024  3044  3064  3084  3104  3125  3145  3166
  3185  3206  3226  3247  3268  3287  

In [22]:
# number of beats
n_beats = len(beat_frames)

print("Number Of Beats:", n_beats)

Number Of Beats: 594


In [23]:
# number of pitch classes
###### FLAWED CODE ######

chromagram = librosa.feature.chroma_stft(y=y, sr=sr)

# Calculate number of pitch classes
pitch_classes = np.shape(chromagram)[0]

print("Number Of Pitch Classes:", pitch_classes)

Number Of Pitch Classes: 12


In [24]:
chroma = librosa.feature.chroma_cqt(y=y, sr=sr)

# Normalize the chroma matrix so that the values are between 0 and 1
chroma = librosa.util.normalize(chroma, axis=0)

# Take the sum along the columns to get the number of pitch classes
pitch_classes = np.sum(chroma, axis=0)

print("Number of pitch classes: ", np.mean(pitch_classes))

Number of pitch classes:  4.979615


In [25]:
# hemitonicity
spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)

# Calculate hemitonicity
hemitonicity = np.mean(spectral_contrast)

print("Hemitonicity:", hemitonicity)

Hemitonicity: 28.25907755280757


In [26]:
def hz_to_cents(arr):
    res = []
    for i in range(arr.size):
        if(arr[i]>0):
            mul = arr[i]/440
            ans = 1200* (math.log2(mul))
            res.append(ans)
    return res

In [27]:
# melodic range
# Compute pitch chroma
#chroma = librosa.feature.chroma_stft(y=y, sr=sr)

# Calculate melodic range
#melodic_range = np.ptp(chroma)
#print(melodic_range)

pitch, magnitude = librosa.piptrack(y=y, sr=sr)
pitch_sequence = np.argmax(pitch, axis=0)

cents = hz_to_cents(pitch_sequence)

min_cents = np.min(cents)
max_cents = np.max(cents)
melo_range = max_cents - min_cents

print("Melodic Range:", melo_range)

Melodic Range: 5673.50454547584


In [28]:
# MELISMA
mfcc = librosa.feature.mfcc(y=y, sr=sr)
spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
rms = librosa.feature.rms(y=y)
zcr = librosa.feature.zero_crossing_rate(y)

# Calculate Melisma based on your desired method
melisma = np.mean(mfcc) * np.mean(spectral_contrast) * np.mean(rms) * np.mean(zcr)

print("Melisma:", melisma)

Melisma: 1.48852492055703


In [29]:
def motivic_redundancy(y, sr):
    C = librosa.feature.chroma_cqt(y=y, sr=sr)
    chroma_motifs = []
    for i in range(C.shape[1]-2):
        chroma_motifs.append(C[:,i:i+3])
    motivic_redundancy = []
    for i in range(len(chroma_motifs)):
        for j in range(len(chroma_motifs)):
            if i!=j:
                similarity = np.dot(chroma_motifs[i].T, chroma_motifs[j])
                motivic_redundancy.append(np.max(similarity))
    return np.mean(motivic_redundancy)

In [30]:
# motivic_red = motivic_redundancy(y, sr)
# print("Motivic Redundancy: ", motivic_red)

In [31]:
#phrase_length = np.mean(np.diff(beat_frames)) / 4
#print(phrase_length)
# Convert the phrase length from seconds to samples
#phrase_length_samples = int(phrase_length * sr)
#print(phrase_length_samples)

In [32]:
onset_envelope = librosa.onset.onset_strength(y=y, sr=sr)

# Identify onsets
onsets = librosa.onset.onset_detect(y=y, onset_envelope=onset_envelope, sr=sr)

# Calculate the difference between consecutive onsets
phrase_length = np.diff(onsets)
# Convert to seconds
phrase_length_sec = phrase_length / sr
print("Maximum Phrase Length in Seconds: ",np.max(phrase_length_sec))

Maximum Phrase Length in Seconds:  0.002766439909297052


In [33]:
print(np.max(phrase_length))
print(np.min(phrase_length))
print(np.max(phrase_length)/np.min(phrase_length))

61
2
30.5


In [34]:
max_phrase_len = np.max(phrase_length_sec)
min_phrase_len = np.min(phrase_length_sec)
print(max_phrase_len)
print(min_phrase_len)
print("Phrase Symmetry: ",max_phrase_len/min_phrase_len)

0.002766439909297052
9.070294784580499e-05
Phrase Symmetry:  30.499999999999996


In [35]:
def most_frequent(list1):
    count = Counter(list1)
    return max(count, key=count.get) , max(count.values())

In [36]:
overlap = []
for i in range(len(phrase_length) - 1):
    overlap.append(sum(phrase_length[i:i+2]) / max(phrase_length[i:i+2]))
    
num,times = most_frequent(overlap)
percentage = (times/len(overlap))*100

# Print overlap results
print("Phrase overlap: ", percentage)

Phrase overlap:  13.048245614035087
