In [1]:
import librosa
import numpy as np

# Pitch-class names, indexed the way this cell reads the chromagram rows (row 0 is treated as C)
chroma_to_key = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

# Read the track: librosa.load gives back the samples and their sampling rate
audio_file_path = 'data/lofi-type-beat.mp3'
y, sr = librosa.load(audio_file_path)

# Chromagram derived from the short-time Fourier transform of the signal
chromagram = librosa.feature.chroma_stft(y=y, sr=sr)

# Average each chromagram row over axis 1, leaving one value per row
mean_chroma = chromagram.mean(axis=1)

# The row with the largest average is taken as the key estimate
estimated_key_index = mean_chroma.argmax()
estimated_key = chroma_to_key[estimated_key_index]

# Report the result
print("Detected Key:", estimated_key)

Detected Key: F


# Getting the major or minor key

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import librosa
import librosa.display

In [30]:
# class that uses the librosa library to analyze the key that an mp3 is in
# arguments:
#     waveform: an mp3 file loaded by librosa, ideally separated out from any percussive sources
#     sr: sampling rate of the mp3, which can be obtained when the file is read with librosa
#     tstart and tend: the range in seconds of the file to be analyzed; default to the beginning and end of file if not specified
class Tonal_Fragment(object):
    """Estimate the musical key of a stretch of audio.

    The chromagram of the selected segment is summed per pitch class and the
    resulting 12 values are correlated with the Krumhansl-Schmuckler major and
    minor key profiles, rotated to start on each of the 12 pitches.

    Args:
        waveform: audio samples as loaded by librosa; sliced by sample index.
        sr: sampling rate of ``waveform``.
        tstart: start of the analysed range in seconds (None = beginning).
        tend: end of the analysed range in seconds (None = end of waveform).

    Attributes set by the constructor:
        tstart, tend: the range bounds converted to sample indices (or None).
        y_segment: the slice of ``waveform`` that was analysed.
        chromograph: output of ``librosa.feature.chroma_cqt`` for the segment.
        chroma_vals: the sum of each of the first 12 chromagram rows.
        keyfreqs: dict mapping pitch name -> entry of ``chroma_vals``.
        maj_key_corrs, min_key_corrs: correlation per tonic, rounded to 3 places.
        key_dict: dict mapping '<pitch> major' / '<pitch> minor' -> correlation.
        key, bestcorr: best-correlated key and its correlation.
        altkey, altbestcorr: the runner-up key and its correlation, set only
            when that correlation exceeds 90% of ``bestcorr``; otherwise None.
    """

    # pitch-class names in the order the chromagram rows are read
    _PITCHES = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')

    # Krumhansl-Schmuckler key profiles, listed starting from the tonic
    _MAJ_PROFILE = (6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88)
    _MIN_PROFILE = (6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17)

    # the runner-up is reported only if its correlation exceeds this share of the best one
    _ALT_KEY_RATIO = 0.9

    def __init__(self, waveform, sr, tstart=None, tend=None):
        self.waveform = waveform
        self.sr = sr
        self.tstart = tstart
        self.tend = tend

        # convert the bounds from seconds to sample indices; None keeps the slice open-ended
        if self.tstart is not None:
            self.tstart = librosa.time_to_samples(self.tstart, sr=self.sr)
        if self.tend is not None:
            self.tend = librosa.time_to_samples(self.tend, sr=self.sr)
        self.y_segment = self.waveform[self.tstart:self.tend]
        self.chromograph = librosa.feature.chroma_cqt(y=self.y_segment, sr=self.sr, bins_per_octave=24)

        # chroma_vals is the amount of each pitch class present in this time interval
        self.chroma_vals = [np.sum(self.chromograph[i]) for i in range(12)]

        self._correlate_with_profiles()
        self._select_keys()

    def _correlate_with_profiles(self):
        """Fill keyfreqs, maj_key_corrs, min_key_corrs and key_dict from chroma_vals."""
        pitches = self._PITCHES
        # dictionary relating pitch names to the associated intensity in the segment
        self.keyfreqs = {pitches[i]: self.chroma_vals[i] for i in range(12)}

        self.min_key_corrs = []
        self.maj_key_corrs = []
        for i in range(12):
            # pitch-class amounts rotated so that the candidate tonic comes first,
            # which lines them up with the tonic-first profiles
            key_test = [self.keyfreqs.get(pitches[(i + m) % 12]) for m in range(12)]
            # correlation coefficients (strengths of correlation for each key)
            self.maj_key_corrs.append(round(np.corrcoef(self._MAJ_PROFILE, key_test)[1, 0], 3))
            self.min_key_corrs.append(round(np.corrcoef(self._MIN_PROFILE, key_test)[1, 0], 3))

        # all major keys first, then all minor keys, each in pitch order
        self.key_dict = {}
        for pitch, corr in zip(pitches, self.maj_key_corrs):
            self.key_dict[pitch + ' major'] = corr
        for pitch, corr in zip(pitches, self.min_key_corrs):
            self.key_dict[pitch + ' minor'] = corr

    def _select_keys(self):
        """Set key/bestcorr and, when close enough, altkey/altbestcorr from key_dict."""
        # this attribute represents the key determined by the algorithm
        self.key = max(self.key_dict, key=self.key_dict.get)
        self.bestcorr = self.key_dict[self.key]

        self.altkey = None
        self.altbestcorr = None

        # The runner-up is the highest-correlated key other than the winner.
        # The winner is excluded by name rather than by value, so a different key
        # that ties with it is still reported, and only the single best
        # alternative is kept instead of whichever qualifying key comes last.
        others = {name: corr for name, corr in self.key_dict.items() if name != self.key}
        if others:
            runner_up = max(others, key=others.get)
            if others[runner_up] > self.bestcorr * self._ALT_KEY_RATIO:
                self.altkey = runner_up
                self.altbestcorr = others[runner_up]

    # prints the correlation coefficients associated with each major/minor key
    def corr_table(self):
        for key, corr in self.key_dict.items():
            print(key, '\t', f'{corr:6.3f}')

    # printout of the key determined by the algorithm; if another key is close, that key is mentioned
    def print_key(self):
        print("likely key: ", self.key, ", correlation: ", self.bestcorr, sep='')
        if self.altkey is not None:
            print("also possible: ", self.altkey, ", correlation: ", self.altbestcorr, sep='')

In [31]:
audio_path = 'data/livin-on-a-prayer.mp3'
# librosa.load returns a tuple: the audio samples y and their sampling rate sr
# (note: this rebinds the y and sr that the first cell assigned for a different file)
y, sr = librosa.load(audio_path)
# hpss separates the harmonic part of the signal from the percussive part; the
# harmonic part is the one used for the key analysis, allowing for more accurate
# harmonic analysis
y_harmonic, y_percussive = librosa.effects.hpss(y)
# last expression of the cell: builds an audio display object for the same file
ipd.Audio(audio_path)

In [28]:
# Walk through the harmonic signal in fixed-length windows and record where the
# estimated key changes, as a rough picture of the song's key changes.
bin_size = 3  # window length in seconds

# key_changes holds (start_second, key) tuples: one for the first window and one
# for every later window whose key differs from the window right before it.
# Only whole windows are analysed; a shorter remainder at the end is not visited.
key_changes = []
last_key = None
for bin_index in range(int(len(y_harmonic)/sr//bin_size)):
    bin_start = bin_size * bin_index
    fragment = Tonal_Fragment(y_harmonic, sr, tstart=bin_start, tend=bin_start + bin_size)
    # last_key starts out as None, so the first window is always recorded
    if fragment.key != last_key:
        key_changes.append((bin_start, fragment.key))
    last_key = fragment.key

# print out the key changes, one tuple per line
for change in key_changes:
    print(change)



(0, 'E minor')
(3, 'D major')
(6, 'E minor')
(9, 'C major')
(12, 'E minor')
(30, 'D minor')
(33, 'E minor')
(36, 'C major')
(39, 'E minor')
(51, 'C major')
(54, 'D minor')
(57, 'E minor')
(66, 'C major')
(69, 'B minor')
(72, 'E minor')
(75, 'D major')
(78, 'G major')
(81, 'E minor')
(84, 'D minor')
(87, 'C minor')
(90, 'G major')
(93, 'D major')
(96, 'G major')
(105, 'D minor')
(108, 'E minor')
(120, 'B minor')
(123, 'E minor')
(141, 'G major')
(147, 'E minor')
(150, 'D minor')
(153, 'C minor')
(156, 'G major')
(159, 'D major')
(162, 'G major')
(174, 'C major')
(177, 'G major')
(186, 'D major')
(189, 'C major')
(192, 'E minor')
(195, 'D minor')
(198, 'C minor')
(201, 'A# major')
(204, 'F major')
(210, 'A# major')
(219, 'F major')
(225, 'A# major')
(234, 'F major')
(237, 'A# major')
