Skip to content


Browse files Browse the repository at this point in the history
Use a vector instead of a map to store fingerprints and reserve its s…

Use a vector so the map doesn't have to be converted to a vector for
each iteration (since __gnu_parallel::for_each doesn't work on
non-random-access iterators).

Before (gcc7): around 2.9~3 seconds per iteration (30000 songs/second)
After (gcc8): around 2.4 seconds per iteration (36600 songs/second)
  • Loading branch information
antlarr committed Jul 10, 2018
1 parent 046da19 commit 52bce48
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 9 deletions.
1 change: 1 addition & 0 deletions bard/
Expand Up @@ -898,6 +898,7 @@ def findAudioDuplicates2(self, from_song_id=None):
speeds = []
songs_processed = 0
totalSongsCount = MusicDatabase.getSongsCount()
sql = ('SELECT id, fingerprint, sha256sum, audio_sha256sum, path, '
'completeness FROM fingerprints, songs, checksums, '
'properties where and '
Expand Down
46 changes: 37 additions & 9 deletions bard/bard_ext.cpp
Expand Up @@ -57,6 +57,9 @@ class FingerprintManager
void setMaxOffset(int maxoffset);
int maxOffset() const;

void setExpectedSize(int expectedSize);
int size() const;

void addSong(long songID, boost::python::list &fingerprint);
boost::python::list addSongAndCompare(long songID, boost::python::list &fingerprint, double cancelThreshold=0.55);
std::pair<int, double> compareSongs(long songID1, long songID2, double cancelThreshold=0.55);
Expand All @@ -65,9 +68,12 @@ class FingerprintManager
std::pair<int, double> compareChromaprintFingerprintsAndOffset(const std::vector<int> &fp1, const std::vector<int> &fp2, double cancelThreshold) const;
boost::python::list compareChromaprintFingerprintsAndOffsetVerbose(std::vector<int> fp1, std::vector<int> fp2) const;

std::vector<int> songFingerprint(int songID);

int m_maxoffset;
std::map<int, std::vector<int>> m_fingerprints;
std::vector<std::pair<int,std::vector<int>>> m_fingerprints;

FingerprintManager::FingerprintManager(): m_maxoffset(50)

This comment has been minimized.

Copy link

miscco Jul 11, 2018

Rather than declaring an unneeded default constructor you can simply use member initialiation and declare
int m_maxoffset{50};

That way you would not need the constructor at all.

Expand All @@ -85,11 +91,33 @@ int FingerprintManager::maxOffset() const
return m_maxoffset;

void FingerprintManager::setExpectedSize(int expectedSize)

int FingerprintManager::size() const
return m_fingerprints.size();

std::vector<int> FingerprintManager::songFingerprint(int songID)
auto it = std::lower_bound( m_fingerprints.begin(), m_fingerprints.end(), songID,
[](auto x, auto y)
{ return x.first < y;
if (it == m_fingerprints.end())
return std::vector<int>();
return it->second;

void FingerprintManager::addSong(long songID, boost::python::list &fingerprint)
auto v = to_std_vector<int>(fingerprint);
v.insert(v.begin(), m_maxoffset, 0);
m_fingerprints.emplace_back(std::make_pair(songID, v));

This comment has been minimized.

Copy link

miscco Jul 11, 2018

This is superfluous, emplace_back forwards the arguments to the constructor, which is the same thing that std::make_pair does. So you should rather use:
m_fingerprints.emplace_back(songID, v);

This comment has been minimized.

Copy link

yumetodo Jul 12, 2018

watch #3


boost::python::list FingerprintManager::addSongAndCompare(long songID, boost::python::list &fingerprint, double cancelThreshold)
Expand All @@ -99,9 +127,7 @@ boost::python::list FingerprintManager::addSongAndCompare(long songID, boost::py
auto v = to_std_vector<int>(fingerprint);
v.insert(v.begin(), m_maxoffset, 0);

auto vectorizedFP = std::vector<std::pair<int,std::vector<int>>>(m_fingerprints.begin(), m_fingerprints.end());

__gnu_parallel::for_each(vectorizedFP.begin(), vectorizedFP.end(),
__gnu_parallel::for_each(m_fingerprints.begin(), m_fingerprints.end(),
[&](const auto &itSong)
auto & [itSongID, itFingerprint] = itSong;
Expand All @@ -113,7 +139,7 @@ boost::python::list FingerprintManager::addSongAndCompare(long songID, boost::py
}, __gnu_parallel::parallel_balanced);
m_fingerprints.emplace_back(std::make_pair(songID, v));
return result;

Expand Down Expand Up @@ -238,12 +264,12 @@ boost::python::list FingerprintManager::compareChromaprintFingerprintsAndOffsetV

std::pair<int, double> FingerprintManager::compareSongs(long songID1, long songID2, double cancelThreshold)
return compareChromaprintFingerprintsAndOffset(m_fingerprints[songID1], m_fingerprints[songID2], cancelThreshold);
return compareChromaprintFingerprintsAndOffset(songFingerprint(songID1), songFingerprint(songID2), cancelThreshold);

boost::python::list FingerprintManager::compareSongsVerbose(long songID1, long songID2)
return compareChromaprintFingerprintsAndOffsetVerbose(m_fingerprints[songID1], m_fingerprints[songID2]);
return compareChromaprintFingerprintsAndOffsetVerbose(songFingerprint(songID1), songFingerprint(songID2));

Expand All @@ -257,7 +283,9 @@ BOOST_PYTHON_MODULE(bard_ext)
.def("compareSongs", &FingerprintManager::compareSongs)
.def("compareSongsVerbose", &FingerprintManager::compareSongsVerbose)
.def("setMaxOffset", &FingerprintManager::setMaxOffset)
.def("maxOffset", &FingerprintManager::maxOffset);
.def("maxOffset", &FingerprintManager::maxOffset)
.def("setExpectedSize", &FingerprintManager::setExpectedSize)
.def("size", &FingerprintManager::size);

0 comments on commit 52bce48

Please sign in to comment.