Skip to content
Permalink
Browse files

Use a vector instead of a map to store fingerprints and reserve its s…

…pace.

Use a vector so the map doesn't have to be converted to a vector for
each iteration (since __gnu_parallel::for_each doesn't work on
non-random-access iterators).

Before (gcc7): around 2.9~3 seconds per iteration (30000 songs/second)
After (gcc8): around 2.4 seconds per iteration (36600 songs/second)
  • Loading branch information...
antlarr committed Jun 28, 2018
1 parent 046da19 commit 52bce48eca635f6210036366759ecd9c27ea0924
Showing with 38 additions and 9 deletions.
  1. +1 −0 bard/bard.py
  2. +37 −9 bard/bard_ext.cpp
@@ -898,6 +898,7 @@ def findAudioDuplicates2(self, from_song_id=None):
speeds = []
songs_processed = 0
totalSongsCount = MusicDatabase.getSongsCount()
fpm.setExpectedSize(totalSongsCount+5)
sql = ('SELECT id, fingerprint, sha256sum, audio_sha256sum, path, '
'completeness FROM fingerprints, songs, checksums, '
'properties where songs.id=fingerprints.song_id and '
@@ -57,6 +57,9 @@ class FingerprintManager
void setMaxOffset(int maxoffset);
int maxOffset() const;

void setExpectedSize(int expectedSize);
int size() const;

void addSong(long songID, boost::python::list &fingerprint);
boost::python::list addSongAndCompare(long songID, boost::python::list &fingerprint, double cancelThreshold=0.55);
std::pair<int, double> compareSongs(long songID1, long songID2, double cancelThreshold=0.55);
@@ -65,9 +68,12 @@ class FingerprintManager
std::pair<int, double> compareChromaprintFingerprintsAndOffset(const std::vector<int> &fp1, const std::vector<int> &fp2, double cancelThreshold) const;
boost::python::list compareChromaprintFingerprintsAndOffsetVerbose(std::vector<int> fp1, std::vector<int> fp2) const;

protected:
std::vector<int> songFingerprint(int songID);

private:
int m_maxoffset;
std::map<int, std::vector<int>> m_fingerprints;
std::vector<std::pair<int,std::vector<int>>> m_fingerprints;
};

FingerprintManager::FingerprintManager(): m_maxoffset(50)

This comment has been minimized.

@miscco

miscco Jul 11, 2018

Rather than declaring an unneeded default constructor you can simply use member initialiation and declare
int m_maxoffset{50};

That way you would not need the constructor at all.

@@ -85,11 +91,33 @@ int FingerprintManager::maxOffset() const
return m_maxoffset;
}

void FingerprintManager::setExpectedSize(int expectedSize)
{
m_fingerprints.reserve(expectedSize);
}

int FingerprintManager::size() const
{
return m_fingerprints.size();
}

std::vector<int> FingerprintManager::songFingerprint(int songID)
{
auto it = std::lower_bound( m_fingerprints.begin(), m_fingerprints.end(), songID,
[](auto x, auto y)
{ return x.first < y;
});
if (it == m_fingerprints.end())
return std::vector<int>();
else
return it->second;
}

void FingerprintManager::addSong(long songID, boost::python::list &fingerprint)
{
auto v = to_std_vector<int>(fingerprint);
v.insert(v.begin(), m_maxoffset, 0);
m_fingerprints[songID]=v;
m_fingerprints.emplace_back(std::make_pair(songID, v));

This comment has been minimized.

@miscco

miscco Jul 11, 2018

This is superfluous, emplace_back forwards the arguments to the constructor, which is the same thing that std::make_pair does. So you should rather use:
m_fingerprints.emplace_back(songID, v);

This comment has been minimized.

@yumetodo
}

boost::python::list FingerprintManager::addSongAndCompare(long songID, boost::python::list &fingerprint, double cancelThreshold)
@@ -99,9 +127,7 @@ boost::python::list FingerprintManager::addSongAndCompare(long songID, boost::py
auto v = to_std_vector<int>(fingerprint);
v.insert(v.begin(), m_maxoffset, 0);

auto vectorizedFP = std::vector<std::pair<int,std::vector<int>>>(m_fingerprints.begin(), m_fingerprints.end());

__gnu_parallel::for_each(vectorizedFP.begin(), vectorizedFP.end(),
__gnu_parallel::for_each(m_fingerprints.begin(), m_fingerprints.end(),
[&](const auto &itSong)
{
auto & [itSongID, itFingerprint] = itSong;
@@ -113,7 +139,7 @@ boost::python::list FingerprintManager::addSongAndCompare(long songID, boost::py
result_mutex.unlock();
}
}, __gnu_parallel::parallel_balanced);
m_fingerprints[songID]=v;
m_fingerprints.emplace_back(std::make_pair(songID, v));
return result;
}

@@ -238,12 +264,12 @@ boost::python::list FingerprintManager::compareChromaprintFingerprintsAndOffsetV

std::pair<int, double> FingerprintManager::compareSongs(long songID1, long songID2, double cancelThreshold)
{
return compareChromaprintFingerprintsAndOffset(m_fingerprints[songID1], m_fingerprints[songID2], cancelThreshold);
return compareChromaprintFingerprintsAndOffset(songFingerprint(songID1), songFingerprint(songID2), cancelThreshold);
}

boost::python::list FingerprintManager::compareSongsVerbose(long songID1, long songID2)
{
return compareChromaprintFingerprintsAndOffsetVerbose(m_fingerprints[songID1], m_fingerprints[songID2]);
return compareChromaprintFingerprintsAndOffsetVerbose(songFingerprint(songID1), songFingerprint(songID2));
}

BOOST_PYTHON_MODULE(bard_ext)
@@ -257,7 +283,9 @@ BOOST_PYTHON_MODULE(bard_ext)
.def("compareSongs", &FingerprintManager::compareSongs)
.def("compareSongsVerbose", &FingerprintManager::compareSongsVerbose)
.def("setMaxOffset", &FingerprintManager::setMaxOffset)
.def("maxOffset", &FingerprintManager::maxOffset);
.def("maxOffset", &FingerprintManager::maxOffset)
.def("setExpectedSize", &FingerprintManager::setExpectedSize)
.def("size", &FingerprintManager::size);
}


0 comments on commit 52bce48

Please sign in to comment.
You can’t perform that action at this time.