Skip to content

Commit 8759edb

Browse files
committed
Add parallelization to find audio duplicates MUCH faster
Use __gnu_parallel::for_each so that all CPUs are used at the same time to calculate audio duplicates, which greatly increases the speed. Previously, each iteration took around 150 seconds (570 songs/second). After this patch, each iteration takes around 8.5 seconds (11200 songs/second).
1 parent b949ecd commit 8759edb

3 files changed

Lines changed: 23 additions & 21 deletions

File tree

bard/bard.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -921,6 +921,7 @@ def findAudioDuplicates2(self, from_song_id=None):
921921
# return
922922
start_time = time.time()
923923
result = fpm.addSongAndCompare(songID, dfp[0], storeThreshold)
924+
result.sort(key=lambda x: x[0])
924925

925926
for (songID2, offset, similarity) in result:
926927
print('******** %d %d %d %f' % (songID2, songID,

bard/bard_ext.cpp

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
#include <vector>
2525
#include <map>
2626
#include <iostream>
27+
#include <parallel/algorithm>
28+
#include <mutex>
2729

2830
template<typename T>
2931
inline
@@ -60,12 +62,12 @@ class FingerprintManager
6062
std::pair<int, double> compareSongs(long songID1, long songID2, double cancelThreshold=0.55);
6163
boost::python::list compareSongsVerbose(long songID1, long songID2);
6264

63-
std::pair<int, double> compareChromaprintFingerprintsAndOffset(std::vector<int> fp1, std::vector<int> fp2, double cancelThreshold) const;
65+
std::pair<int, double> compareChromaprintFingerprintsAndOffset(const std::vector<int> &fp1, const std::vector<int> &fp2, double cancelThreshold) const;
6466
boost::python::list compareChromaprintFingerprintsAndOffsetVerbose(std::vector<int> fp1, std::vector<int> fp2) const;
6567

6668
private:
6769
int m_maxoffset;
68-
std::map<int, std::vector<int> > m_fingerprints;
70+
std::map<int, std::vector<int>> m_fingerprints;
6971
};
7072

7173
FingerprintManager::FingerprintManager(): m_maxoffset(50)
@@ -92,32 +94,30 @@ void FingerprintManager::addSong(long songID, boost::python::list &fingerprint)
9294

9395
boost::python::list FingerprintManager::addSongAndCompare(long songID, boost::python::list &fingerprint, double cancelThreshold)
9496
{
97+
std::mutex result_mutex;
9598
boost::python::list result;
9699
auto v = to_std_vector<int>(fingerprint);
97-
// std::cout << "len: " << v.size() << std::endl;
98100
v.insert(v.begin(), m_maxoffset, 0);
99-
// std::cout << "new len: " << v.size() << std::endl;
100-
for (auto & [itSongID, itFingerprint]: m_fingerprints)
101-
{
102-
auto [offset, similarity] = compareChromaprintFingerprintsAndOffset(itFingerprint, v, cancelThreshold);
103-
if (similarity > cancelThreshold)
104-
{
105-
// std::cout << "****" << songID << " " << itSongID << " " << offset << " " << similarity << std::endl;
106-
result.append(boost::python::make_tuple(itSongID, offset, similarity));
107-
} /*else {
108-
if (similarity < 0 )
109-
std::cout << songID << " " << itSongID << " different" << std::endl;
110-
else
111-
std::cout << songID << " " << itSongID << " " << offset << " " << similarity << std::endl;
112-
}*/
113101

114-
}
102+
auto vectorizedFP = std::vector<std::pair<int,std::vector<int>>>(m_fingerprints.begin(), m_fingerprints.end());
115103

104+
__gnu_parallel::for_each(vectorizedFP.begin(), vectorizedFP.end(),
105+
[&](const auto &itSong)
106+
{
107+
auto & [itSongID, itFingerprint] = itSong;
108+
auto [offset, similarity] = compareChromaprintFingerprintsAndOffset(itFingerprint, v, cancelThreshold);
109+
if (similarity > cancelThreshold)
110+
{
111+
result_mutex.lock();
112+
result.append(boost::python::make_tuple(itSongID, offset, similarity));
113+
result_mutex.unlock();
114+
}
115+
}, __gnu_parallel::parallel_balanced);
116116
m_fingerprints[songID]=v;
117117
return result;
118118
}
119119

120-
std::pair<int, double> FingerprintManager::compareChromaprintFingerprintsAndOffset(std::vector<int> fp1, std::vector<int> fp2, double cancelThreshold) const
120+
std::pair<int, double> FingerprintManager::compareChromaprintFingerprintsAndOffset(const std::vector<int> &fp1, const std::vector<int> &fp2, double cancelThreshold) const
121121
{
122122
std::vector<int>::const_iterator it1, it2;
123123
int offset;

setup.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33

44
module1 = Extension('bard_ext',
55
define_macros=[('MAJOR_VERSION', '1'),
6-
('MINOR_VERSION', '0')],
6+
('MINOR_VERSION', '0'),
7+
('_GLIBCXX_PARALLEL', None)],
78
include_dirs=['/usr/include/boost'],
89
libraries=['boost_python-py3'],
910
library_dirs=['/usr/lib'],
1011
sources=['bard/bard_ext.cpp'],
11-
extra_compile_args=['-std=c++1z'])
12+
extra_compile_args=['-std=gnu++17', '-fopenmp'])
1213

1314
setup(
1415
# Application name:

0 commit comments

Comments
 (0)