-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
48 lines (39 loc) · 1.24 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import glob
import os
from src.config import IMAGE_DIR, HASH_SIZE, DUPLICATES_DIR
from detect_similarities import Codecember
import time
from PIL import Image
import imagehash
def generate_report(duplicates_dir=None):
    """Print how many sub-directories hold more than one entry.

    Every sub-directory of ``duplicates_dir`` is counted as one group; a group
    with more than one entry is counted as a duplicate. The function prints
    the duplicate count and its ratio to the total number of groups.

    Args:
        duplicates_dir: Directory to inspect. Defaults to the module-level
            ``DUPLICATES_DIR`` when omitted.
    """
    if duplicates_dir is None:
        duplicates_dir = DUPLICATES_DIR

    total_groups = 0
    duplicate_groups = 0
    with os.scandir(duplicates_dir) as entries:
        for entry in entries:
            # Stray plain files would make os.listdir() raise; they are not
            # groups, so leave them out of both the count and the ratio.
            if not entry.is_dir():
                continue
            total_groups += 1
            if len(os.listdir(entry.path)) > 1:
                duplicate_groups += 1

    # An empty directory has no groups; report 0.0 instead of dividing by zero.
    ratio = duplicate_groups / total_groups if total_groups else 0.0
    print(f"Duplicates found: {duplicate_groups}\nRatio: {ratio}")
def main():
    """Hash every entry in ``IMAGE_DIR`` and hand it to ``Codecember.move_files``.

    For each path matched by ``IMAGE_DIR/*`` an average hash is computed. The
    first path seen for a given hash is remembered; later paths with an equal
    hash are passed to ``move_files`` together with that first path. Progress,
    per-file errors and the total processing time are printed, and
    ``generate_report()`` is called at the end.
    """
    dr = Codecember()
    hashes = {}  # image hash -> first path seen with that hash
    files = glob.glob(os.path.join(IMAGE_DIR, '*'))
    total = len(files)
    start_time = time.perf_counter()

    for index, file in enumerate(files, start=1):
        print(f"{index} / {total}")
        try:
            with Image.open(file) as img:
                temp_hash = imagehash.average_hash(img, hash_size=HASH_SIZE)
            # The image is closed before move_files runs: an open handle
            # prevents moving/renaming the file on Windows.
            # NOTE(review): move_files(src, representative) presumably groups
            # src with its representative — confirm in detect_similarities.
            if temp_hash in hashes:
                dr.move_files(file, hashes[temp_hash])
            else:
                hashes[temp_hash] = file
                dr.move_files(file, file)
        except Exception as e:
            # Deliberate best-effort: one unreadable or unmovable file must
            # not abort the whole run, but say which file was skipped.
            print(f"Skipping {file}: {e}")

    elapsed_seconds = time.perf_counter() - start_time
    print(f"Processing time: {elapsed_seconds / 60:.2f} minutes")
    generate_report()
if __name__ == '__main__':
    # main() already calls generate_report() as its final step, so calling it
    # again here would print the same report twice.
    main()