# In [None]:
import sys
import os
import hashlib
from collections import defaultdict
from PyQt5 import QtCore, QtWidgets
from PyQt5.QtWidgets import QFileDialog, QTreeWidgetItem
from PyQt5.QtCore import Qt, QThread, QTime, pyqtSignal


class Ui_MainWindow(object):
    """Widget layout for the "File Duplicate Scanner" main window."""

    def setupUi(self, MainWindow):
        """Create the window's widgets and install them on ``MainWindow``.

        Every widget and layout created here is stored as an attribute on
        ``self``. From top to bottom the window holds a directory input row
        (label, line edit, Browse/Scan/Stop buttons), the results tree, two
        status labels and a progress bar.

        The Stop button and the progress bar start hidden, and the Scan
        button starts disabled.
        """
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(800, 600)

        central = QtWidgets.QWidget(MainWindow)
        root_layout = QtWidgets.QVBoxLayout(central)
        self.centralwidget = central
        self.verticalLayout = root_layout

        # --- Directory input row ---------------------------------------
        input_row = QtWidgets.QHBoxLayout()
        self.inputLayout = input_row
        self.directoryLabel = QtWidgets.QLabel("Directory:")
        self.directoryLineEdit = QtWidgets.QLineEdit()
        self.browseButton = QtWidgets.QPushButton("Browse")
        self.scanButton = QtWidgets.QPushButton("Scan")
        self.stopButton = QtWidgets.QPushButton("Stop")

        # Initial state is applied before the buttons join the layout.
        self.stopButton.setVisible(False)
        self.scanButton.setEnabled(False)

        input_widgets = (
            self.directoryLabel,
            self.directoryLineEdit,
            self.browseButton,
            self.scanButton,
            self.stopButton,
        )
        for widget in input_widgets:
            input_row.addWidget(widget)
        root_layout.addLayout(input_row)

        # --- Results tree ----------------------------------------------
        results_tree = QtWidgets.QTreeWidget()
        self.treeWidget = results_tree
        results_tree.setHeaderLabels(["File Name", "Size", "Folder", "Path"])
        root_layout.addWidget(results_tree)

        # --- Status labels and progress bar ----------------------------
        status_column = QtWidgets.QVBoxLayout()
        self.statusLayout = status_column
        self.statusbar1 = QtWidgets.QLabel()
        self.statusbar2 = QtWidgets.QLabel()
        self.progressBar = QtWidgets.QProgressBar()
        self.progressBar.setVisible(False)

        for widget in (self.statusbar1, self.statusbar2, self.progressBar):
            status_column.addWidget(widget)
        root_layout.addLayout(status_column)

        MainWindow.setCentralWidget(central)
        MainWindow.setWindowTitle("File Duplicate Scanner")


class ScanThread(QThread):
    """Worker thread that hashes every file under a directory.

    Signals:
        update_progress(int): percentage (0-100) of the collected files
            that have been processed so far.
        scan_complete(dict): maps each SHA-256 hex digest to a list of
            ``(file_path, folder_name, file_name, file_size)`` tuples for
            the files with that content. Not emitted when the scan was
            stopped via :meth:`stop`.
    """

    update_progress = pyqtSignal(int)
    scan_complete = pyqtSignal(dict)

    # Number of bytes read per call while hashing a file.
    CHUNK_SIZE = 64 * 1024

    def __init__(self, directory):
        """Remember the directory to scan; the scan starts with ``start()``."""
        super().__init__()
        self.directory = directory
        self.stop_requested = False

    def stop(self):
        """Ask the running scan to finish early at its next check point."""
        self.stop_requested = True

    def get_file_hash(self, path):
        """Return the SHA-256 hex digest of the file at ``path``.

        Returns ``None`` when the file cannot be opened or read, or when a
        stop was requested while the file was being read.
        """
        hasher = hashlib.sha256()
        try:
            with open(path, 'rb') as f:
                for chunk in iter(lambda: f.read(self.CHUNK_SIZE), b''):
                    # Check between chunks so a stop request does not have
                    # to wait for a very large file to be read completely.
                    if self.stop_requested:
                        return None
                    hasher.update(chunk)
        except (OSError, ValueError):
            # Unreadable files (missing, no permission, invalid path) are
            # skipped on purpose: the scan is best-effort.
            return None
        return hasher.hexdigest()

    def run(self):
        """Collect all files below ``self.directory`` and group them by hash.

        Emits ``update_progress`` whenever the integer percentage changes
        and ``scan_complete`` once at the end. Returns without emitting
        ``scan_complete`` as soon as a stop request is noticed.
        """
        hash_map = defaultdict(list)
        all_files = []

        for root, _dirs, files in os.walk(self.directory):
            if self.stop_requested:
                return
            folder_name = os.path.basename(root)
            all_files.extend(
                (os.path.join(root, file_name), folder_name, file_name)
                for file_name in files
            )

        total = len(all_files)
        last_progress = -1
        for index, (file_path, folder_name, file_name) in enumerate(all_files, start=1):
            if self.stop_requested:
                return

            file_hash = self.get_file_hash(file_path)
            # The hash may have been abandoned because of a stop request;
            # do not report a partial result as a finished scan.
            if self.stop_requested:
                return

            if file_hash:
                try:
                    file_size = os.path.getsize(file_path)
                except OSError:
                    # File vanished or became unreadable after hashing;
                    # leave it out but still count it as processed below.
                    pass
                else:
                    hash_map[file_hash].append(
                        (file_path, folder_name, file_name, file_size)
                    )

            # Integer arithmetic avoids float truncation (e.g. 29/100*100
            # evaluating to 28.99...), and emitting only on change keeps
            # the number of cross-thread signals bounded.
            progress = index * 100 // total
            if progress != last_progress:
                last_progress = progress
                self.update_progress.emit(progress)

        self.scan_complete.emit(hash_map)



class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
    """Main window: lets the user pick a directory, scan it, and view duplicates.

    Scanning runs in a ``ScanThread``. Progress values reported by the
    thread are stored in ``progress_value`` and copied to the progress bar
    by a timer that fires every 100 ms.
    """

    def __init__(self):
        """Build the UI, start the progress-refresh timer and wire up signals."""
        super().__init__()
        self.setupUi(self)

        self.progress_value = 0
        self.scan_thread = None
        # QTimer is reached through the imported QtCore module; the bare
        # name ``QTimer`` is not imported by this file.
        self.timer = QtCore.QTimer(self)
        self.timer.timeout.connect(self.update_progress_ui)
        self.timer.start(100)

        self.browseButton.clicked.connect(self.browse)
        self.scanButton.clicked.connect(self.start_scan)
        self.stopButton.clicked.connect(self.stop_scan)
        self.directoryLineEdit.textChanged.connect(self.toggle_scan_button)

    def format_file_size(self, size):
        """Return ``size`` (in bytes) as a human-readable string.

        Sizes below 1024 are shown as whole bytes; larger sizes are shown
        with two decimals in KB, MB, GB or TB (1024-based).
        """
        if size < 1024:
            return f"{size} B"
        value = size / 1024
        for unit in ("KB", "MB", "GB"):
            if value < 1024:
                return f"{value:.2f} {unit}"
            value /= 1024
        return f"{value:.2f} TB"

    def browse(self):
        """Let the user choose a directory and put it into the line edit."""
        directory = QFileDialog.getExistingDirectory(self)
        if directory:
            self.directoryLineEdit.setText(directory)

    def toggle_scan_button(self):
        """Enable the Scan button only while the directory field is non-empty."""
        self.scanButton.setEnabled(bool(self.directoryLineEdit.text()))

    def _shutdown_scan_thread(self):
        """Stop a still-running scan thread and block until it has finished."""
        thread = self.scan_thread
        if thread is not None and thread.isRunning():
            thread.stop()
            thread.wait()

    def start_scan(self):
        """Reset the result views and start scanning the entered directory."""
        directory = self.directoryLineEdit.text()
        if not os.path.isdir(directory):
            self.statusbar1.setText("Please choose an existing directory.")
            return

        # A previously stopped scan may still be finishing its current
        # file; its thread object must not be dropped while it is running.
        self._shutdown_scan_thread()

        self.treeWidget.clear()
        self.statusbar1.clear()
        self.statusbar2.clear()
        # Reset the stored value too, otherwise the timer would repaint the
        # bar with the percentage left over from the previous scan.
        self.progress_value = 0
        self.progressBar.setValue(0)
        self.progressBar.setVisible(True)
        self.scanButton.setVisible(False)
        self.stopButton.setVisible(True)

        self.scan_thread = ScanThread(directory)
        self.scan_thread.update_progress.connect(self.store_progress_value)
        self.scan_thread.scan_complete.connect(self.on_scan_complete)
        self.scan_thread.start()

    def stop_scan(self):
        """Request the running scan to stop and restore the idle UI state."""
        if self.scan_thread:
            self.scan_thread.stop()
            self.statusbar1.setText("Scan stopped.")
            self.scanButton.setVisible(True)
            self.stopButton.setVisible(False)
            self.progressBar.setVisible(False)

    def store_progress_value(self, value):
        """Remember the latest progress percentage reported by the scan."""
        self.progress_value = value

    def update_progress_ui(self):
        """Timer slot: copy the stored progress percentage to the progress bar."""
        self.progressBar.setValue(self.progress_value)

    def _add_file_row(self, parent, entry):
        """Create a tree row under ``parent`` for one file entry.

        ``entry`` is a ``(file_path, folder_name, file_name, file_size)``
        tuple; the columns are file name, size, folder and path.
        """
        file_path, folder_name, file_name, file_size = entry
        row = QTreeWidgetItem(parent)
        row.setText(0, file_name)
        row.setText(1, self.format_file_size(file_size))
        row.setText(2, folder_name)
        row.setText(3, file_path)
        return row

    def on_scan_complete(self, hash_map):
        """Show the scan summary and list every group of duplicate files.

        ``hash_map`` maps a content hash to a list of
        ``(file_path, folder_name, file_name, file_size)`` tuples. The
        first file of each group is shown as the top-level row and the
        remaining files as its children.
        """
        # A result emitted just before the stop flag was noticed can still
        # arrive here; keep the "Scan stopped." state in that case.
        if self.scan_thread is not None and self.scan_thread.stop_requested:
            return

        self.progressBar.setVisible(False)
        self.progressBar.setValue(100)

        duplicates = 0
        dup_size = 0
        total_files = sum(len(file_list) for file_list in hash_map.values())

        for file_list in hash_map.values():
            if len(file_list) < 2:
                continue
            original, *copies = file_list
            duplicates += len(copies)
            dup_size += sum(entry[3] for entry in copies)

            original_item = self._add_file_row(self.treeWidget, original)
            for entry in copies:
                self._add_file_row(original_item, entry)
            self.treeWidget.expandItem(original_item)

        percentage = (duplicates / total_files) * 100 if total_files else 0
        self.statusbar1.setText(f"Total Files: {total_files}, Duplicates: {duplicates}")
        self.statusbar2.setText(f"Duplicate Size: {self.format_file_size(dup_size)} ({percentage:.2f}%)")

        self.scanButton.setVisible(True)
        self.stopButton.setVisible(False)

    def closeEvent(self, event):
        """Make sure the scan thread has finished before the window closes."""
        self._shutdown_scan_thread()
        super().closeEvent(event)



if __name__ == "__main__":
    # Script entry point: create the Qt application, show the main window,
    # run the event loop and exit the process with the loop's return code.
    app = QtWidgets.QApplication(sys.argv)
    window = MainWindow()
    window.show()
    exit_code = app.exec_()
    sys.exit(exit_code)