# folder-cleaner
Set `input_folder` to the folder you want to clean up and `output_folder` to the folder that should receive the sorted files. Each category becomes a sub-folder of `output_folder`.

In [2]:
from pathlib import Path

# Folder whose entries are reviewed one at a time.
input_folder = Path('~/Downloads').expanduser()
if not input_folder.is_dir():
    # An explicit raise (unlike assert) still fires under `python -O`, and
    # is_dir() also rejects a regular file that exists() would accept.
    raise NotADirectoryError(f"input_folder is not an existing directory: {input_folder}")
# Folder that receives one sub-folder per category.
output_folder = Path('./sorted')
# parents=True allows pointing output_folder at a nested path that does not
# exist yet; mkdir raises FileExistsError if the path exists but is a file.
output_folder.mkdir(parents=True, exist_ok=True)

In [3]:
import http.server
import socketserver
import os
# Local HTTP file server: exposes the contents of input_folder so the browser
# can load previews of the files under review.
class FileServerHandler(http.server.SimpleHTTPRequestHandler):
    """Request handler that always serves files out of ``input_folder``."""

    def __init__(self, *args, **kwargs):
        # Resolved each time a handler is instantiated, so the value of
        # input_folder at request time is the one that is served.
        served_root = os.fspath(input_folder)
        super().__init__(*args, directory=served_root, **kwargs)
# TCP port the preview file server listens on (loopback interface only).
PORT = 8002


class _ReusableTCPServer(socketserver.TCPServer):
    """TCPServer that may re-bind its port immediately after a restart."""

    # Without this, binding can fail with "address already in use" while
    # connections of a previous server instance are still in TIME_WAIT.
    allow_reuse_address = True


def run_server():
    """Serve files on 127.0.0.1:PORT using FileServerHandler.

    Blocks inside ``serve_forever()``, so it is meant to be used as the
    target of a background thread. The listening socket is closed when the
    ``with`` block is left.
    """
    with _ReusableTCPServer(("127.0.0.1", PORT), FileServerHandler) as httpd:
        print("Server started at localhost:" + str(PORT))
        httpd.serve_forever()

In [4]:
from threading import Thread

# daemon=True: the server loop never returns on its own, so a non-daemon
# thread would keep the interpreter from exiting at shutdown.
t = Thread(target=run_server, daemon=True)

t.start()
# Note: t.join(0) does NOT stop the server. join() only waits for the thread
# and gives up after the timeout; serve_forever() keeps running and keeps the
# port bound. The server stops when the kernel is restarted (or when
# shutdown() is called on the server object).

Server started at localhost:8002


In [5]:
from IPython.display import display
from ipywidgets import VBox, HTML, Text, Layout, GridBox, Button
import shutil
import subprocess
import gzip, zipfile
import html
from datetime import datetime
from urllib.parse import quote

In [None]:
# Entries of input_folder, most recently modified first; show_next_file()
# consumes this iterator one entry at a time.
files = iter(sorted(input_folder.glob('*'), key=lambda entry: entry.lstat().st_mtime, reverse=True))
# Existing category folders. Only directories count: a stray regular file in
# output_folder is not a category, and mkdir() on its name would raise.
categories = {entry for entry in output_folder.glob('*') if entry.is_dir()}
# Name of the entry currently shown; the action handlers read it back.
current_text = HTML()
# Metadata and preview of the entry currently shown.
description_text = HTML()
next_button = Button(description='Skip')
rename_input = Text(placeholder='Rename to', description='Name: ')
# Use the literal loopback address the server binds to; "localhost" may
# resolve to ::1 first, where nothing is listening.
file_host = f"http://127.0.0.1:{PORT}"
# Upper bound on the number of bytes read for a text preview.
_PREVIEW_BYTES = 10000
# Maximum number of archive members / directory entries listed in a preview.
_ARCHIVE_ENTRY_LIMIT = 30
_DIR_ENTRY_LIMIT = 50
_IMAGE_SUFFIXES = ('.jpg', '.png', '.jfif', '.jpeg', '.webp', '.gif', '.svg')
# .webm is a video container, so it is rendered with <video>, not <img>.
_MEDIA_SUFFIXES = ('.mp4', '.mov', '.mp3', '.webm')


def _embed_html(file, suffix):
    """Return an HTML tag that lets the browser render *file*, or None.

    The tag points at the local file server (``file_host``), so the file is
    not read by the kernel. None means no embeddable type matched *suffix*.
    """
    url = f"{file_host}/{quote(file.name)}"
    if suffix == ".pdf":
        return f'<iframe src="{url}" height="450px" width="500px"></iframe>'
    if suffix in ('.html', '.htm'):
        # The bare sandbox attribute keeps the embedded page restricted.
        return f'<iframe src="{url}" sandbox height="450px" width="500px"></iframe>'
    if suffix in _IMAGE_SUFFIXES:
        return f'<img src="{url}" style="max-height: 450px;" />'
    if suffix in _MEDIA_SUFFIXES:
        return f'<video src="{url}" style="max-height: 450px;" controls></video>'
    return None


def _text_preview_html(file, suffix):
    """Return HTML-escaped preview text for *file*.

    Zip-based archives yield a listing of their first members; gzip files
    are decompressed on the fly; everything else shows its first bytes.
    Read or decode failures produce a short notice instead of raising.
    """
    import zlib  # only needed to recognise corrupt gzip streams

    try:
        if suffix in ('.zip', '.epub'):
            # The context manager releases the file handle right away so the
            # archive can still be moved or deleted afterwards.
            with zipfile.ZipFile(file) as archive:
                names = archive.namelist()
            listing = '<br>'.join(html.escape(name) for name in names[:_ARCHIVE_ENTRY_LIMIT])
            if len(names) > _ARCHIVE_ENTRY_LIMIT:
                listing += "<br>[truncated]"
            return listing
        # Only the head is read, so large files are never loaded completely.
        if suffix == '.gz':
            with gzip.open(file, 'rb') as stream:
                head = stream.read(_PREVIEW_BYTES)
        else:
            with file.open('rb') as stream:
                head = stream.read(_PREVIEW_BYTES)
    except (OSError, EOFError, zlib.error, zipfile.BadZipFile) as error:
        return html.escape(f"[preview unavailable: {error}]")
    # backslashreplace keeps valid UTF-8 readable and shows stray bytes as \xNN.
    return html.escape(head.decode('utf-8', errors='backslashreplace'))


def show_next_file():
    """Advance to the next entry of ``files`` and render it.

    Sets ``current_text`` and ``rename_input`` to the entry's name and fills
    ``description_text`` with its modification time, kind, and a preview:
    an embedded viewer for PDFs/HTML/images/media, the head of the file for
    other files, or a listing of the first children for directories.
    When ``files`` is exhausted, ``current_text`` shows "You're done!".
    """
    from itertools import islice

    try:
        file: Path = next(files)
    except StopIteration:
        current_text.value = "You're done!"
        return
    # The raw name is stored unescaped on purpose: the move/delete handlers
    # read current_text.value back to locate the entry on disk.
    current_text.value = rename_input.value = file.name
    info = file.lstat()
    modified = datetime.fromtimestamp(info.st_mtime).strftime("%d/%m/%Y, %H:%M:%S")
    is_directory = file.is_dir()
    parts = [
        f"Modified: {modified}<br>",
        'Directory' if is_directory else 'File',
        "<div style='height: 450px; overflow: scroll;'>",
    ]
    if file.is_file():
        parts.append(f"Size: {info.st_size}<br>")
        suffix = file.suffix.lower()
        embedded = _embed_html(file, suffix)
        if embedded is not None:
            parts.append(embedded)
        elif info.st_size > 0:
            parts.append(f"<br>\nHead:\n<pre>{_text_preview_html(file, suffix)}</pre>\n")
    elif is_directory:
        parts.append("Files:<ul>")
        parts.extend(
            f"<li>{html.escape(child.name[:100])}</li>"
            for child in islice(file.glob("*"), _DIR_ENTRY_LIMIT)
        )
        parts.append("</ul>")
    parts.append('</div>')
    # Assign once so the widget is synced to the front end a single time.
    description_text.value = "".join(parts)

def _skip_current_file(_button):
    """Click handler for the Skip button: leave the entry alone and move on."""
    show_next_file()


next_button.on_click(_skip_current_file)
# Render the first entry right away so the UI is populated when displayed.
show_next_file()
def move_file(folder: str):
    """Move the currently shown entry into category *folder*, then show the next one.

    The source is ``input_folder / current_text.value``. The destination is
    ``output_folder / folder / <name>``, where <name> is the text in
    ``rename_input`` (the current name is used when that text is blank).

    Raises:
        ValueError: no entry is selected (the source would be input_folder).
        FileExistsError: the destination already exists; nothing is moved.
        OSError: the move failed. Before re-raising, the source is revealed
            in Windows Explorer when explorer.exe is available.
    """
    _from = input_folder.joinpath(current_text.value)
    if _from == input_folder:
        # An empty name resolves to input_folder itself; never move that.
        raise ValueError("no current entry to move")
    # A blank name would make the destination the category folder itself.
    new_name = rename_input.value if rename_input.value.strip() else _from.name
    to = output_folder.joinpath(folder, new_name)
    try:
        if to.exists():
            # Path.rename would silently replace an existing file on POSIX.
            raise FileExistsError(f"{to} already exists; choose another name")
        # shutil.move also works across drives / file systems, where a plain
        # rename fails.
        shutil.move(str(_from), str(to))
    except OSError:
        try:
            # "/select," (with the comma) is the switch used by the
            # "Open in Explorer" button as well.
            subprocess.run(['explorer.exe', '/select,', str(_from)])
        except OSError:
            pass  # explorer.exe is unavailable; the original error matters more
        raise
    # Advance only after a successful move, outside the try, so a rendering
    # error is not mistaken for a failed move.
    show_next_file()
    print('moved', _from, to)
# Category buttons are laid out in a two-column grid.
categories_layout = Layout(grid_template_columns='repeat(2, 1fr)')
categories_hbox = GridBox(layout=categories_layout)
# Hint shown while no category exists; add_category() overwrites it.
categories_explanation = HTML(
    "\nYou don't have any categories yet!<br>\n"
    "Add one above to get started.<br>\n"
    "Each category corresponds to a folder in the output folder.\n"
)
def add_category(category: str):
    """Ensure the folder for *category* exists and offer a button for it.

    Creates ``output_folder / category`` (including parents) when missing and
    appends a button to ``categories_hbox`` whose click moves the current
    entry into that folder. Blank names are ignored, and a name that already
    has a button does not get a second one.
    """
    if not category.strip():
        # A blank name would resolve to output_folder itself and produce an
        # unlabeled button.
        return
    output_folder.joinpath(category).mkdir(exist_ok=True, parents=True)
    categories_explanation.value = "Move current element to:"
    if any(existing.description == category for existing in categories_hbox.children):
        return  # already offered; avoid duplicate buttons
    button = Button(description=category)
    button.on_click(lambda b: move_file(b.description))
    categories_hbox.children = [*categories_hbox.children, button]
# Free-text name for a new category, and the button that creates it.
category_input = Text(placeholder="Category")
category_button = Button(description="Add category")

def delete_file(file: str):
    """Permanently delete entry *file* of ``input_folder``, then show the next one.

    Directories are removed recursively; errors inside the tree are reported
    through ``print`` rather than raised. Files and symlinks are unlinked.
    The next entry is shown only when the target is actually gone.

    Raises:
        ValueError: *file* does not name a direct entry of input_folder
            (for example, it is empty).
        OSError: unlinking a file or symlink failed.
    """
    target = input_folder.joinpath(file)
    if not file or target.parent != input_folder:
        # An empty name resolves to input_folder itself; never delete that.
        raise ValueError(f"refusing to delete {target}: not an entry of {input_folder}")
    if target.is_dir() and not target.is_symlink():
        shutil.rmtree(target, ignore_errors=False, onerror=print)
    else:
        # Also covers symlinks that point at a directory, which rmtree
        # refuses to operate on.
        target.unlink()
    if target.exists() or target.is_symlink():
        # rmtree reported errors and left something behind; keep the entry
        # on screen instead of claiming success.
        print('could not delete', target.name)
        return
    show_next_file()
    print('deleted', target.name)
delete_button = Button(description='Delete')
delete_button.on_click(lambda _: delete_file(current_text.value))
open_button = Button(description='Open in Explorer')
# Opens Windows Explorer on the current entry via the "/select," switch.
open_button.on_click(lambda _: subprocess.run(['explorer.exe', '/select,', str(input_folder.joinpath(current_text.value))]))
category_button.on_click(lambda _: add_category(category_input.value))
# Left column: actions and category buttons. Right column: name and preview.
actions = VBox([open_button, delete_button, rename_input, next_button, category_input, category_button, categories_explanation, categories_hbox])
file_infos = VBox([current_text, description_text])
main_layout = Layout(grid_template_columns='320px auto')
main_grid = GridBox([actions, file_infos],layout=main_layout)
# Offer one button per existing category folder. Sorting gives a stable
# button order (set iteration order is arbitrary); non-directories are
# skipped because mkdir() on an existing file raises FileExistsError.
for category in sorted(categories, key=lambda path: path.name.casefold()):
    if category.is_dir():
        add_category(category.name)

display(main_grid)