In [1]:
import os
import redis
import hashlib

In [2]:
# --- Notebook configuration -------------------------------------------------
dataset_name = "careful"
redis_url = "redis://localhost:6379"

# Redis client built from the URL above; used later to persist the hash lookups.
r = redis.from_url(redis_url)

# Input MIDI files are read from ../data/datasets/<dataset_name>.
input_dir = os.path.join("..", "data", "datasets", dataset_name)
output_dir = os.path.join("..", "data", "outputs")

# Keep only the entries of the input directory whose name ends in ".mid".
files = [entry for entry in os.listdir(input_dir) if entry.endswith(".mid")]

# In-memory lookup: SHA-256 hex digest of a filename -> the original filename.
hash_map = {}

In [3]:
def hash_key(key):
    """
    Hashes a key into a shorter string using SHA-256 and returns a hexadecimal representation.

    Args:
        key (str): The key to hash.

    Returns:
        str: A hexadecimal string representation of the hash of the key.
    """
    hash_object = hashlib.sha256()
    hash_object.update(key.encode("utf-8"))
    return hash_object.hexdigest()


def store_file_hash(filename):
    """
    Hash a filename and record the hash -> filename pair in ``hash_map``.

    The module-level ``hash_map`` dictionary is updated in place; an existing
    entry under the same hash is overwritten.

    Args:
        filename (str): The filename to hash and register.

    Returns:
        str: The hash under which the filename was stored.
    """
    digest = hash_key(filename)
    # The hash is the dictionary key; the original filename is the value.
    hash_map[digest] = filename
    return digest


def retrieve_original_filename(hashed_key):
    """
    Look up the filename that was registered under a given hash.

    Args:
        hashed_key (str): The hash to look up in ``hash_map``.

    Returns:
        str: The filename stored under ``hashed_key``. When the hash is not
        present in ``hash_map``, a fixed "not found" message string is
        returned instead (no exception is raised).
    """
    if hashed_key in hash_map:
        return hash_map[hashed_key]
    return "No original filename found for this hash."

In [4]:
# Register every MIDI file in the in-memory map and mirror both lookup
# directions into Redis:
#   hashes:files:<hash>      -> filename
#   hashes:hashes:<filename> -> hash
for file in files:
    # hash_map is keyed by the hash (see store_file_hash), so the
    # "already registered" check must use the hash. Testing the raw filename
    # against the keys never matches, which made every re-run of this cell
    # redo all hashing and Redis writes.
    if hash_key(file) in hash_map:
        continue
    hashed_file = store_file_hash(file)
    r.set(f"hashes:files:{hashed_file}", file)
    r.set(f"hashes:hashes:{file}", hashed_file)

hash_map

{'d1633378478b692da4507fec8158caa9f78bf69423db67aa8cacfe68a0faa61a': '20240126-50-02_0040-0048.mid',
 '73fcc6e98284d03d48647f4ebe6a9f9ad48e9c975bd40f2d99f50586a93d18ab': '20240121-70-02_0232-0240.mid',
 'fa5ae6ab269446a1bcb224da6f9a768aee2a21cebd4da530d6b45f5b1632f823': '20240121-70-09_0224-0232.mid',
 'd375c9e46d7ecc63f759f91bb701de122fe708979a6737faf1832035890b3740': '20240121-70-02_0024-0032.mid',
 '99b355e8011723a515ff10103f8678b60ce2c9391429f169251a4369e696a92d': '20231220-80-03_0440-0448.mid',
 '1b80824953933d6f6b83b4c10539193e7d7045982d351c5bac50a9e257e507eb': '20240227-76-01_0320-0328.mid',
 'b0ff928d3552212c6227a3bab12aa53e8e0e99f68a2f2ceca1b7b5faf7803968': '20240213-100-02_1992-2000.mid',
 '915ec6d9bc362924e1da01a851989c193f98cbc6237b51b132edda1ea8a8efa5': '20240213-100-06_0888-0896.mid',
 '71d40cdf458f7db20fcbdc9d657234bd19a2e3e705bc62647e3cef103ba6776a': '20240213-100-03_0144-0152.mid',
 '0089379f3030b9aff0df08b014213127d1552c452af621c41d4e5e7a781f35c6': '20231227-80-02_022