In [1]:
# standard libs
import hashlib
import os
import shutil
import stat
import argparse
import re
import sqlite3
from datetime import datetime

In [2]:
# third party dependencies
import filetype
import mutagen

In [3]:
import pandas as pd

In [4]:
from stat import S_IREAD, S_IRGRP, S_IROTH

# Multiplier applied to datetime.timestamp() (seconds) to obtain an
# integer nanosecond timestamp.
TS_MULT = 10**9
# Number of bytes requested per read() call when streaming file contents
# through the hasher (262144 = 256 * 1024).
BLOCKLENGTH = 262144

def make_file_read_only(path):
    """Set the permission bits of ``path`` to read-only for owner, group and others."""
    read_only_mode = S_IREAD | S_IRGRP | S_IROTH
    os.chmod(path, read_only_mode)
        
def open_file(root_path, name, temp_path=None):
    """Begin streaming a new file towards the store rooted at ``root_path``.

    A staging file named after the current timestamp is opened for binary
    writing in ``temp_path`` (or in ``root_path`` when ``temp_path`` is
    falsy).

    Returns a record dict holding the open handle (``'file'``), the name the
    file will be stored under (``'name'``), ``'root_path'``,
    ``'temp_file_path'``, a fresh SHA-256 ``'hasher'``, the
    ``'hashing_started'`` timestamp and a ``'bytes_hashed'`` counter set to 0.
    """
    staging_dir = temp_path if temp_path else root_path

    # Files starting with a dot are ignored when the stash is inspected,
    # so a leading dot is masked with an underscore for files we care about.
    stored_name = '_' + name if name.startswith('.') else name

    staging_path = os.path.join(
        staging_dir, datetime.now().strftime('%Y%m%d%H%M%S%f'))
    handle = open(staging_path, 'wb')

    record = {}
    record['file'] = handle
    record['name'] = stored_name
    record['root_path'] = root_path
    record['temp_file_path'] = staging_path
    record['hasher'] = hashlib.sha256()
    record['hashing_started'] = int(datetime.now().timestamp()*TS_MULT)
    record['bytes_hashed'] = 0
    return record

def write_to_file(record, data):
    """Append ``data`` to the record's file, feed it to the record's hasher
    and add its length to the record's ``'bytes_hashed'`` counter."""
    handle = record['file']
    handle.write(data)
    digest = record['hasher']
    digest.update(data)
    record['bytes_hashed'] += len(data)

def abort_file(record):
    """Abandon an in-progress write: close the record's file handle and
    delete the staging file at ``record['temp_file_path']``."""
    handle = record['file']
    handle.close()
    staging_path = record['temp_file_path']
    os.remove(staging_path)

def save_file(record):
    """Finish a streamed write and place the data in the hash store.

    Closes the staging file, stores the hex digest under ``'sha256'`` and the
    completion timestamp under ``'hashing_completed'``, and removes the
    ``'file'`` and ``'hasher'`` entries from ``record``.

    If the container directory for the digest does not exist yet, it is
    created, the staging file is moved into it under ``record['name']`` and
    made read-only, and ``record['new']`` is set to True.  Otherwise the
    staging file is deleted and ``record['new']`` is set to False.
    """
    record['file'].close()
    digest = record['hasher'].hexdigest()
    record['sha256'] = digest
    record['hashing_completed'] = int(datetime.now().timestamp()*TS_MULT)

    for spent_key in ('file', 'hasher'):
        del record[spent_key]

    container = hash2path(record['root_path'], digest)
    if os.path.exists(container):
        # data is already in hash store
        os.remove(record['temp_file_path'])
        record['new'] = False
        return

    os.makedirs(container)
    stored_path = os.path.join(container, record['name'])
    shutil.move(record['temp_file_path'], stored_path)
    make_file_read_only(stored_path)
    record['new'] = True
        

def hash_file(path):
    """Compute the SHA-256 of the file at ``path``, reading it in
    ``BLOCKLENGTH``-sized chunks.

    Returns a dict with ``'hashing_started'``, ``'sha256'`` (hex digest),
    ``'hashing_completed'`` and ``'bytes_hashed'`` (total bytes read).
    """
    digest = hashlib.sha256()
    total_bytes = 0
    started = int(datetime.now().timestamp()*TS_MULT)
    with open(path, 'rb') as stream:
        # read() returns b'' at end of file, which ends the iteration
        for chunk in iter(lambda: stream.read(BLOCKLENGTH), b''):
            total_bytes += len(chunk)
            digest.update(chunk)
    hex_digest = digest.hexdigest()
    completed = int(datetime.now().timestamp()*TS_MULT)

    return {'hashing_started': started,
            'sha256': hex_digest,
            'hashing_completed': completed,
            'bytes_hashed': total_bytes}
        
def move_file(root_path, file_path):
    """Hash the file at ``file_path`` and move it into the store at ``root_path``.

    When the container directory for the hash does not exist, it is created
    and the file is moved into it (keeping its base name) and made read-only.
    When the directory already exists, the source file is left where it is.

    Returns the record from ``hash_file`` with an added ``'new'`` flag that
    is True only when the file was moved into the store.
    """
    hash_rec = hash_file(file_path)
    container = hash2path(root_path, hash_rec['sha256'])
    is_new = not os.path.exists(container)
    if is_new:
        os.makedirs(container)
        stored_path = os.path.join(container, os.path.basename(file_path))
        shutil.move(file_path, stored_path)
        make_file_read_only(stored_path)
    # when is_new is False the file already exists in the store
    hash_rec['new'] = is_new
    return hash_rec

def check_file(root_path, hsh, num_bytes=False):
    """Re-hash the stored file for ``hsh`` and compare it with the record.

    Returns False when no single stored file can be located, when the
    recomputed SHA-256 differs from ``hsh``, or when a truthy ``num_bytes``
    differs from the number of bytes read; a diagnostic is printed for the
    two mismatch cases.  Returns True otherwise.
    """
    stored_path = hash2filepath(root_path, hsh)
    if not stored_path:
        return False

    digest = hashlib.sha256()
    total_bytes = 0
    with open(stored_path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(BLOCKLENGTH), b''):
            total_bytes += len(chunk)
            digest.update(chunk)
    actual_hash = digest.hexdigest()

    if hsh != actual_hash:
        print('hash does not match:')
        print('record_hash={}'.format(hsh))
        print('stored_hash={}'.format(actual_hash))
        return False

    # the size comparison only happens when a truthy num_bytes was supplied
    size_mismatch = num_bytes and (num_bytes != total_bytes)
    if size_mismatch:
        print('num bytes does not match:')
        print('record={}'.format(num_bytes))
        print('stored={}'.format(total_bytes))
        return False
    return True

def delete_file(root_path, hsh):
    """Delete the stored data for ``hsh`` from the store at ``root_path``.

    The container directory for the hash must exist and contain only regular
    files; otherwise a message is printed, nothing is deleted and False is
    returned.  On success every file in the container is removed, the
    container itself is removed, and any fan-out directories left empty are
    pruned.  Returns True on success.

    Pruning stops below ``root_path``.  ``os.removedirs`` would keep removing
    empty ancestors without limit, which could delete the store root itself
    (and empty directories above it) once the last item is removed.
    """
    dir_path = hash2path(root_path, hsh)
    if not os.path.exists(dir_path):
        print('directory {} does not exist'.format(dir_path))
        return False

    entries = [os.path.join(dir_path, f) for f in os.listdir(dir_path)]

    # validate everything first so that a refusal deletes nothing
    for entry in entries:
        if not os.path.isfile(entry):
            print('directory {} contains something other than '
                  'a regular file'.format(dir_path))
            print(entry)
            return False

    for entry in entries:
        os.remove(entry)

    # an error removing the container itself is propagated to the caller
    os.rmdir(dir_path)

    # prune empty parents, but only those strictly inside the store root
    root = os.path.abspath(root_path)
    parent = os.path.dirname(os.path.abspath(dir_path))
    while parent != root and os.path.commonpath([root, parent]) == root:
        try:
            os.rmdir(parent)
        except OSError:
            # not empty (or not removable): stop pruning here
            break
        parent = os.path.dirname(parent)
    return True


def path2hash(root_path, path):
    """Turn a container directory path back into a hash string.

    The path is taken relative to ``root_path``, normalised, and its
    components are concatenated with the path separators removed.
    """
    relative = os.path.relpath(path, start=root_path)
    return os.path.normpath(relative).replace(os.path.sep, '')

def hash2path(root_path, hsh):
    """Map a hash string to its container directory path.

    The first eight characters become four two-character directory levels
    under ``root_path``; the remaining characters form the final level.
    """
    fan_out = [hsh[start:start + 2] for start in range(0, 8, 2)]
    return os.path.join(root_path, *fan_out, hsh[8:])

def hash2filepath(root_path, hsh):
    """Locate the single stored file for ``hsh`` in the store at ``root_path``.

    Returns the file's path, or False (after printing a message) when the
    container directory does not exist, holds no visible file, or holds more
    than one visible file.
    """
    container = hash2path(root_path, hsh)
    if not os.path.exists(container):
        print('directory {} does not exist'.format(container))
        return False

    # we ignore files starting with a dot e.g. .DS_Store
    visible = [entry for entry in os.listdir(container)
               if not entry.startswith('.')]

    if not visible:
        print('directory exists but is empty')
        return False
    if len(visible) > 1:
        print('directory exists but contains more than one file!')
        return False

    (only_entry,) = visible
    return os.path.join(container, only_entry)

In [5]:
# Read chunk size in bytes; this repeats the value already assigned to
# BLOCKLENGTH in the helper-function cell earlier in the notebook.
BLOCKLENGTH = 262144 

In [6]:
# stuff to do Boolean queries

# Node kinds of the Boolean expression tree built from P objects.
ATOM, NOT, OR, AND = range(4)


class P:
    """A node of a Boolean predicate expression.

    ``P(x)`` wraps an atom ``x``.  Nodes combine with ``~`` (NOT), ``|`` (OR)
    and ``&`` (AND), each producing a new node whose ``args`` are the operand
    nodes and whose ``kind`` records the operator.
    """

    def __init__(self, *args, kind=ATOM):
        self.kind = kind
        self.args = args

    def __invert__(self):
        return P(self, kind=NOT)

    def __or__(self, other):
        return P(self, other, kind=OR)

    def __and__(self, other):
        return P(self, other, kind=AND)

    def to_str(self, atom_fun):
        """Render the expression as text, using ``atom_fun`` to render atoms.

        Binary nodes are parenthesised.  A node of unrecognised kind renders
        as None.
        """
        kind = self.kind
        if kind == ATOM:
            return atom_fun(self.args[0])
        if kind == NOT:
            operand = self.args[0].to_str(atom_fun)
            return 'NOT {}'.format(operand)

        if kind == OR:
            template = '({} OR {})'
        elif kind == AND:
            template = '({} AND {})'
        else:
            return None

        left = self.args[0].to_str(atom_fun)
        right = self.args[1].to_str(atom_fun)
        return template.format(left, right)

def build_tag_query(p):
    """Build a parameterised SQL query selecting Data rows by tag predicates.

    ``p`` is a ``P`` expression whose atoms are ``(key, operator, value)``
    tuples.  Each distinct atom becomes a common table expression
    ``predN`` selecting the hashes whose tag ``key`` has a value satisfying
    ``operator value``; the Boolean structure of ``p`` is rendered over
    ``hash IN predN`` terms.

    Returns ``(query, params)`` where ``params`` lists the key and value of
    each atom in CTE order, ready for use as SQL parameters.

    Raises ``ValueError`` when an atom's operator is not a recognised binary
    comparison.  The operator is spliced into the SQL text (it cannot be a
    bound parameter), so it is restricted to a fixed allow-list.
    """
    allowed_ops = frozenset((
        '=', '==', '!=', '<>', '<', '<=', '>', '>=',
        'IS', 'IS NOT',
        'LIKE', 'NOT LIKE',
        'GLOB', 'NOT GLOB',
        'REGEXP', 'NOT REGEXP',
        'MATCH', 'NOT MATCH',
    ))

    atoms = {}

    def walk(node):
        if node.kind == ATOM:
            # number each distinct atom once, in first-seen order
            atoms.setdefault(node.args[0], len(atoms) + 1)
        else:
            for child in node.args:
                walk(child)

    walk(p)

    def atom_fun(a):
        return 'hash IN pred{}'.format(atoms[a])

    subqs = []
    params = []
    for k, v in atoms.items():
        # normalise case and whitespace before checking the allow-list
        operator = ' '.join(str(k[1]).upper().split())
        if operator not in allowed_ops:
            raise ValueError('unsupported tag operator: {!r}'.format(k[1]))
        subqs.append('pred{} AS (SELECT hash FROM Tag WHERE key = ? AND value {} ?)'.format(v, operator))
        params.extend([k[0], k[2]])

    query = 'WITH {} SELECT * FROM Data WHERE {}'.format(', '.join(subqs), p.to_str(atom_fun))
    return (query, params)

In [7]:
class HashStash():
    def __init__(self, db_path):
        self.db_path = db_path
        if not os.path.exists(db_path):
            print('no file at {}. '
                  'creating a new database here.'.format(db_path))
            self.init_db()
        else:
            self.conn = sqlite3.connect(db_path)
            
    def __del__(self):
        self.conn.close()
            
    def init_db(self):
        self.conn = sqlite3.connect(self.db_path)
        c = self.conn.cursor()
        c.execute('CREATE TABLE Data ('
                  'hash TEXT NOT NULL, '
                  'num_bytes INT, '
                  'media_type TEXT, '
                  'title TEXT, '
                  'PRIMARY KEY (hash) );')
        self.conn.commit()
        c.execute('CREATE TABLE HashStore ('
                  'id INTEGER NOT NULL PRIMARY KEY, '
                  'name TEXT UNIQUE, '
                  'root_path TEXT UNIQUE );')
        self.conn.commit()
        c.execute('CREATE TABLE Copy ('
                  'hash TEXT NOT NULL, '
                  'store_id INT, '
                  'FOREIGN KEY(hash) REFERENCES Data(hash), '
                  'FOREIGN KEY(store_id) REFERENCES HashStore(id), '
                  'UNIQUE(hash, store_id) );')
        self.conn.commit()
        c.execute('CREATE TABLE Source ('
                  'hash TEXT NOT NULL, '
                  'description TEXT NOT NULL, '
                  'mtime_ns INT, '
                  'ctime_ns INT, '
                  'hashing_started INT NOT NULL, '
                  'hashing_completed INT NOT NULL, '
                  'FOREIGN KEY (hash) REFERENCES Data(hash) );')
        self.conn.commit()
        del c
        
    def add_store(self, root_path, name):
        c = self.conn.cursor()
        c.execute('INSERT INTO HashStore (name, root_path) '
                  'VALUES (?, ?)', (name, root_path))
        self.conn.commit()
        del c
        
    def get_store_info(self, store_name):
        c = self.conn.cursor()
        c.execute('SELECT id, root_path FROM HashStore '
                  'WHERE name = ?', (store_name,))
        rows = c.fetchall()
        if len(rows) == 0:
            print('no store with name {}'.format(store_name))
            return None
        if len(rows) > 1:
            print('unexpected: more than one '
                  'store with name {}'.format(store_name))
            return None
        return rows[0] #(id, root_path)
        
    def add_file(self, file_path,
                 store_name='default', move=True):
        """Add the file at ``file_path`` to the store named ``store_name``.

        With ``move=True`` the file is hashed and moved into the store.  With
        ``move=False`` it is copied in through a staging file while being
        hashed; if the source's modification time changed during the copy,
        the copy is aborted, a message is printed and None is returned.

        The database is then updated: a Data row is inserted if the hash is
        not yet known (with a guessed media type and the file's base name as
        title), a Copy row is inserted if this store has no record of the
        hash, and a Source row recording the absolute source path, its
        mtime/ctime and the hashing timestamps is always inserted.

        Returns the SHA-256 hex digest of the file.
        """
        store_id, store_root_path = self.get_store_info(store_name)
        path = os.path.abspath(file_path)
        name = os.path.basename(path)

        # a single stat call, so mtime and ctime come from the same snapshot
        source_stat = os.stat(path)
        mtime = source_stat.st_mtime_ns
        ctime = source_stat.st_ctime_ns

        if move:
            # move the file in
            hash_rec = move_file(store_root_path, file_path)
        else:
            # copy mode
            hash_rec = open_file(store_root_path, name)
            try:
                with open(path, 'rb') as f:
                    datablock = f.read(BLOCKLENGTH)
                    while len(datablock) > 0:
                        write_to_file(hash_rec, datablock)
                        datablock = f.read(BLOCKLENGTH)

                mtime_2 = os.stat(path).st_mtime_ns
            except BaseException:
                # do not leave the staging file (and its open handle)
                # behind when the copy fails part-way
                abort_file(hash_rec)
                raise

            if mtime_2 != mtime:
                # file modified while we were hashing it
                # it is possible that this method doesn't
                # detect modification
                # e.g. mod1...start hash...mod2...end hash
                # but time(mod1) = time(mod2) at the available
                # resolution - which is one second for some FS
                print('ABORTED. The file at {} '
                      'was modified while we were '
                      'computing its hash'.format(path))
                abort_file(hash_rec)
                return None
            else:
                save_file(hash_rec)

        sha256 = hash_rec['sha256']

        c = self.conn.cursor()
        c.execute('SELECT hash FROM Data '
                  'WHERE hash = ?',
                  (sha256,))
        rows = c.fetchall()
        if len(rows) == 0:
            # the hash is not currently in our DB

            # get media type
            p = hash2filepath(store_root_path, sha256)
            media_type = filetype.guess_mime(p)

            c.execute('INSERT INTO Data '
                      '(hash, num_bytes, media_type, title) '
                      'VALUES (?, ?, ?, ?);',
                      (sha256, hash_rec['bytes_hashed'],
                       media_type, name))

        c.execute('SELECT hash FROM Copy '
                  'WHERE hash = ? AND store_id = ?',
                  (sha256, store_id))
        rows = c.fetchall()
        if len(rows) == 0:
            # DB has no record of this copy
            c.execute('INSERT INTO Copy (hash, store_id) '
                      'VALUES (?, ?);',
                      (sha256, store_id))

        c.execute('INSERT INTO Source '
                  '(hash, description,'
                  ' mtime_ns, ctime_ns,'
                  ' hashing_started,'
                  ' hashing_completed) '
                  'VALUES (?, ?, ?, ?, ?, ?)',
                  (sha256, path,
                   mtime, ctime,
                   hash_rec['hashing_started'],
                   hash_rec['hashing_completed']))
        self.conn.commit()
        del c
        return sha256
    
    def create_tag_table(self):
        c = self.conn.cursor()
        c.execute('CREATE TABLE Tag '
                  '(hash TEXT, '
                   'key TEXT, '
                   'value TEXT, '
                   'FOREIGN KEY (hash) REFERENCES Data(hash) )')
        self.conn.commit()
        del c
    
    def add_tag(self, hsh, key, value):
        c = self.conn.cursor()
        c.execute('INSERT INTO Tag (hash, key, value) VALUES (?, ?, ?)',
                  (hsh, key, value))
        conn.commit()
        del c
        
    def get_path(self, hsh):
        """Return the path of the stored file for ``hsh`` in the store named
        'default', as resolved by ``hash2filepath``."""
        _store_id, default_root = self.get_store_info('default')
        stored_path = hash2filepath(default_root, hsh)
        return stored_path
        
    def add_tags(self, hsh, tag_fn):
        tags = tag_fn(self.get_path(hsh))
        c = self.conn.cursor()
        c.executemany('INSERT INTO Tag (hash, key, value) VALUES (?, ?, ?)',
                      [(hsh, k, v) for k, v in tags])
        self.conn.commit()
        del c
        
    def delete_sources(self, hsh):
        c = self.conn.cursor()
        c.execute('DELETE FROM Source WHERE hash = ?', (hsh,))
        self.conn.commit()
        del c
        
    def delete_copy(self, hsh, store_name):
        """Remove the copy of ``hsh`` held in the store ``store_name``.

        The Copy row is deleted and committed first; the stored file is then
        deleted from the store's directory tree via ``delete_file``.
        """
        store_id, store_root_path = self.get_store_info(store_name)

        cursor = self.conn.cursor()
        cursor.execute('DELETE FROM Copy WHERE hash = ? AND store_id = ?',
                       (hsh, store_id))
        self.conn.commit()
        del cursor

        delete_file(store_root_path, hsh)
        
    def delete_hash(self, hsh):
        c = self.conn.cursor()
        c.execute('SELECT '
                  'Copy.store_id, name, root_path '
                  'FROM Copy JOIN HashStore '
                  'ON Copy.store_id = id '
                  'WHERE Copy.hash = ?', (hsh,))
        rows = c.fetchall()
        if len(rows) > 0:
            print('There are copies of this hash the stores:')
            for r in rows:
                print('{} (id:{}) @ {}'.format(r[1],r[0],r[2]))
            return False
        c.execute('DELETE FROM Data WHERE hash = ?', (hsh,))
        self.conn.commit()
        del c
        return True
        
    def check_integrity_of_store(self, store_name):
        """Verify every copy recorded for the store ``store_name``.

        Hashes are fetched in blocks of ``REC_LIMIT`` rows; the block index is
        printed before each block is processed, and a message is printed for
        every hash whose stored file fails ``check_file``.
        """
        store_id, store_root_path = self.get_store_info(store_name)

        cursor = self.conn.cursor()
        cursor.execute('SELECT hash FROM Copy WHERE store_id = ?',
                       (store_id,))
        REC_LIMIT = 1000
        block_index = 0
        while True:
            row_block = cursor.fetchmany(REC_LIMIT)
            if len(row_block) == 0:
                break
            print(block_index)
            block_index += 1
            for row in row_block:
                stored_hash = row[0]
                if not check_file(store_root_path, stored_hash):
                    print('bad store of {}'.format(stored_hash))
    
    def quick_paths(self, query, root=None):
        """Run ``query`` and add a ``path`` column with each row's stored file path.

        ``query`` must return a ``hash`` column.  ``root`` is the store root
        used to resolve paths; when it is None the root of the store named
        'default' is used.  Returns the resulting DataFrame.
        """
        # identity test: None is a singleton, so ``is`` is the correct check
        if root is None:
            _, root = self.get_store_info('default')
        r = pd.read_sql(query, self.conn)
        r['path'] = r['hash'].map(lambda h: hash2filepath(root, h))
        return r
    
    

In [8]:
def to_str(x):
    """Convert a tag value to text: values whose type is exactly
    ``MP4FreeForm`` are decoded, everything else goes through ``str``."""
    is_freeform = type(x) == mutagen.mp4.MP4FreeForm
    return x.decode() if is_freeform else str(x)

def dict_to_tags(d, key_prefix=''):
    """Flatten a mapping into a list of ``(key, value)`` string-valued tags.

    Each key is prefixed with ``key_prefix``.  A value whose type is exactly
    ``list`` yields one tag per element; any other value yields a single tag.
    Values are converted with ``to_str``.
    """
    tag_list = []
    for key, value in d.items():
        members = value if type(value) == list else [value]
        tag_list.extend(
            (key_prefix + key, to_str(member)) for member in members)
    return tag_list

In [9]:
# FLAC tagging
from mutagen import flac as mflac

def flac_tags(file_path):
    """Read the FLAC file at ``file_path`` and return its tags as a flat
    list of ``(key, value)`` pairs with keys prefixed by 'flac:'."""
    flac_file = mflac.FLAC(file_path)
    return dict_to_tags(dict(flac_file), key_prefix='flac:')

In [10]:
# MP4 tagging
from mutagen import mp4 as mmp4

def mp4_tags(file_path):
    """Read the MP4 file at ``file_path`` and return its tags as a flat
    list of ``(key, value)`` pairs with keys prefixed by 'mp4:'."""
    mp4_file = mmp4.MP4(file_path)
    return dict_to_tags(dict(mp4_file), key_prefix='mp4:')

In [11]:
# MP3 tagging
from mutagen import mp3 as mmp3

def mp3_tags(file_path):
    """Read the MP3 file at ``file_path`` and return ``(key, value)`` tags.

    Entries whose key starts with 'PRIV' are skipped.  For every other entry,
    one tag is produced per item of its ``text`` attribute (entries without
    that attribute produce none); keys are prefixed with 'id3:'.
    """
    frames = dict(mmp3.MP3(file_path))
    return [('id3:'+frame_id, str(text))
            for frame_id, frame in frames.items()
            if not frame_id.startswith('PRIV')
            for text in getattr(frame, 'text', [])]

In [12]:
def delete_hashes_full(hash_list, stash=None, store_name='default'):
    """Completely remove each hash in ``hash_list`` from a stash.

    For every hash, the hash is printed, then its Source rows, its copy in
    ``store_name`` and finally its Data row are deleted.

    ``stash`` is the HashStash to operate on.  When it is None, the
    notebook-global ``hs`` is used, which keeps existing one-argument calls
    working; passing it explicitly removes the dependency on a global that
    is defined in a later cell.
    """
    if stash is None:
        stash = hs
    for hsh in hash_list:
        print(hsh)
        stash.delete_sources(hsh)
        stash.delete_copy(hsh, store_name)
        stash.delete_hash(hsh)

In [13]:
def file_recurse(path, func, *args, **kwargs):
    """Call ``func(file_path, *args, **kwargs)`` for every file under ``path``.

    Files whose name starts with a dot are skipped.  When
    ``command_args.verbose`` is set, each file path and the value returned
    by ``func`` are printed.
    """
    for dirname, _subdirs, files in os.walk(path):
        visible = (entry for entry in files if not entry.startswith('.'))
        for entry in visible:
            fp = os.path.join(dirname, entry)
            if command_args.verbose:
                print(fp)
            result = func(fp, *args, **kwargs)
            if command_args.verbose:
                print(result)

In [14]:
# Open the stash database used by the rest of the notebook; HashStash creates
# a new database at this path if no file exists there.
hs = HashStash('/home/william/md.db')

In [15]:
# Stand-in for a parsed-arguments object: a function object is used purely as
# an attribute holder so that file_recurse can read command_args.verbose.
command_args = lambda : None
command_args.verbose = True

In [16]:
# Disabled by the `if False` guard. When enabled, it adds a 'podcast' tag to
# every hash that has an 'id3:PCST' tag, copying that tag's value.
if False:
    c = hs.conn.cursor()
    c.execute('INSERT INTO Tag (hash, key, value) '
              'SELECT hash, "podcast", value FROM Tag WHERE key = "id3:PCST"')
    hs.conn.commit()

In [17]:
# Preview up to ten rows of the Data table.
pd.read_sql('SELECT * FROM Data LIMIT 10', hs.conn)

Unnamed: 0,hash,num_bytes,media_type,title
0,ca7a6bd9c1d7947eeaea3dae789aea484834fc7ef793a5...,13222501,audio/x-flac,Ryan Patrick Maguire - moDernisT.flac
1,2192dd5634d880af8576936ceb0f026e0f74e2feb6cb27...,3145728,audio/x-flac,20210219141549414447
2,00078e67daba3375fc335093b4540acb25b0096b8c4998...,3430003,audio/mpeg,15 Off the Beach (Spilling Coffee).mp3
3,00092329f004f36e27d1cea9360fd8c5816fc6d5a0154d...,9033808,audio/mpeg,05 Glitch.mp3
4,000c4f11952767e62f2d7fc0cdc8aafb1b1b6130bfd085...,22307302,audio/x-flac,1-18 DJ Food - The Breaks of Wrath.flac
5,00178511c4d32326e203af213b8aeee149d469230f616d...,6298257,audio/mpeg,06 Windscale 2.mp3
6,001bcb305fb8e381e78bfe243097e9ae8b327f6b819eb4...,10498976,audio/x-flac,10 Hippy Death Suite.flac
7,001cb7d4a25c7772b62dd2eb1001ef3fae8629c7f438c7...,6677607,audio/mpeg,07 The Spiracles.mp3
8,00273e61ebdc21ddad2e90bef7cc36b1ca6b33eed12bdc...,4273802,audio/mpeg,08 Victory Egg.mp3
9,002e8dd45932564e671b7507210c72df8bb4d73bf2d351...,3248284,audio/mpeg,04 Oraphis yn Delphie.mp3


In [18]:
# Every audio item with its 'artist' tag value. The LEFT JOIN keeps items that
# have no 'artist' tag (their artist is NULL); an item with several 'artist'
# tags appears once per tag.
music = pd.read_sql('SELECT Data.hash, a.value as artist FROM '
                    'Data LEFT JOIN (SELECT hash, value FROM Tag WHERE key = "artist") a '
                    'ON Data.hash = a.hash '
                    'WHERE Data.media_type LIKE "audio/%" ', hs.conn)

In [19]:
# Show the first rows of the artist listing.
music.head()

Unnamed: 0,hash,artist
0,ca7a6bd9c1d7947eeaea3dae789aea484834fc7ef793a5...,Ryan Patrick Maguire
1,2192dd5634d880af8576936ceb0f026e0f74e2feb6cb27...,μ-Ziq
2,00078e67daba3375fc335093b4540acb25b0096b8c4998...,Swell Maps
3,00092329f004f36e27d1cea9360fd8c5816fc6d5a0154d...,Autechre
4,000c4f11952767e62f2d7fc0cdc8aafb1b1b6130bfd085...,DJ Food


In [23]:
# Audio items with both an 'artist' and an 'album' tag. The inner joins drop
# any item that lacks either tag.
music = pd.read_sql('SELECT Data.hash, a.value as artist, b.value as album FROM '
                    'Data JOIN (SELECT hash, value FROM Tag WHERE key = "artist") a '
                    'ON Data.hash = a.hash '
                    'JOIN (SELECT hash, value FROM Tag WHERE key = "album") b '
                    'ON Data.hash = b.hash '
                    'WHERE Data.media_type LIKE "audio/%" ', hs.conn)

In [24]:
# Display the artist/album listing.
music

Unnamed: 0,hash,artist,album
0,013b7c68c3d43cb37112257a7aa53754dd0a2d313247e2...,Deniece Williams,Footloose: 15th Anniversary Collector's Edition
1,01a9d1ba325b1d5af145843652d667e4d3cc8d5f4b2900...,Talking Heads,More Songs About Buildings and Food
2,01eb1d88af840bd273528c220e5d817156c2e9a5d3b23a...,The Fall,Slates
3,02969752d01ad2fb103d884ac42da3e603687a089b6c73...,Talking Heads,Fear of Music
4,03d18386b59fbcca4b42faede16e71b771b8e055d52e0a...,Beat Happening,Black Candy
...,...,...,...
10922,2083661abcaf6f148b105801934546c36995e176307fc1...,Grandaddy,A Pretty Mess By This One Band [EP]
10923,20863bfc1a621a8517297b5692e7608d8e38af2d9801ce...,The Hospitals,I've Visited The Island Of Jocks and Jazz
10924,20c9cb1b87ff366c25fbd9f977cb58b78a9e1f5c55479c...,Björk,Vespertine
10925,fb0fa5c8cee83b7d8130ea6021a507367701307a95cdad...,Ken and Andy and WFMU,Seven Second Delay with Ken and Andy Podcast |...


In [29]:
# Audio items with 'artist', 'album' and 'title' tags (inner joins: items
# lacking any of the three are dropped), ordered for export.
# NOTE(review): the result has two columns named `title` (Data.title and the
# tag value aliased as title), so the DataFrame has a duplicate column label
# and it is unclear which one ORDER BY title refers to -- consider aliasing
# Data.title to a distinct name.
music = pd.read_sql('SELECT Data.hash, Data.title, a.value as artist, b.value as album, t.value as title FROM '
                    'Data JOIN (SELECT hash, value FROM Tag WHERE key = "artist") a '
                    'ON Data.hash = a.hash '
                    'JOIN (SELECT hash, value FROM Tag WHERE key = "album") b '
                    'ON Data.hash = b.hash '
                    'JOIN (SELECT hash, value FROM Tag WHERE key = "title") t '
                    'ON Data.hash = t.hash '
                    'WHERE Data.media_type LIKE "audio/%" '
                    'ORDER BY artist, album, title', hs.conn)

In [31]:
# Write the listing to music.csv (a relative path, so it lands in the
# current working directory).
music.to_csv('music.csv')

In [25]:
# Example Boolean tag query: items whose 'flac:artist' tag matches 'St%' AND
# whose 'flac:title' tag matches 'B%'. The generated SQL and its parameters
# are printed before the query is run.
q, params = build_tag_query(P(('flac:artist', 'LIKE', 'St%')) & 
                             P(('flac:title', 'LIKE', 'B%')))
print(q)
print(params)
print('--')
r = pd.read_sql(q, hs.conn, params=params)
r


WITH pred1 AS (SELECT hash FROM Tag WHERE key = ? AND value LIKE ?), pred2 AS (SELECT hash FROM Tag WHERE key = ? AND value LIKE ?) SELECT * FROM Data WHERE (hash IN pred1 AND hash IN pred2)
['flac:artist', 'St%', 'flac:title', 'B%']
--


Unnamed: 0,hash,num_bytes,media_type,title
0,02277212cd1397c19cebcd52c67ee6be8d99b09242320b...,27505122,audio/x-flac,11 Bop Scotch.flac
1,51c92d74f402a6b4ae99c2e58df6f12afc48f3d801333c...,13106644,audio/x-flac,01.Black_Ants_in_Sound-Dust.flac
2,b4d1daad3df6be36f0fe09c8225d69309d141f16b82c45...,22285863,audio/x-flac,03 Barock - Plastik.flac
3,e01febea9cd164ac46b918400905151d67bb2bf28aa745...,35686523,audio/x-flac,04.Baby_Lulu.flac


In [32]:
# Drop the notebook's reference to the stash; HashStash.__del__ closes the
# database connection when the object is finalised.
del(hs)

In [14]:
#import pandas as pd
#pd.options.display.max_rows = None

In [15]:
#delete_hashes_full(r['hash'].value_counts().index)

In [16]:
#pd.options.display.max_rows = None

In [17]:
#pd.read_sql('SELECT hash, title FROM Data WHERE title LIKE "%Chimes%"', hs.conn)

In [18]:
#import pandas as pd

In [19]:
#hs.create_tag_table()

In [20]:
#hs.add_store('/media/wdm/magwitch/hashstash/data', 'default')

In [21]:
#pd.read_sql('SELECT media_type, count(hash) FROM Data GROUP BY media_type', hs.conn)

In [22]:
# tag all the flacs
#flac_shas = pd.read_sql('SELECT hash FROM Data WHERE media_type="audio/x-flac"',
#                        hs.conn)['hash']
#for sha in flac_shas:
#    print(sha)
#    hs.add_tags(sha, flac_tags)

In [23]:
# tag all the mp3
#flac_shas = pd.read_sql('SELECT hash FROM Data WHERE media_type="audio/mpeg"',
#                        hs.conn)['hash']
#for sha in flac_shas:
#    print(sha)
#    hs.add_tags(sha, mp3_tags)

In [24]:
# tag all the mp4
#shas = pd.read_sql('SELECT hash FROM Data WHERE media_type="audio/m4a"',
#                   hs.conn)['hash']
#for sha in shas:
#    print(sha)
#    hs.add_tags(sha, mp4_tags)

In [25]:
#quick_paths('SELECT hash, title FROM Data WHERE media_type="image/bmp"',
#            '/media/wdm/magwitch/hashstash/data').loc[0, 'path']

In [26]:
#hs.check_integrity_of_store('default')

In [28]:
# lost file - file this in the stash!
# 09997552-1EF0-4678-A86D-00E70BBBB6B1