In [1]:
import os
import subprocess
import numpy as np
import pandas as pd

def get_raw_lsl(path):
    """
    Returns list of files/folders at path, excluding links.

    The directory is read with ``os.scandir`` rather than by parsing the text
    output of ``ls``, so names containing spaces (or sharing a first word with
    a link) are reported correctly and no external binary is required.

    Parameters
    ----------
    path : str
        Directory to list.

    Returns
    -------
    numpy.ndarray
        Object array of shape (n, 3), one row per entry, sorted by name:
        ``[name (str), size in bytes (int), is_directory (bool)]``.
        Symbolic links and Windows directory junctions are left out.
        The shape is (0, 3) when the directory is empty or cannot be read,
        so callers can always index columns with ``[:, k]``.
    """
    def _is_junction(info):
        # Windows directory junctions are not symlinks; they are recognised by
        # their reparse tag (0xA0000003 is IO_REPARSE_TAG_MOUNT_POINT).  The
        # attribute does not exist on other platforms, hence the default.
        return getattr(info, 'st_reparse_tag', 0) == 0xA0000003

    rows = []
    try:
        with os.scandir(path) as entries:
            for entry in entries:
                try:
                    if entry.is_symlink():
                        continue
                    info = entry.stat(follow_symlinks=False)
                    if _is_junction(info):
                        continue
                    rows.append([entry.name,
                                 int(info.st_size),
                                 entry.is_dir(follow_symlinks=False)])
                except OSError:
                    # Entry vanished or is not accessible: skip it, keep going.
                    continue
    except OSError:
        # Missing / unreadable / non-directory path: best effort, report nothing.
        pass

    if not rows:
        return np.empty((0, 3), dtype=object)
    rows.sort(key=lambda row: row[0])
    return np.array(rows, dtype=object)

def get_directory_size(path):
    """
    Calculates the total size of a directory, including all subdirectories and files 
    but excluding linked folders/files.

    The tree is traversed with ``os.scandir``; symbolic links and Windows
    directory junctions are neither counted nor descended into.  Entries or
    directories that cannot be read are skipped rather than aborting the scan.

    Parameters
    ----------
    path : str
        Root directory to measure.

    Returns
    -------
    int
        Sum of the sizes, in bytes, of all regular (non-link) files below
        ``path``.  0 if ``path`` is itself a link, does not exist, or is not
        a readable directory.
    """
    def _is_junction(info):
        # Windows directory junctions are not symlinks; they are recognised by
        # their reparse tag (0xA0000003 is IO_REPARSE_TAG_MOUNT_POINT).  The
        # attribute does not exist on other platforms, hence the default.
        return getattr(info, 'st_reparse_tag', 0) == 0xA0000003

    try:
        if os.path.islink(path) or _is_junction(os.lstat(path)):
            return 0
    except OSError:
        return 0

    total_size = 0
    pending = [path]  # explicit stack: no recursion limit on deep trees
    while pending:
        current = pending.pop()
        try:
            with os.scandir(current) as entries:
                for entry in entries:
                    try:
                        if entry.is_symlink():
                            continue
                        info = entry.stat(follow_symlinks=False)
                        if _is_junction(info):
                            continue
                        if entry.is_dir(follow_symlinks=False):
                            pending.append(entry.path)
                        else:
                            total_size += info.st_size
                    except OSError:
                        # File vanished or access denied: skip this entry only.
                        continue
        except OSError:
            # Directory not readable: skip the subtree, keep the rest.
            continue
    return total_size

# Cache of computed listings, keyed by normalised path.
known = dict()
def space(path, refresh=False):
    """
    Returns list of files/folders at path, excluding links, with total size of each.

    Parameters
    ----------
    path : str
        Directory to inspect; it is normalised with ``os.path.normpath``.
    refresh : bool, default False
        When True, recompute even if a result for ``path`` is already cached
        in the module-level ``known`` dict.

    Returns
    -------
    pandas.DataFrame
        Columns ``name`` and ``GB`` (bytes / 1024**3, rounded to 3 decimals),
        sorted by ``GB`` descending.  The cached object itself is returned.
    """
    path = os.path.normpath(path)
    if refresh or path not in known:
        names = []
        sizes = []
        # Iterating row by row also copes with an empty listing, where
        # column indexing such as [:, 2] would raise.
        for name, size, is_dir in get_raw_lsl(path):
            if is_dir:
                size = get_directory_size(os.path.join(path, name))
            names.append(name)
            # Keep sizes as Python ints: NumPy's default int can be 32-bit
            # (e.g. Windows with NumPy 1.x) and would overflow above 2 GiB.
            sizes.append(int(size))
        # Explicit float column so rounding and sorting act on numbers rather
        # than on an object-dtype column.
        gigabytes = np.array(sizes, dtype='float64') / 1024**3
        df = pd.DataFrame({'name': names, 'GB': gigabytes})
        df['GB'] = df['GB'].round(3)
        known[path] = df.sort_values(by='GB', ascending=False).reset_index(drop=True)

    return known[path]

In [3]:
# Rescan the user profile, bypassing any cached result for this path.
space('C:\\Users\\gavin', refresh=True)


KeyboardInterrupt

