# Windows - Shared Drive Scanner

Scan a drive (and all of its sub-folders) and report the size and owner of every file of 50 MB or more.

In [None]:
# imports 
import win32security 
import os 
import pandas as pd
import smtplib 
from email.message import EmailMessage 

# Show full cell contents and every column when a DataFrame is displayed.
# The fully-qualified "display.*" keys are used because abbreviated keys are
# matched as patterns: "max_columns" also matches "styler.render.max_columns"
# on newer pandas releases, which makes set_option raise OptionError.
pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_columns", None)

Set `dir_name` to the top-level directory you want to scan, and `csv_save` to the folder where the CSV results should be saved. The name of the output file is set in `csv_name`.

In [None]:
# top level directory to scan from - passed to scanner() when the scan is run
dir_name = r"C:\example\file\path" 

# folder to save the results to (joined with csv_name to build the output path)
csv_save = r"C:\example\save\location" 

# file name of the results CSV written inside csv_save
csv_name = "driveScanResults.csv" 

In [None]:
# key functions to use 
def emailSend(Host=None, From=None, To=None, Subject=None, Message=None, Signature=None):
    """
    Function to send an email from within a python script, to a list of provided emails

    params:
        Host - host to push the email through
        From - the sender email address
        To - list of email addresses to be sent the mail
        Subject - subject header you would like email to have
        Message - the body of text to be in the email
        Signature - the sign off for the email, placed on a new line after
                    the message

    returns:
        None - an email is sent to the designated addresses within 'To'

    notes:
        Message and Signature are both optional: whichever is None is simply
        left out of the body instead of raising a TypeError.
    """
    # build the body from the parts that were actually supplied
    body_parts = [part for part in (Message, Signature) if part is not None]
    body = "\n".join(body_parts)

    msg = EmailMessage()
    msg['Subject'] = Subject
    msg['From'] = From
    msg['To'] = To
    msg.set_content(body)

    # push email over ONE connection; the with-block issues QUIT and closes
    # the socket even if sending fails (previously a second, unused
    # connection was opened just to be quit and the first was never closed)
    with smtplib.SMTP(Host) as server:
        server.send_message(msg)


# -----------------------------------------------------------

def get_files_and_owner(root, dirs, filename):
    """
    Collect the size and owner name of a single file.

    params:
        root - directory containing the file
        dirs - not used; kept so existing callers passing os.walk() output
               keep working
        filename - name of the file inside 'root'

    returns:
        dict with keys 'owner', 'size', 'location' and 'filename'

    raises:
        any error from os.path.getsize or the win32security calls is left to
        propagate to the caller
    """
    # full file path - built once and reused (joining 'root' onto it a second
    # time would duplicate the directory whenever 'root' is a relative path)
    full_path = os.path.join(root, filename)
    # size of the file as reported by os.path.getsize
    size = os.path.getsize(full_path)
    # get owner information - SID
    sd = win32security.GetFileSecurity(full_path, win32security.OWNER_SECURITY_INFORMATION)
    owner_sid = sd.GetSecurityDescriptorOwner()
    # pull out the name; the other two returned values are not needed
    # (and are no longer bound to a name that shadows the builtin 'type')
    name, _domain, _sid_type = win32security.LookupAccountSid(None, owner_sid)

    # return dict with relevant info
    return {'owner': name, 'size': size, 'location': root, 'filename': filename}


# -----------------------------------------------------------

def size_as_MB(size):
    """Return `size` divided by 2**20 (1 MB is taken as 1,048,576 here)."""
    units_per_mb = 2 ** 20
    return size / units_per_mb


# ------------------------------------------------------------

def scanner(filepath=None, min_size_mb=50, errors=None):
    """
    Walk 'filepath' and every directory below it, returning the large files found.

    params:
        filepath - top level directory to scan from
        min_size_mb - size cut off, compared against size_as_MB(); only files
                      of this size or bigger are returned (default 50)
        errors - optional list; for every file that could not be processed a
                 [full file path, repr(exception)] entry is appended to it,
                 so the caller can inspect what failed

    returns:
        list of [directory, filename, size in MB, owner] entries, one per
        file that met the size cut off

    Also prints the number of files processed and the number of errors.
    """
    filecount = 0
    large_files = []

    # use the caller's list when given, otherwise a private one
    if errors is None:
        errors = []
    errors_at_start = len(errors)

    # run through each directory in the required filepath
    # NOTE: os.walk is called without an onerror handler, so directories that
    # cannot be listed are skipped without being counted as errors
    for root, dirs, files in os.walk(filepath):
        # cycle through each file in directories
        for filename in files:
            try:
                # get details of each file
                file_details = get_files_and_owner(root, dirs, filename)
                filecount += 1
                owner = file_details['owner']

                # size check cut off - only pull back files big enough
                file_size = size_as_MB(file_details['size'])
                if file_size >= min_size_mb:
                    # append directory, name, size of file in MB and owner
                    large_files.append([root, filename, file_size, owner])

            # any part of process fails - record it and carry on scanning
            except Exception as e:
                # join with a path separator so the recorded path is usable
                errors.append([os.path.join(root, filename), repr(e)])

    print("files processed = {}".format(filecount))
    print("errors = {}".format(len(errors) - errors_at_start))

    return large_files

Now process the scan 

In [None]:
# Run the scan from dir_name, then write the results (largest file first)
# to a CSV inside csv_save. Any failure is reported by printing the exception.
try:
    scanned_files = scanner(dir_name)

    # one row per returned file
    result_columns = ["Location", "file_name", "file_size_mb", "owner"]
    scanned_files_df = pd.DataFrame(data=scanned_files, columns=result_columns)

    # biggest files first, sizes rounded to 4 decimal places
    scanned_files_out = scanned_files_df.sort_values("file_size_mb", ascending=False)
    scanned_files_out = scanned_files_out.round(4)

    # output location = save folder + backslash + file name
    save_path = "\\".join([csv_save, csv_name])

    scanned_files_out.to_csv(save_path, index=False)
    print("Scan successful - output saved")

except Exception as err:
    print("ERROR - scan has failure")
    print(err)