# File Sorter

Is your 'Downloads' folder a mess? Want to tidy it up but feel too lazy to do it by hand? Then this script is for you.

In [1]:
# Folder to tidy up -- replace this with the path of your own 'Downloads' directory.
dir_loc = r'C:\Users\amaru\Documents\SCRIPT_TEST\file_sorter'

# Setup

## Libraries

In [2]:
import glob2 as glob2
import pandas as pd
import pathlib
import shutil

## Extension Metadata
Kept in a separate file for easier maintenance: add, delete, or edit entries as needed.
The metadata is stored in CSV format so that it can be opened in other apps, from your old pal Notepad to your trusty Excel.

In [3]:
# The extension -> category table is kept in its own CSV file so it can be
# updated without touching the notebook.
metadata_path = r'C:\Users\amaru\Documents\Data Analyst\didactic-tribble\file_sorter\file_sorter_metadata.csv'

# Trim stray whitespace and convert the Windows path to forward slashes
# before handing it to pandas.
metadata_lib = pathlib.PureWindowsPath(metadata_path.strip()).as_posix()

# The metadata file is semicolon-delimited.
# NOTE(review): the preview shows an 'Unnamed: 0' column, so the CSV presumably
# carries a saved index column -- confirm whether it is needed.
file_dict = pd.read_csv(filepath_or_buffer=metadata_lib, sep=';')
file_dict.head(n=5)

Unnamed: 0,extension,category
0,aac,Audio
1,flac,Audio
2,m4a,Audio
3,mp3,Audio
4,ogg,Audio


## Directory

### Sub-folder
Path for each file extension category

In [4]:
# Recursive glob rooted at dir_loc; file_sorter() uses the first match as the
# base folder for the category sub-folders.
folder_output = glob2.glob(f'{dir_loc}/**/', recursive=True)

# Functions

In [5]:
# Entries directly inside dir_loc whose name contains a dot (pattern '*.*');
# these are the files that will be sorted.
file_list = glob2.glob(f'{dir_loc}/*.*')

In [6]:
def file_sorter(files=None, metadata=None, base_dir=None):
    """Move files into per-category sub-folders chosen by file extension.

    The extension of each file (the text after the last '.') is looked up in
    the metadata table and the file is moved to ``<base_dir>/<category>``.
    Files whose extension has no category go to ``<base_dir>/Others``.
    Destination folders are created on demand, and a file that cannot be moved
    (for example because a file with the same name already exists in the
    destination) is reported instead of aborting the remaining moves.

    Parameters
    ----------
    files : iterable of str or path-like, optional
        Files to move. Defaults to the notebook-level ``file_list``.
    metadata : pandas.DataFrame, optional
        Table with an ``extension`` and a ``category`` column. Defaults to the
        notebook-level ``file_dict``.
    base_dir : str or path-like, optional
        Folder under which the category folders live. Defaults to the
        notebook-level ``dir_loc``.

    Returns
    -------
    None
        A short summary is printed.
    """
    # A bare file_sorter() call keeps working off the notebook-level variables.
    if files is None:
        files = file_list
    if metadata is None:
        metadata = file_dict
    if base_dir is None:
        base_dir = dir_loc
    root = pathlib.Path(base_dir)

    # Build an extension -> category lookup. Extensions are compared in lower
    # case so 'SONG.MP3' is treated like 'song.mp3'. setdefault keeps the first
    # row when the metadata lists the same extension more than once, so a file
    # is never scheduled to be moved twice.
    category_by_ext = {}
    for ext, category in zip(metadata['extension'], metadata['category']):
        if pd.isna(ext) or pd.isna(category):
            continue
        category_by_ext.setdefault(str(ext).strip().lower(), str(category).strip())

    moved = []
    failed = []
    for source in files:
        source = pathlib.Path(source)
        # Text after the last '.'; pathlib takes care of the path separator.
        extension = source.name.rpartition('.')[2].lower()
        # All files not mapped in the metadata are moved to the 'Others' folder.
        destination = root / (category_by_ext.get(extension) or 'Others')
        try:
            destination.mkdir(parents=True, exist_ok=True)
            # Passing the folder (not a full file path) makes shutil.move
            # refuse to overwrite an existing file with the same name.
            shutil.move(str(source), str(destination))
        except OSError as error:  # shutil.Error is a subclass of OSError
            failed.append((str(source), error))
        else:
            moved.append(str(source))

    if failed:
        print(len(failed), 'file(s) are not moved')
        for path, error in failed:
            print(path, '->', error)
    else:
        print(len(moved), 'file(s) are moved')

# Processing

In [7]:
# Run the sorter only when the input folder actually contains something to move.
if file_list:
    file_sorter()
else:
    print('No files need to be moved')

9 file(s) are moved


# Reversal Script
By default, the following cell is set to Raw.