# File Sorter

Sort the files in a folder by their extension, then move each file into the sub-folder for its category.

In [1]:
# Folder to sort: the files directly inside it are moved into category sub-folders beneath it.
# NOTE: machine-specific absolute path -- change it before running on another computer.
dir_loc = r'C:\Users\amaru\Documents\SCRIPT_TEST\file_sorter'

# Setup

## Libraries

In [2]:
import os
import pathlib
import shutil

import glob2 as glob2
import pandas as pd

## Extension Metadata

In [3]:
# The extension -> category mapping is kept in a separate CSV file so that it
# can be updated without editing this notebook.
metadata_path = r'C:\Users\amaru\Documents\Data Analyst\didactic-tribble\file_sorter\file_sorter_metadata.csv'

# Trim stray whitespace, then express the Windows path with forward slashes.
metadata_lib = pathlib.PureWindowsPath(str.strip(metadata_path)).as_posix()

# The metadata file is read as semicolon-delimited.
file_dict = pd.read_csv(filepath_or_buffer=metadata_lib, sep=';')
file_dict.head()

Unnamed: 0,extension,category
0,aac,Audio
1,flac,Audio
2,m4a,Audio
3,mp3,Audio
4,ogg,Audio


## Directory

### Sub-folder

In [4]:
# Recursively list dir_loc and every folder below it; the pattern ends with a
# slash so that only directories are matched.
subfolder_pattern = dir_loc + '/**/'
folder_output = glob2.glob(subfolder_pattern, recursive=True)

folder_output

['C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\Audio\\',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\Compressed\\',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\Document\\',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\Executable\\',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\Others\\',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\Picture\\',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\Video\\']

# Processing

In [5]:
# Top-level entries of dir_loc whose name contains a dot (i.e. has an extension).
# The '*.*' pattern also matches sub-folders with a dot in their name, so keep
# regular files only -- otherwise such a folder would be moved as if it were a file.
file_list = [
    entry
    for entry in glob2.glob(dir_loc + '/*.*')
    if os.path.isfile(entry)
]
print(len(file_list), 'file(s)')
file_list

9 file(s)


['C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\1699248981616.png',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\20240307_SURAT REFUND_SITI NURDIANA.docx',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\BepInEx_x64_5.4.22.0.zip',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\calibre-5.33.2.msi',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\FORMAT SURAT REFUND NEW.pdf',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\full-score-testcases.zip',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\TeamViewer_Setup_x64.exe',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\Untitled.ipynb',
 'C:\\Users\\amaru\\Documents\\SCRIPT_TEST\\file_sorter\\videoplayback.mp4']

In [6]:
# One row per file to sort. Naming the column at construction time (instead of
# renaming column 0 afterwards) guarantees a 'path' column even when file_list
# is empty, so the following cells do not raise KeyError on an empty folder.
# dtype=object keeps the column string-typed in that empty case as well.
df = pd.DataFrame({'path': file_list}, dtype=object)

df.head()

Unnamed: 0,path
0,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...
1,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...
2,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...
3,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...
4,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...


In [7]:
# File name = last component of each path. os.path.basename splits on the
# platform's own separators (both '\\' and '/' on Windows), so it does not
# depend on the paths containing a literal backslash.
df['file'] = [os.path.basename(path) for path in df['path']]

In [8]:
# Extension = text after the last dot of the file name (the whole name when
# there is no dot). It is lower-cased so that 'PHOTO.JPG' and 'photo.jpg' map to
# the same metadata row.
# NOTE(review): assumes the metadata lists extensions in lower case, as the
# previewed rows do -- confirm for the full CSV.
df['extension'] = [name.rsplit('.', 1)[-1].lower() for name in df['file']]

In [9]:
# Attach the category of each extension; files with an unknown extension keep
# a missing (NaN) category.
# - Duplicate extensions in the metadata would duplicate file rows (and a file
#   would then be moved twice), so only the first row per extension is used.
# - Dropping a 'category' column left by a previous run keeps this cell
#   re-runnable; otherwise pandas would emit 'category_x' / 'category_y'.
extension_lookup = file_dict.drop_duplicates(subset='extension')
df = (
    df.drop(columns='category', errors='ignore')
      .merge(extension_lookup, how='left', on='extension')
)

In [10]:
# Destination folder of every file: <root>/<category>/, where files without a
# known category go to 'Others'.
# os.path.join avoids the doubled separator produced by concatenating onto
# folder_output[0] (which already ends with one); the empty last component
# keeps a trailing separator so the value always denotes a folder.
root_dir = folder_output[0]

df['new_path'] = [
    os.path.join(root_dir, 'Others' if pd.isna(cat) else cat, '')
    for cat in df['category']
]

In [11]:
# Preview the first rows of the plan: source path, file name, extension, category and destination folder.
df.head()

Unnamed: 0,path,file,extension,category,new_path
0,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...,1699248981616.png,png,Picture,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...
1,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...,20240307_SURAT REFUND_SITI NURDIANA.docx,docx,Document,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...
2,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...,BepInEx_x64_5.4.22.0.zip,zip,Compressed,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...
3,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...,calibre-5.33.2.msi,msi,Executable,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...
4,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...,FORMAT SURAT REFUND NEW.pdf,pdf,Document,C:\Users\amaru\Documents\SCRIPT_TEST\file_sort...


# File Moving

In [15]:
# Move every file into its destination folder.
for source, destination in zip(df['path'], df['new_path']):
    try:
        # Create the category folder on first use so a missing folder
        # (e.g. 'Others') does not abort the run.
        os.makedirs(destination, exist_ok=True)
        shutil.move(source, destination)
    except OSError as err:
        # shutil.Error is a subclass of OSError; it is raised, for example, when
        # a file with the same name already exists in the destination. Report
        # the file and carry on with the remaining ones instead of stopping
        # half-way through.
        print(f'Could not move {source}: {err}')

In [16]:
# Re-scan the folder: any regular file with an extension that is still at the
# top level was not moved. Folders whose name contains a dot also match the
# '*.*' pattern, so they are filtered out rather than reported as files.
summary_list = [
    entry
    for entry in glob2.glob(dir_loc + '/*.*')
    if os.path.isfile(entry)
]

if summary_list:
    print(len(summary_list), 'file(s) not moved')
    print(summary_list)
else:
    print('All files are moved')

All files are moved
