## Copy/move files from CSV files
This notebook copies or moves files based on two columns of a CSV file.

### Load libraries

In [4]:
import os
import shutil
import pandas

### Settings

In [5]:
# Root folder holding the images; its subfolders are searched as well.
inputdir = r'D:\FYP\Dataset\folder_structure\supervisely\wrist\img'

# File extensions (lower-case, with the leading dot) that are processed.
filetypes = {'.png', '.jpg', '.jpeg', '.bmp'}

# Destination root; one subfolder per class is created underneath it.
outputdir = r'D:\FYP\Dataset\Output Dataset With Classes'

# True = move files to the destination, False = copy them.
# NOTE: main() currently always copies and does not consult this flag.
move_files = False

# Location of the CSV file that maps file names to a class value.
csv_path = r'D:\FYP\Dataset\dataset.csv'

# CSV column that holds the file names (without extension).
csv_column1 = 'filestem'

# CSV column that holds the sort variable, e.g. 'Fracture'.
csv_column2 = 'fracture_visible'

# Character separating the columns of the CSV file.
csv_delimiter = ','

### Main code

In [None]:
# Validate the configured paths before touching any file: the input folder and
# the CSV file must exist, and input and output must not be the same folder.
# The folders are compared in normalised form so that a trailing separator or
# (on case-insensitive platforms) different letter case cannot hide a match.
if (
    not os.path.exists(inputdir)
    or os.path.normcase(os.path.normpath(inputdir)) == os.path.normcase(os.path.normpath(outputdir))
    or not os.path.isfile(csv_path)
):
    print('ERROR (Directory/file issues): Please check paths.')
    # exit() is a helper added by the `site` module for interactive sessions;
    # raising SystemExit works everywhere and reports a failing status.
    raise SystemExit(1)

# Fill dictionary from columns: {value of csv_column1: value of csv_column2}.
# dtype=str keeps every cell as text; dropna() discards rows in which either of
# the two columns is empty, so only rows with a class value end up in `csv`.
df = pandas.read_csv(csv_path, dtype=str, sep=csv_delimiter, usecols=[csv_column1, csv_column2]).dropna()
csv = df.set_index(csv_column1)[csv_column2].to_dict()

# Iterate over FILES in folder and subfolders
def main():
    """Copy the images below ``inputdir`` into class subfolders of ``outputdir``.

    Reads the module-level settings ``inputdir``, ``outputdir`` and
    ``filetypes`` and the ``csv`` dictionary (file stem -> class value).

    For every file whose extension is listed in ``filetypes``:

    * the name is split on ``_``; the first five fields
      (``pid``, ``ext``, ``rc``, ``projection``, ``sex_and_age``) form the
      output name, followed by ``_1`` or ``_0`` and ``.png``;
    * if the file stem has a non-empty entry in ``csv`` the file goes to
      ``outputdir/<csv value>`` with suffix ``_1``, otherwise to
      ``outputdir/0`` with suffix ``_0``;
    * files whose target already exists, or whose name has fewer than five
      ``_``-separated fields, are reported and skipped.

    Progress is reported with ``print``; nothing is returned.
    """
    for folder, _, files in os.walk(os.path.normpath(inputdir), topdown=True):
        for file in files:
            # Compare the real extension; a substring test would also accept
            # names such as 'notes.png.txt'.
            if os.path.splitext(file)[1].lower() not in filetypes:
                continue

            inpfile = os.path.join(folder, file)

            fields = file.split('_')
            if len(fields) < 5:
                # The name does not follow the expected five-field pattern;
                # skip it instead of aborting the whole run with IndexError.
                print('SKIPPED (Unexpected filename), ' + inpfile)
                continue
            pid, ext, rc, projection = fields[:4]
            sex_and_age = fields[4].split('.')[0]

            # Key used for the CSV lookup: everything before the first dot.
            filestem = os.path.basename(file).split('.')[0]
            label = csv.get(filestem)
            if label:
                outputpath = os.path.normpath(os.path.join(outputdir, label))
                suffix = '1'
            else:
                outputpath = os.path.normpath(os.path.join(outputdir, '0'))
                suffix = '0'

            # NOTE(review): the output name always ends in '.png', even when
            # the input is .jpg/.jpeg/.bmp; the bytes are copied unchanged.
            inp_file_name = f'{pid}_{ext}_{rc}_{projection}_{sex_and_age}_{suffix}.png'
            outputfile = os.path.join(outputpath, inp_file_name)

            if os.path.isfile(outputfile):
                print('SKIPPED (File exists), ' + inpfile)
                continue

            os.makedirs(outputpath, exist_ok=True)
            # NOTE: files are always copied; the `move_files` setting is not
            # consulted here.
            shutil.copy2(inpfile, outputfile)
            print('SUCCESS (File processed), ' + outputfile)


if __name__ == '__main__':
    main()