In [26]:
import os
import shutil
import csv
import re

In [60]:
def renamer(ref_csv, input_path, output_path, prepend=False):
    """Copy the ``.ab1`` files in ``input_path`` to ``output_path`` under new names.

    Each file name must contain ``-<digits>``; the first such run of digits is
    the file's index.  The index is looked up in the first column of the
    reference csv and the second column of the matching row becomes the new
    name (a trailing ``.ab1`` typed into the csv is dropped so the extension
    is not doubled).

    A full copy of ``input_path`` is also written to ``<output_path>/data_copy``
    for safekeeping.  Files in ``input_path`` are never modified.

    Args:
        ref_csv: path of the reference csv, read with ``read_csv``.
        input_path: directory holding the original ``.ab1`` files.
        output_path: directory receiving the renamed copies; created
            (including missing parent directories) if it does not exist.
        prepend: when False the copy is named ``<name>.ab1``; when True it is
            named ``<name>_<original file name>``.

    Raises:
        ValueError: an ``.ab1`` file name contains no ``-<digits>`` index.
        Exception: an index has no matching row in the reference csv.
    """
    names_ref = read_csv(ref_csv)

    def find_name(ab_file_index):
        # first matching row wins
        for row in names_ref:
            if len(row) < 2:
                # blank / truncated line in the csv: nothing to match against
                continue
            if int(row[0]) == ab_file_index:
                name = row[1]
                # check if user put .ab1 suffix in ref csv
                if len(name) > 4 and name.endswith('.ab1'):
                    return name[:-4]
                return name
        raise Exception(f"cannot find index in csv: {ab_file_index}")

    # Work out every destination name before touching the file system, so a
    # bad file name or a missing csv row cannot leave a half-filled output dir.
    # Sorted so the processing order does not depend on os.listdir ordering.
    ab_files = sorted(x for x in os.listdir(input_path) if x.endswith('.ab1'))
    planned = []
    for ab_file in ab_files:
        match = re.search(r'-(\d+)', ab_file)
        if match is None:
            raise ValueError(f"no '-<number>' index found in file name: {ab_file}")
        new_stem = find_name(int(match.group(1)))
        if prepend:
            new_file = f"{new_stem}_{ab_file}"
        else:
            new_file = f"{new_stem}.ab1"
        planned.append((ab_file, new_file))

    # make output dir (and any missing parents) if doesn't already exist
    os.makedirs(output_path, exist_ok=True)

    # make a copy of original directory and put it in output path (for safekeeping purposes);
    # copytree creates data_copy itself and dirs_exist_ok makes re-runs safe
    shutil.copytree(input_path, os.path.join(output_path, "data_copy"), dirs_exist_ok=True)

    # Copy straight to the final name instead of copy-then-rename: no
    # intermediate file that could overwrite an already renamed copy, and no
    # os.rename failure on platforms that refuse to replace an existing file.
    for ab_file, new_file in planned:
        shutil.copy2(os.path.join(input_path, ab_file),
                     os.path.join(output_path, new_file))

In [56]:
def read_csv(csv_path):
    """Helper function to read the reference csv.

    The first line is treated as a header and discarded; completely blank
    lines are ignored.

    Args:
        csv_path: path of the csv file to read.

    Returns:
        A list of rows, each row being a list of strings as produced by
        ``csv.reader``.

    Raises:
        AssertionError: the file has no data rows, or its first data row has
            fewer than two columns.
    """
    # newline='' is the mode the csv module documents for files given to csv.reader
    with open(csv_path, newline='') as csv_file:
        csvreader = csv.reader(csv_file)
        # skip the header; the default keeps an empty file from raising StopIteration
        next(csvreader, None)
        # csv.reader yields [] for blank lines - drop them
        matrix = [row for row in csvreader if row]

    # check csv in correct format, may change later to accommodate more functionality
    assert len(matrix) > 0, f"no data rows found in {csv_path}"
    assert len(matrix[0]) >= 2, f"expected at least 2 columns in {csv_path}"

    return matrix
    

In [59]:
# Run the renamer on one sequencing batch.
# NOTE: the csv and input locations are absolute paths on one machine; the
# output directory is relative to the current working directory.
renamer(
    ref_csv="/Users/ehuang/Downloads/2022222_arpc3_ms1_ox_rnf_F2_CRS59_genotyping.csv",
    input_path="/Users/ehuang/Downloads/2022222 arpc3 msI sequencing",
    output_path='Test Data/output_dir',
)