In [1]:
import os
import shutil
import csv
import re
import ipywidgets as widgets

In [2]:
def read_csv(csv_path, from_gen=False): 
    """ helper function to read reference csv """
    csv_file = open(csv_path)
    csvreader = csv.reader(csv_file)
    next(csvreader)
    matrix = []
    for row in csvreader: 
        matrix.append(row)
    csv_file.close()
    
    # check csv in correct format, may change later to accommodate more functionality
    assert len(matrix) > 0
    assert len(matrix[0]) >= 2
    
    return matrix
    

In [3]:
def renamer(ref_csv, input_path, output_path, prepend=False, from_gen=False): 
    """ renames sequencing file according to reference csv """
    names_ref = read_csv(ref_csv)
    
    # make output dir if doesn't already exist
    if not os.path.isdir(output_path): 
        os.mkdir(output_path)
        
    # make a copy of original directory and put it in output path (for safekeeping purposes)
    copy_data_path = output_path + "/data_copy"
    if not os.path.exists(copy_data_path): 
        os.mkdir(copy_data_path)
    shutil.copytree(input_path, copy_data_path, dirs_exist_ok=True)
    
    # get all .ab1 files and their indices
    input_dir_raw = os.listdir(input_path)
    ab_files = [x for x in input_dir_raw if x[-4:] == '.ab1']
    ab_file_indices = [int(re.findall('-\d+', x)[0][1:]) for x in ab_files]
    
    # get corresponding name for each file based on index
    def findName(ab_file_index): 
        for i in names_ref: 
            if int(i[0]) == ab_file_index: 
                # check if user put .ab1 suffix in ref csv
                if len(i[1]) > 4 and i[1][-4:] == '.ab1': 
                    return i[0] + "_" + i[1][:-4]
                else: 
                    return i[0] + "_" + i[1]
        raise Exception("cannot find index in csv")
    
    # copy and rename each file using reference csv
    for i in range(len(ab_file_indices)):
        new_path = shutil.copy2(input_path+"/"+ab_files[i], output_path)
        %run Generator.ipynb
        new_name = findName(ab_file_indices[i]) if not from_gen else generate_names(names_ref)
        if not prepend: 
            new_name = f"{output_path}/{new_name}.ab1"
            
        else: 
            new_name = f"{output_path}/{new_name}_{ab_files[i]}"
        os.rename(src=new_path, dst=new_name)

In [4]:
widgets.interact_manual(renamer, 
                 ref_csv="", 
                 input_path="", 
                 output_path="")

interactive(children=(Text(value='', description='ref_csv'), Text(value='', description='input_path'), Text(va…

<function __main__.renamer(ref_csv, input_path, output_path, prepend=False, from_gen=False)>

In [5]:
renamer("Test Data/name_ref.csv", 
        "/Users/ehuang/Downloads/EH", 
        "Test Data/output_dir", 
        prepend=True, 
        from_gen=True)



interactive(children=(Text(value='', description='base'), Text(value='', description='genes'), Text(value='', …

OSError: [Errno 63] File name too long: 'Test Data/output_dir/42-2_B06_046.ab1' -> "Test Data/output_dir/[['1', '1_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_215'], ['2', '2_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_13'], ['3', '3_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_99'], ['4', '4_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_100'], ['5', '5_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_43'], ['6', '6_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_41'], ['7', '7_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_220'], ['8', '8_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_213'], ['9', '9_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_215'], ['10', '10_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_13'], ['11', '11_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_99'], ['12', '12_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_100'], ['13', '13_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_43'], ['14', '14_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_41'], ['15', '15_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_220'], ['16', '16_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_213'], ['17', '17_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_215'], ['18', '18_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_13'], ['19', '19_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_99'], ['20', '20_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_100'], ['21', '21_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_43'], ['22', '22_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_41'], ['23', '23_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_220'], ['24', '24_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_213'], ['25', '25_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_215'], ['26', '26_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_13'], ['27', '27_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_99'], ['28', '28_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_100'], ['29', '29_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_43'], ['30', '30_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_41'], ['31', '31_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_220'], ['32', '32_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_213'], ['33', '33_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_215'], ['34', '34_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_13'], ['35', '35_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_99'], ['36', '36_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_100'], ['37', '37_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_43'], ['38', '38_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_41'], ['39', '39_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_220'], ['40', '40_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_213'], ['41', '41_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_215'], ['42', '42_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_13'], ['43', '43_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_99'], ['44', '44_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_100'], ['45', '45_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_43'], ['46', '46_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_41'], ['47', '47_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_220'], ['48', '48_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_213'], ['49', '49_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_215'], ['50', '50_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_13'], ['51', '51_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_99'], ['52', '52_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_100'], ['53', '53_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_43'], ['54', '54_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_41'], ['55', '55_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_220'], ['56', '56_20220303arpc3-mNG_homozygote_knockins_amp_213_215_seq_213']]_42-2_B06_046.ab1"

In [30]:
"""
TODO: 

figure out plug in

upload file instead of having user input path? widgets.FileUpload

"""

'\nTODO: \n\nupload file instead of having user input path? widgets.FileUpload\n\n'