In [None]:
import os
import glob
import re
import pandas as pd
import string

In [None]:
### plate layout: 16 row letters (A-P) x 24 zero-padded column labels (01-24)
all_well_rows = string.ascii_uppercase[:16]
all_well_cols = [format(num, "02d") for num in range(1, 25)]
### valid subset: drop the two outermost rows and columns on every side
valid_well_rows = all_well_rows[2:-2]
valid_well_cols = all_well_cols[2:-2]
### every well label on the plate, e.g. 'A01' ... 'P24'
all_wells = sorted(row + col for row in all_well_rows for col in all_well_cols)
### well labels restricted to the valid rows/columns, e.g. 'C03' ... 'N22'
valid_wells = sorted(row + col for row in valid_well_rows for col in valid_well_cols)

### maps an experiment date ('YYYY-MM-DD') to its data folder, given relative
### to the base directory that get_data_dir() joins it onto
### (some entries point at a nested sub-folder)
folder_dict = {
        '2020-11-17':'rep3',
        '2021-02-19':'rep4',
        '2021-02-26':'rep5',
        '2021-03-02':'rep6',
        '2021-04-06':'rep7',
        '2021-04-23':'rep8',
        '2021-05-18':'rep9',
        '2021-05-21':'rep10',
        '2021-06-11':'rep11',
        '2021-07-27':'2_rep1/210727-combo-rep1',
        '2021-07-30':'2_rep2/210730_combo_rep2',
        '2021-08-06':'2_rep3/210806_combo_rep3',
        '2021-10-05':'redo_rep1_and_2',
        '2021-10-15':'redo_rep1_and_2/redo_rep2',
        '2021-10-29':'redo_rep3'
    }


def read_well_data(date, barcode, well):
    """Load the result csv of one well into a pandas DataFrame.

    The file is located with get_well_file(date, barcode, well) and parsed
    with pandas' default csv settings.
    """
    return pd.read_csv(get_well_file(date, barcode, well))

def get_well_file(date, barcode, well):
    """Return the full path of the result csv for one well of one plate.

    Args:
        date: experiment date key, e.g. '2021-10-15'.
        barcode: plate barcode (also the plate's folder name), e.g. '211015_combo_173'.
        well: well label, e.g. 'C05'.

    Returns:
        Path to '<barcode>.result.<well>[test].csv' inside the plate folder.

    Raises:
        FileNotFoundError: if that csv does not exist (previously this case
            printed a message and then failed with UnboundLocalError).
    """
    data_dir = get_data_dir(date, barcode)
    ### example file style: <barcode>.result.<well>[test].csv
    well_file = os.path.join(data_dir, barcode + ".result." + well + "[test].csv")
    if not os.path.isfile(well_file):
        raise FileNotFoundError("well csv not found: " + well_file)
    return well_file


## input date: e.g. '2021-10-15'
## output: list of barcodes e.g. '211015_combo_173'
def get_barcodes(date):
    """List the plate barcodes (sub-directory names) recorded for a date.

    A sub-directory of the date's data folder counts as a barcode when its
    name starts with '<YYMMDD>_combo'. Names are returned in the order
    os.listdir yields them (not sorted).
    """
    prefix_pattern = date_format_switch(date) + "_combo"
    main_dir = get_data_dir(date)
    found = []
    for entry in os.listdir(main_dir):
        ### only directories can be plate folders
        if not os.path.isdir(os.path.join(main_dir, entry)):
            continue
        ### re.match anchors at the start of the name
        if re.match(prefix_pattern, entry):
            found.append(entry)
    return found

## input date: e.g. '2021-02-19'
## output: e.g. '210219'
def date_format_switch(date):
    """Turn a 'YYYY-MM-DD' date string into the compact 'YYMMDD' form.

    Works purely by character position; the input is not validated.
    """
    year_2digit = date[2:4]
    month = date[5:7]
    day = date[8:10]
    return year_2digit + month + day

def get_data_dir(date, barcode=None, base_dir = "/mnt/y/lsp-analysis/LINCS-combinations/"):
    """Build the data directory path for a date, optionally for one plate.

    Args:
        date: experiment date key of folder_dict, e.g. '2021-10-15'.
        barcode: plate folder name; when None the date-level folder is
            returned (with a trailing separator, since an empty component
            is joined on).
        base_dir: root of the data share. If this path does not exist, the
            macOS mount point is used instead.

    Returns:
        The joined path base_dir / folder_dict[date] / barcode.

    Raises:
        KeyError: if date has no entry in folder_dict.
    """
    ### note: using unix folder conventions -- would need to re-write for Windows
    ### fall back to the osx mount point when base_dir is not present
    if not os.path.exists(base_dir):
        base_dir = "/Volumes/hits/lsp-analysis/LINCS-combinations/"
    plate_dir = '' if barcode is None else barcode
    try:
        local_dir = folder_dict[date]
    except KeyError:
        raise KeyError("no data folder registered for date " + repr(date)) from None
    return os.path.join(base_dir, local_dir, plate_dir)

In [None]:
### sanity-check directory resolution, without and with a barcode
### (printed explicitly: a cell only echoes its final expression, so the
### first result would otherwise be discarded)
print(get_data_dir('2021-02-19'))
print(get_data_dir('2021-10-15', "211015_combo_176"))

In [None]:
### quick check of the date conversion; expected result is '210219'
date_format_switch("2021-02-19")

In [None]:
### scratch check: re.search finds the pattern anywhere in the string,
### not only at the start
re.search("234", "abdsf234")

In [None]:
### list the plate barcodes found for one date
get_barcodes('2021-10-15')

In [None]:
### load one example well and show which columns its csv provides
df = read_well_data('2021-10-15', '211015_combo_173', 'C05')
df.columns

In [None]:
### get column names for each date
### (reads well D06 of the alphabetically first plate of every date)
for date in folder_dict:
    print(date)
    barcodes = sorted(get_barcodes(date))
    if not barcodes:
        ### no plate folders for this date: report and move on rather than
        ### failing with IndexError on barcodes[0]
        print("no barcodes found")
        continue
    test_plate = barcodes[0]
    df = read_well_data(date, test_plate, 'D06')
    print(df.columns)

In [None]:
### for every plate of every date, report whether any valid well lacks its result csv
for date in folder_dict:
    print(date)
    barcodes = sorted(get_barcodes(date))
    for barcode in barcodes:
        print(barcode)
        data_dir = get_data_dir(date, barcode)
        ### expected file style: <barcode>.result.<well>[test].csv
        expected_files = [
            os.path.join(data_dir, barcode + ".result." + well + "[test].csv")
            for well in valid_wells
        ]
        absent_files = [path for path in expected_files if not os.path.exists(path)]
        if absent_files:
            print("missing some")

In [None]:
### scratch check: the dates as a list, and the first one
test = [*folder_dict]
test[0]