In [2]:
import argparse
import glob
import os
import warnings
from pathlib import Path
from re import search

import pandas as pd
from PIL import Image
from tqdm import tqdm

### Defining directory paths

In [None]:
# Identifiers of the project, batch and plate that this notebook processes.
path = '/home/ubuntu/bucket/projects'
projdir = '2017_10_19_Profiling_rare_ORFs'
batchname = '20200727_96W_CP185'
platename = '20X_CP_CP185_2'

# Derived locations:
#   batchpath - directory of the batch inside the project
#   fpath     - directory whose files are listed as the plate's images
#   outpath   - directory the generated load_data CSV files are written to
batchpath = "/".join((path, projdir, batchname))
fpath = "/".join((batchpath, platename))
outpath = "/".join((path, projdir, 'workspace', 'load_data_csv', batchname, platename))

### Making the output directory

In [None]:
def makedirectory(path=None):
    """Create the output directory, including any missing parent directories.

    Parameters
    ----------
    path : str, optional
        Directory to create. When omitted, the notebook-level ``outpath``
        is used.

    Returns
    -------
    None

    Raises
    ------
    OSError
        If the directory cannot be created for any reason other than it
        already existing (for example a permission error, or a regular file
        occupying the target path).
    """
    if path is None:
        path = outpath
    try:
        os.makedirs(path)
    except FileExistsError:
        # Only a pre-existing *directory* is benign. In Python 3 ``IOError``
        # is an alias of ``OSError``, so catching it would also report
        # permission problems as "already exists" and hide them.
        if not os.path.isdir(path):
            raise
        print("Directory", path, "already exists")
    else:
        print("Directory", path, "is created")
        
# Create the output directory now. makedirectory() returns None, so
# `directory` is bound to None rather than to a path.
directory = makedirectory()


### Creating the load_data CSV

In [None]:
# NOTE(review): this import sits outside the notebook's import cell.
from IPython.core.interactiveshell import InteractiveShell

# Display setting for the notebook: "all" asks IPython to show the value of
# every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

def load_data(image_dir=None, plate=None):
    """Build the load_data table pairing the four channel images of each site.

    Files in ``image_dir`` are grouped by the channel token in their name
    (``_ch_0`` -> DNA, ``_ch_1`` -> Protein, ``_ch_2`` -> Mito,
    ``_ch_3`` -> ER) and combined positionally into one row per site.

    Parameters
    ----------
    image_dir : str, optional
        Directory containing the images. Defaults to the notebook-level
        ``fpath``.
    plate : str, optional
        Value written to ``Metadata_Plate``. Defaults to the notebook-level
        ``platename``.

    Returns
    -------
    pandas.DataFrame
        One row per site with ``FileName_*`` / ``PathName_*`` columns for
        each channel plus ``Metadata_Plate``, ``Metadata_Well`` and
        ``Metadata_Site``.

    Warns
    -----
    UserWarning
        If the channels do not all have the same number of files; rows are
        then truncated to the shortest channel and may be mis-paired.
    """
    if image_dir is None:
        image_dir = fpath
    if plate is None:
        plate = platename

    # os.listdir() returns entries in arbitrary order. Sorting makes the
    # per-channel lists line up site by site before they are zipped together.
    filenames = sorted(os.listdir(image_dir))

    channels = [
        ('_ch_0', 'OrigDNA'),
        ('_ch_1', 'OrigProtein'),
        ('_ch_2', 'OrigMito'),
        ('_ch_3', 'OrigER'),
    ]
    per_channel = [[name for name in filenames if token in name]
                   for token, _ in channels]

    # zip() stops at the shortest list without complaint, so surface a
    # mismatch instead of silently writing a mis-paired table.
    counts = {token: len(names)
              for (token, _), names in zip(channels, per_channel)}
    if len(set(counts.values())) > 1:
        warnings.warn(
            "Channel image counts differ in %s: %s; rows are truncated to the "
            "shortest channel and may pair images from different sites."
            % (image_dir, counts),
            stacklevel=2,
        )

    df = pd.DataFrame(list(zip(*per_channel)),
                      columns=['FileName_' + label for _, label in channels])

    for _, label in channels:
        df['PathName_' + label] = image_dir
    df['Metadata_Plate'] = plate

    # Well: 6th '_'-separated token, text before the first '-'. A
    # two-character well such as "A1" is zero-padded to "A01".
    df['Metadata_Well'] = (df['FileName_OrigDNA']
                           .str.split('_')
                           .str.get(5)
                           .str.split('-')
                           .str.get(0)
                           .apply(lambda x: x[0] + str(0) + x[1] if len(x) < 3 else x))

    # Site: 7th '_'-separated token, text before the first '.'.
    df['Metadata_Site'] = (df['FileName_OrigDNA']
                           .str.split('_')
                           .str.get(6)
                           .str.split('.')
                           .str.get(0))

    colnames = ['FileName_OrigDNA', 'PathName_OrigDNA',
                'FileName_OrigProtein', 'PathName_OrigProtein',
                'FileName_OrigMito', 'PathName_OrigMito',
                'FileName_OrigER', 'PathName_OrigER',
                'Metadata_Plate', 'Metadata_Well', 'Metadata_Site']

    return df.reindex(columns=colnames)

# Build the table for the configured plate and write it to the output directory.
# NOTE(review): to_csv() is called without index=False, so the DataFrame's
# integer index is written as an unnamed first column - confirm that the
# consumer of this CSV tolerates it.
loaddata = load_data()

loaddata.to_csv(f"{outpath}/load_data.csv")


### Creating the load_data_with_illum CSV

In [None]:
def load_data_with_illum(image_dir=None, plate=None, batch_dir=None):
    """Build the load_data table with illumination-function columns added.

    Files in ``image_dir`` are sorted, grouped by the channel token in their
    name (``_ch_0`` -> DNA, ``_ch_1`` -> Protein, ``_ch_2`` -> Mito,
    ``_ch_3`` -> ER) and combined positionally into one row per site. Every
    row also names the per-channel illumination file
    ``<plate>_Illum<Channel>.npy`` located in ``<batch_dir>/illum/<plate>``.

    Parameters
    ----------
    image_dir : str, optional
        Directory containing the images. Defaults to the notebook-level
        ``fpath``.
    plate : str, optional
        Plate name used for ``Metadata_Plate`` and the illumination file
        names. Defaults to the notebook-level ``platename``.
    batch_dir : str, optional
        Batch directory under which ``illum/<plate>`` is referenced.
        Defaults to the notebook-level ``batchpath``.

    Returns
    -------
    pandas.DataFrame
        One row per site with original-image, metadata and illumination
        columns.

    Warns
    -----
    UserWarning
        If the channels do not all have the same number of files; rows are
        then truncated to the shortest channel and may be mis-paired.
    """
    if image_dir is None:
        image_dir = fpath
    if plate is None:
        plate = platename
    if batch_dir is None:
        batch_dir = batchpath

    # Sorted listing keeps the per-channel lists aligned site by site.
    filenames = sorted(os.listdir(image_dir))

    channels = [
        ('_ch_0', 'DNA'),
        ('_ch_1', 'Protein'),
        ('_ch_2', 'Mito'),
        ('_ch_3', 'ER'),
    ]
    per_channel = [[name for name in filenames if token in name]
                   for token, _ in channels]

    # zip() stops at the shortest list without complaint, so surface a
    # mismatch instead of silently writing a mis-paired table.
    counts = {token: len(names)
              for (token, _), names in zip(channels, per_channel)}
    if len(set(counts.values())) > 1:
        warnings.warn(
            "Channel image counts differ in %s: %s; rows are truncated to the "
            "shortest channel and may pair images from different sites."
            % (image_dir, counts),
            stacklevel=2,
        )

    df = pd.DataFrame(list(zip(*per_channel)),
                      columns=['FileName_Orig' + label for _, label in channels])

    illum_dir = batch_dir + '/' + 'illum' + '/' + plate
    for _, label in channels:
        df['PathName_Orig' + label] = image_dir
        df['FileName_Illum' + label] = plate + '_Illum' + label + '.npy'
        df['PathName_Illum' + label] = illum_dir
    df['Metadata_Plate'] = plate

    # Well: 6th '_'-separated token, text before the first '-'. A
    # two-character well such as "A1" is zero-padded to "A01".
    df['Metadata_Well'] = (df['FileName_OrigDNA']
                           .str.split('_')
                           .str.get(5)
                           .str.split('-')
                           .str.get(0)
                           .apply(lambda x: x[0] + str(0) + x[1] if len(x) < 3 else x))

    # Site: 7th '_'-separated token, text before the first '.'.
    df['Metadata_Site'] = (df['FileName_OrigDNA']
                           .str.split('_')
                           .str.get(6)
                           .str.split('.')
                           .str.get(0))

    colnames = ['FileName_OrigDNA', 'PathName_OrigDNA',
                'FileName_OrigProtein', 'PathName_OrigProtein',
                'FileName_OrigMito', 'PathName_OrigMito',
                'FileName_OrigER', 'PathName_OrigER',
                'Metadata_Plate', 'Metadata_Well',
                'Metadata_Site', 'FileName_IllumDNA', 'PathName_IllumDNA',
                'FileName_IllumProtein', 'PathName_IllumProtein',
                'FileName_IllumMito', 'PathName_IllumMito',
                'FileName_IllumER', 'PathName_IllumER']

    return df.reindex(columns=colnames)

# Build the illumination-aware table for the configured plate and write it
# next to load_data.csv.
# NOTE(review): to_csv() is called without index=False, so the DataFrame's
# integer index is written as an unnamed first column - confirm that the
# consumer of this CSV tolerates it.
loaddataillum = load_data_with_illum()
loaddataillum.to_csv(f"{outpath}/load_data_with_illum.csv")