In [1]:
import sys, os, glob
import pandas as pd
import numpy as np
sys.path.append('../..')
from src import raw_data_path, direcslist

### Specify paths

In [2]:
# Patterns handed to direcslist via its `exclude` argument.
# NOTE(review): presumably directories whose name contains '!' are skipped;
# confirm against the direcslist implementation in src.
exclude = ['!']

# Root folder of the par2_nebd imaging data, then the list of directories
# that direcslist returns for it (second positional argument: 1).
path = f'{raw_data_path}/Imaging/par2_nebd/'
paths = direcslist(path, 1, exclude=exclude)
print(len(paths))

678


### Exclude paths

Excluding any paths that lack either an af_corrected image (`af_corrected.tif`) or an ROI (`ROI_manual.txt`)

In [3]:
# A directory is kept only when every one of these files exists inside it.
required_files = ('/af_corrected.tif', '/ROI_manual.txt')
paths = [
    p for p in paths
    if all(os.path.exists(p + file_name) for file_name in required_files)
]
print(len(paths))

669


### Extract data from paths

In [4]:
# Per-embryo metadata, parsed from the name of each path's parent folder.
# The folder name is split on '_': field 0 is stored as the date, field 1 as
# the line, and field 2 as the condition when there are exactly four fields
# (otherwise the condition defaults to 'wt').
dates, lines, conditions, embryo_ids = [], [], [], []
for p in paths:
    fields = p.strip('/').split('/')[-2].split('_')
    dates.append(fields[0])
    lines.append(fields[1])
    conditions.append(fields[2] if len(fields) == 4 else 'wt')

    # EmbryoID.txt is read back from disk. The commented-out lines below show
    # how it was originally written (modification time of the first file
    # matching '*488 SP 535-50*'); kept for provenance.
    # embryo_id = int(os.path.getmtime(glob.glob(p + '/*488 SP 535-50*')[0]))
    # np.savetxt(p + '/EmbryoID.txt', [embryo_id], fmt='%i')
    embryo_ids.append(str(np.loadtxt(p + '/EmbryoID.txt').astype(int)))

# Number of distinct embryo IDs (compare with the number of paths above).
print(len(set(embryo_ids)))

669


### Remove duplicates

In [5]:
# Build a boolean mask marking the first occurrence of every embryo ID.
#   unique_ids      - distinct IDs in order of first appearance
#   unique_ids_bool - one flag per entry of embryo_ids; True for the first
#                     time an ID is seen, False for any later repeat
# Membership is tested against a set so the pass is linear in the number of
# embryos rather than quadratic (list lookup inside the loop).
seen_ids = set()
unique_ids = []
unique_ids_bool = []
for e in embryo_ids:
    is_first = e not in seen_ids
    unique_ids_bool.append(is_first)
    if is_first:
        seen_ids.add(e)
        unique_ids.append(e)

In [6]:
# Apply the first-occurrence mask to all five parallel lists at once, so
# that they stay aligned with one another. The right-hand side reads the
# current lists before any of the names is rebound.
paths, dates, lines, conditions, embryo_ids = (
    [value for value, keep in zip(column, unique_ids_bool) if keep]
    for column in (paths, dates, lines, conditions, embryo_ids)
)

### Create dataframe

In [7]:
# Store paths relative to raw_data_path: drop the first len(raw_data_path)
# characters of every path before building the table.
prefix_length = len(raw_data_path)
relative_paths = [p[prefix_length:] for p in paths]

# One row per embryo; column order is Path, EmbryoID, Line, Date, RNAi.
df = pd.DataFrame({
    'Path': relative_paths,
    'EmbryoID': embryo_ids,
    'Line': lines,
    'Date': dates,
    'RNAi': conditions,
})

df

Unnamed: 0,Path,EmbryoID,Line,Date,RNAi
0,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292350,lp637,200129,wt
1,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292382,lp637,200129,wt
2,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292449,lp637,200129,wt
3,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292501,lp637,200129,wt
4,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292520,lp637,200129,wt
...,...,...,...,...,...
664,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/04",1662545240,nwg496,220907,wt
665,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/05",1662545908,nwg496,220907,wt
666,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/06",1662546008,nwg496,220907,wt
667,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/07",1662546125,nwg496,220907,wt


### Line dataframe

In [8]:
# Load the table of known lines from par2_lines.csv (path is relative to the
# notebook's working directory). The 'Line' and 'Alternative' columns of this
# table are used further down to decide which embryos to keep.
lines_df = pd.read_csv('par2_lines.csv')
# Display the table for inspection.
lines_df

Unnamed: 0,Line,Alternative,Neon,C56S,L109R,L50R,GCN4,6HNL,S241A,PRBH3,it71,Single copy
0,n2,,0,0,0,0,0,0,0,0,0,0
1,lp637,nwg325,1,0,0,0,0,0,0,0,0,0
2,lp637bal,nwg325bal,1,0,0,0,0,0,0,0,0,0
3,nwg201,,1,0,0,0,0,0,0,0,1,0
4,nwg240,,1,1,0,0,0,0,0,0,0,0
5,nwg240bal,,1,1,0,0,0,0,0,0,0,1
6,nwg246,,1,1,0,0,0,0,0,0,1,0
7,nwg338,,1,0,1,0,0,0,0,0,0,0
8,nwg338bal,,1,0,1,0,0,0,0,0,0,1
9,nwg351,,1,0,1,1,0,0,0,0,0,0


### Filter out embryos whose line is not in the lines database

In [9]:
# Keep only embryos whose line appears in the lines table, either under its
# primary name ('Line') or its alternative name ('Alternative').
# The set of accepted names is built once (missing alternatives are dropped)
# and applied as a vectorised mask, instead of dropping rows from df while
# iterating over it and rebuilding the name list for every row.
known_lines = set(lines_df['Line'].dropna()) | set(lines_df['Alternative'].dropna())
df = df[df['Line'].isin(known_lines)].reset_index(drop=True)

In [10]:
# Display the table after filtering against the lines database.
df

Unnamed: 0,Path,EmbryoID,Line,Date,RNAi
0,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292350,lp637,200129,wt
1,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292382,lp637,200129,wt
2,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292449,lp637,200129,wt
3,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292501,lp637,200129,wt
4,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292520,lp637,200129,wt
...,...,...,...,...,...
664,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/04",1662545240,nwg496,220907,wt
665,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/05",1662545908,nwg496,220907,wt
666,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/06",1662546008,nwg496,220907,wt
667,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/07",1662546125,nwg496,220907,wt


### Replace nwg325 with lp637 (different names for same line)

In [11]:
# nwg325 and lp637 are two names for the same line; store it as lp637.
# The replacement is limited to the 'Line' column so that an identical value
# in any other column (Path, EmbryoID, Date, RNAi) can never be rewritten.
# NOTE(review): the lines table also lists 'nwg325bal' as an alternative name
# for 'lp637bal'; it is not remapped here (same as before) - confirm whether
# it should be.
df = df.assign(Line=df['Line'].replace({'nwg325': 'lp637'}))
df

Unnamed: 0,Path,EmbryoID,Line,Date,RNAi
0,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292350,lp637,200129,wt
1,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292382,lp637,200129,wt
2,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292449,lp637,200129,wt
3,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292501,lp637,200129,wt
4,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292520,lp637,200129,wt
...,...,...,...,...,...
664,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/04",1662545240,nwg496,220907,wt
665,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/05",1662545908,nwg496,220907,wt
666,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/06",1662546008,nwg496,220907,wt
667,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/07",1662546125,nwg496,220907,wt


### Sort by embryo ID

In [12]:
# Order the rows by EmbryoID and renumber the index from 0.
# EmbryoID holds strings, so the ordering is a string comparison.
df = df.sort_values(by='EmbryoID').reset_index(drop=True)
df

Unnamed: 0,Path,EmbryoID,Line,Date,RNAi
0,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292350,lp637,200129,wt
1,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292382,lp637,200129,wt
2,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292449,lp637,200129,wt
3,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292501,lp637,200129,wt
4,"/Imaging/par2_nebd/200129_lp637_wt_tom4,15,60/...",1580292520,lp637,200129,wt
...,...,...,...,...,...
664,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/04",1662545240,nwg496,220907,wt
665,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/05",1662545908,nwg496,220907,wt
666,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/06",1662546008,nwg496,220907,wt
667,"/Imaging/par2_nebd/220907_nwg496_tom4,15,60/07",1662546125,nwg496,220907,wt


### Save

In [13]:
# Write the final embryo table to the project's data folder. to_csv is called
# with its defaults, so the row index is written as the first column.
df.to_csv('../../data/par2_nebd_embryos.csv')