In [43]:
import sys, os, glob
import pandas as pd
sys.path.append('../../..')
from src import raw_data_path, direcslist

### Specify paths

In [44]:
# Entries passed to direcslist as `exclude`
# (presumably name fragments to skip -- TODO confirm against src.direcslist).
exclude = ['!']

# Root of the PAR-2 NEBD imaging data, and the directories found beneath it
# by direcslist (called with level argument 1).
path = f'{raw_data_path}/Imaging/PAR-2 NEBD/'
paths = direcslist(path, 1, exclude=exclude)
print(len(paths))

678


### Exclude paths

Exclude any path that is missing either the af_corrected image (`af_corrected.tif`) or the manual ROI file (`ROI_manual.txt`)

In [45]:
# A path is kept only if every one of these files exists inside it.
required_files = ('/af_corrected.tif', '/ROI_manual.txt')
paths = [
    p for p in paths
    if all(os.path.exists(p + suffix) for suffix in required_files)
]
print(len(paths))

669


### Extract data from paths

In [46]:
# Parse per-embryo metadata from each path. The second-to-last path
# component is treated as an underscore-separated code:
#   field 0 -> date, field 1 -> line,
#   field 2 -> condition (only when the code has exactly four fields;
#              every other code is labelled 'wt').
dates, lines, conditions, embryo_ids = [], [], [], []
for p in paths:
    code = p.strip('/').split('/')[-2]
    fields = code.split('_')
    dates.append(fields[0])
    lines.append(fields[1])
    conditions.append(fields[2] if len(fields) == 4 else 'wt')

    # Embryo ID = modification time (truncated to whole seconds) of the
    # first file in the folder matching '*488 SP 535-50*'.
    first_match = glob.glob(p + '/*488 SP 535-50*')[0]
    embryo_ids.append(int(os.path.getmtime(first_match)))

In [47]:
# Number of distinct embryo IDs; fewer than len(paths) means duplicates exist.
n_unique_ids = len(set(embryo_ids))
print(n_unique_ids)

669


### Remove duplicates

In [48]:
# Flag the first occurrence of every embryo ID.
#   unique_ids      -- IDs in order of first appearance
#   unique_ids_bool -- one flag per entry of embryo_ids: True for the first
#                      occurrence of an ID, False for any later repeat
# A set is used for the membership test so each lookup is O(1) instead of
# scanning the growing unique_ids list.
seen_ids = set()
unique_ids = []
unique_ids_bool = []
for e in embryo_ids:
    is_first = e not in seen_ids
    unique_ids_bool.append(is_first)
    if is_first:
        seen_ids.add(e)
        unique_ids.append(e)

In [49]:
# Keep, in every parallel list, only the entries flagged True in
# unique_ids_bool (i.e. the first occurrence of each embryo ID).
paths, dates, lines, conditions, embryo_ids = (
    [value for value, keep in zip(column, unique_ids_bool) if keep]
    for column in (paths, dates, lines, conditions, embryo_ids)
)

### Create dataframe

In [50]:
# Assemble the parallel lists into one table, one row per embryo.
embryo_columns = {
    'Path': paths,
    'EmbryoID': embryo_ids,
    'Line': lines,
    'Date': dates,
    'RNAi': conditions,
}
df = pd.DataFrame(embryo_columns)
df

Unnamed: 0,Path,EmbryoID,Line,Date,RNAi
0,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292350,lp637,200129,wt
1,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292382,lp637,200129,wt
2,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292449,lp637,200129,wt
3,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292501,lp637,200129,wt
4,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292520,lp637,200129,wt
...,...,...,...,...,...
664,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662545240,nwg496,220907,wt
665,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662545908,nwg496,220907,wt
666,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662546008,nwg496,220907,wt
667,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662546125,nwg496,220907,wt


### Line dataframe

In [51]:
# Load the table of known lines. The filtering cell that follows reads its
# `Line` and `Alternative` columns, so this must run on a fresh kernel.
lines_df = pd.read_csv('Par2_Lines.csv')
lines_df

### Filter embryos not in lines database

In [52]:
# Keep only embryos whose line name is listed in the lines table, either
# under `Line` or under `Alternative`. The allowed names are built once and
# applied as a boolean mask, rather than dropping rows from df while
# iterating over it.
known_lines = list(lines_df.Line.unique()) + list(lines_df.Alternative.unique())
df = df[df.Line.isin(known_lines)].reset_index(drop=True)

In [53]:
# Display the current state of the embryo dataframe.
df

Unnamed: 0,Path,EmbryoID,Line,Date,RNAi
0,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292350,lp637,200129,wt
1,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292382,lp637,200129,wt
2,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292449,lp637,200129,wt
3,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292501,lp637,200129,wt
4,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292520,lp637,200129,wt
...,...,...,...,...,...
664,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662545240,nwg496,220907,wt
665,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662545908,nwg496,220907,wt
666,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662546008,nwg496,220907,wt
667,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662546125,nwg496,220907,wt


### Replace nwg325 with lp637 (different names for same line)

In [54]:
# 'nwg325' and 'lp637' are two names for the same line; normalise to 'lp637'.
# Only the Line column is rewritten so that a matching value in any other
# column cannot be altered by accident.
df = df.assign(Line=df['Line'].replace('nwg325', 'lp637'))
df

Unnamed: 0,Path,EmbryoID,Line,Date,RNAi
0,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292350,lp637,200129,wt
1,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292382,lp637,200129,wt
2,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292449,lp637,200129,wt
3,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292501,lp637,200129,wt
4,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292520,lp637,200129,wt
...,...,...,...,...,...
664,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662545240,nwg496,220907,wt
665,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662545908,nwg496,220907,wt
666,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662546008,nwg496,220907,wt
667,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662546125,nwg496,220907,wt


### Sort by embryo ID

In [55]:
# Order rows by EmbryoID and renumber the index from 0.
df = df.sort_values(by='EmbryoID').reset_index(drop=True)
df

Unnamed: 0,Path,EmbryoID,Line,Date,RNAi
0,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292350,lp637,200129,wt
1,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292382,lp637,200129,wt
2,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292449,lp637,200129,wt
3,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292501,lp637,200129,wt
4,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1580292520,lp637,200129,wt
...,...,...,...,...,...
664,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662545240,nwg496,220907,wt
665,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662545908,nwg496,220907,wt
666,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662546008,nwg496,220907,wt
667,/Users/blandt/Desktop/PaperData//Imaging/PAR-2...,1662546125,nwg496,220907,wt


### Save

In [56]:
# Write the final embryo table to the project's data directory.
output_csv = '../../../data/Par2_NEBD_Embryos_dataframe.csv'
df.to_csv(output_csv)