In [1]:
from src.protocol import Protocol,RBPS
from src.database import Database

import pandas as pd
import numpy as np

## Export valid samples

In [2]:
# Load every sample, then narrow the selection one criterion at a time:
# neuron type, ALS label / treatment condition, and the two fixed stains.
db = Database()
data = (
    db.all_samples
    .query('neuron_type=="D6"')
    .query('als_label != "unclear" and (condition=="untreated" or condition=="osmotic" or condition=="oxidative" or condition=="heat" or condition=="heat_2h" or condition=="osmotic_1h" or condition=="osmotic_2h" or condition=="osmotic_6h")')
    .query('channel_1=="DAPI" and channel_2=="BIII"')
)

# Keep only samples whose third or fourth channel holds one of the RBPS values.
rbp_in_channel_3 = data.channel_3.isin(RBPS)
rbp_in_channel_4 = data.channel_4.isin(RBPS)
data = data[rbp_in_channel_3 | rbp_in_channel_4]

## Build a new dataframe with the channels separated into different rows

In [3]:
# Maps a 0-based channel position to the name of the sample column that
# stores the stain imaged at that position.
channels = {position: f'channel_{position + 1}' for position in range(4)}

# Stains that receive a row of their own; the row-building loop tests channel
# values for membership in this list. (A duplicated 'SFPQ' entry was removed;
# membership checks are unaffected.)
correct_channels = ['DAPI', 'BIII', 'SFPQ', 'FUS', 'TDP-43', 'hnRNPK', 'hnRNPA1']

# Column layout of the per-channel dataframe.
headers = [
    'experiment', 'plate', 'neuron_type', 'condition', 'stress_label', 'cell_line', 'als_label',
    'well_row', 'well_col', 'channel', 'fov', 'number_of_planes', 'exclude',
]

# Empty frame carrying only the header columns; rows are added afterwards.
df = pd.DataFrame(columns=headers)

In [4]:
# Expand every sample into one row per usable channel.
#
# For each sample the rows are emitted in this order:
#   * one row per channel whose stain is listed in `correct_channels`;
#   * directly after a channel found at position 2 or 3 (channel_3/channel_4),
#     an extra composite row named 'DAPI_BIII_<stain>';
#   * finally one 'DAPI_BIII' row.
# Every emitted row keeps the index label of the sample it came from.
#
# The rows are collected in plain Python lists and turned into a dataframe
# once: `DataFrame.append` no longer exists in pandas >= 2.0 and growing a
# frame row by row copies the whole frame on every step.
per_channel_columns = ['number_of_channels', 'channel_1', 'channel_2', 'channel_3', 'channel_4']
records = []        # one dict of column -> value per output row
record_labels = []  # index label of the source sample for each output row

for sample_label, row in data.iterrows():
    # Sample metadata shared by every row generated from this sample.
    shared_fields = row.drop(per_channel_columns).to_dict()

    for j in range(int(row['number_of_channels'])):
        channel = row[channels[j]]
        if channel not in correct_channels:
            continue
        records.append({**shared_fields, 'channel': channel})
        record_labels.append(sample_label)
        if j in (2, 3):
            records.append({**shared_fields, 'channel': f'DAPI_BIII_{channel}'})
            record_labels.append(sample_label)

    records.append({**shared_fields, 'channel': 'DAPI_BIII'})
    record_labels.append(sample_label)

expanded = pd.DataFrame(records, index=record_labels)
# Keep the columns already declared on `df` first, followed by any additional
# sample columns, which is the column order row-wise appending produced.
ordered_columns = list(df.columns) + [col for col in expanded.columns if col not in df.columns]
# Rebuilding `df` from scratch (instead of extending it) means re-running this
# cell does not duplicate rows.
df = expanded.reindex(columns=ordered_columns)
    


## Add experts

In [12]:
# Names of the ALS experts: one per channel suffix, in this order:
# the single stains DAPI and BIII, their combination DAPI_BIII,
# DAPI_BIII combined with each RBP, and finally each RBP on its own.
rbp_names = ['SFPQ', 'FUS', 'TDP-43', 'hnRNPK', 'hnRNPA1']

expert_suffixes = ['DAPI', 'BIII', 'DAPI_BIII']
expert_suffixes += [f'DAPI_BIII_{rbp}' for rbp in rbp_names]
expert_suffixes += rbp_names

experts_als = [f'expert_control_als_untreated_{suffix}' for suffix in expert_suffixes]
experts_als

['expert_control_als_untreated_DAPI',
 'expert_control_als_untreated_BIII',
 'expert_control_als_untreated_DAPI_BIII',
 'expert_control_als_untreated_DAPI_BIII_SFPQ',
 'expert_control_als_untreated_DAPI_BIII_FUS',
 'expert_control_als_untreated_DAPI_BIII_TDP-43',
 'expert_control_als_untreated_DAPI_BIII_hnRNPK',
 'expert_control_als_untreated_DAPI_BIII_hnRNPA1',
 'expert_control_als_untreated_SFPQ',
 'expert_control_als_untreated_FUS',
 'expert_control_als_untreated_TDP-43',
 'expert_control_als_untreated_hnRNPK',
 'expert_control_als_untreated_hnRNPA1']

In [10]:
# Names of the stress experts: for each stress type (osmotic, heat, oxidative)
# one expert per channel suffix, in the order DAPI, BIII, DAPI_BIII,
# DAPI_BIII + each RBP, then each RBP alone.
stress_rbp_names = ['SFPQ', 'FUS', 'TDP-43', 'hnRNPK', 'hnRNPA1']
stress_expert_suffixes = [
    'DAPI', 'BIII', 'DAPI_BIII',
    *(f'DAPI_BIII_{rbp}' for rbp in stress_rbp_names),
    *stress_rbp_names,
]

# The suffix list is the same for every stress type, so it is built once above.
experts_all_stress = [
    f'expert_control_untreated_{stress}_{suffix}'
    for stress in ['osmotic', 'heat', 'oxidative']
    for suffix in stress_expert_suffixes
]

In [11]:
# Full expert list: the ALS experts first, followed by the stress experts.
all_experts = [*experts_als, *experts_all_stress]
all_experts

['expert_control_als_untreated_DAPI',
 'expert_control_als_untreated_BIII',
 'expert_control_als_untreated_DAPI_BIII',
 'expert_control_als_untreated_DAPI_BIII_SFPQ',
 'expert_control_als_untreated_DAPI_BIII_FUS',
 'expert_control_als_untreated_DAPI_BIII_TDP-43',
 'expert_control_als_untreated_DAPI_BIII_hnRNPK',
 'expert_control_als_untreated_DAPI_BIII_hnRNPA1',
 'expert_control_als_untreated_SFPQ',
 'expert_control_als_untreated_FUS',
 'expert_control_als_untreated_TDP-43',
 'expert_control_als_untreated_hnRNPK',
 'expert_control_als_untreated_hnRNPA1',
 'expert_control_untreated_osmotic_DAPI',
 'expert_control_untreated_osmotic_BIII',
 'expert_control_untreated_osmotic_DAPI_BIII',
 'expert_control_untreated_osmotic_DAPI_BIII_SFPQ',
 'expert_control_untreated_osmotic_DAPI_BIII_FUS',
 'expert_control_untreated_osmotic_DAPI_BIII_TDP-43',
 'expert_control_untreated_osmotic_DAPI_BIII_hnRNPK',
 'expert_control_untreated_osmotic_DAPI_BIII_hnRNPA1',
 'expert_control_untreated_osmotic_SFPQ',


In [12]:
# Sanity check on the number of experts: 13 ALS experts plus
# 3 stress types x 13 experts each should give 52.
print(len(all_experts))

52


In [13]:
# Give every expert its own column, filled with NaN for all rows.
for expert_column in all_experts:
    df[expert_column] = np.nan

In [14]:
# Display the frame to check the per-channel rows and the expert columns (all NaN at this point).
df

Unnamed: 0,experiment,plate,neuron_type,condition,stress_label,cell_line,als_label,well_row,well_col,channel,...,expert_control_untreated_oxidative_DAPI_BIII_SFPQ,expert_control_untreated_oxidative_DAPI_BIII_FUS,expert_control_untreated_oxidative_DAPI_BIII_TDP-43,expert_control_untreated_oxidative_DAPI_BIII_hnRNPK,expert_control_untreated_oxidative_DAPI_BIII_hnRNPA1,expert_control_untreated_oxidative_SFPQ,expert_control_untreated_oxidative_FUS,expert_control_untreated_oxidative_TDP-43,expert_control_untreated_oxidative_hnRNPK,expert_control_untreated_oxidative_hnRNPA1
1704,E58,P4,D6,untreated,no_stress,C1,control,2,2,DAPI,...,,,,,,,,,,
1704,E58,P4,D6,untreated,no_stress,C1,control,2,2,BIII,...,,,,,,,,,,
1704,E58,P4,D6,untreated,no_stress,C1,control,2,2,SFPQ,...,,,,,,,,,,
1704,E58,P4,D6,untreated,no_stress,C1,control,2,2,DAPI_BIII_SFPQ,...,,,,,,,,,,
1704,E58,P4,D6,untreated,no_stress,C1,control,2,2,DAPI_BIII,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21591,E69,P35,D6,untreated,no_stress,CB1D,als,7,11,DAPI,...,,,,,,,,,,
21591,E69,P35,D6,untreated,no_stress,CB1D,als,7,11,BIII,...,,,,,,,,,,
21591,E69,P35,D6,untreated,no_stress,CB1D,als,7,11,FUS,...,,,,,,,,,,
21591,E69,P35,D6,untreated,no_stress,CB1D,als,7,11,DAPI_BIII_FUS,...,,,,,,,,,,


In [15]:
# Each sample's index label is repeated across its channel rows; swap the
# index for a fresh 0..n-1 range and discard the old labels.
df = df.reset_index(drop=True)
df

In [27]:
# BE CAREFUL: this writes a fresh image_probabilities file in which every expert
# column is still empty (NaN). `to_csv` replaces any file already at this path,
# so running this cell discards previously stored values in that file.
df.to_csv('../results/image_probabilities.csv', index=False)