In [1]:
import pandas as pd


In [2]:

# Load the edges
edges_orig = pd.read_csv("../../data_raw/immunoglobe_edges.csv")

# Filter only relevant node types
nodes = pd.read_csv('../../data_raw/immunoglobe_nodes.csv')  # Assuming you have node metadata
cell_nodes = nodes[nodes['Node_Type'] == 'Cell']['name'].tolist()
cytokine_nodes = nodes[nodes['Node_Type'] == 'Cytokine']['name'].tolist()
edges = pd.DataFrame()
# Function to parse the edge name
def parse_edge_name(edge_name):
    # Split the edge name by the parentheses and strip extra spaces
    parts = edge_name.split('(')
    source = parts[0].strip()
    action = parts[1].split(')')[0].strip()  # Action is between parentheses
    target = parts[1].split(')')[1].strip()  # Target is after the closing parenthesis
    return source, action, target

# Apply the function to the edge_name column
edges[['Source', 'action', 'Target']] = edges_orig['name'].apply(lambda x: pd.Series(parse_edge_name(x)))
edges['Immune_Process'] = edges_orig['ImmuneProcesses']
edges = edges[edges['action']!='Polarize']
cells_removed = ['Keratinocyte','Tumor','Smooth muscle','Hepatocyte','Epithelial','Endothelial','Fibroblast']
mask_remove_cells = [(edges.iloc[i]['Target'] not in cells_removed)&(edges.iloc[i]['Source'] not in cells_removed) for i in range(len(edges))]
edges = edges[mask_remove_cells]
# Now you have a new DataFrame with Source, action, and Target columns
print(edges[['Source', 'action', 'Target','Immune_Process']].to_string())


                          Source         action                                   Target                                                                                                                                                    Immune_Process
0                   Erythroblast  Differentiate                              Erythrocyte                                                                                                                                                     Hematopoiesis
1                          41BBL       Activate                                        B                                                                                                                                                               NaN
2                              B        Secrete                                     BAFF                                                                                                                                               Antibody product

In [3]:
one_step = edges[(edges['Source'].isin(cytokine_nodes)) & (~edges['Target'].isin(cytokine_nodes))]
one_step = one_step.merge(edges, left_on='Target', right_on='Source', suffixes=('_1', '_2'))

# Keep only cell-cell mediated interactions
one_step_filtered = one_step[one_step['Target_2'].isin(cytokine_nodes)][['Source_1', 'Target_2', 'action_1', 'action_2','Target_1','Immune_Process_1','Immune_Process_2']]
# Define final interactions
def get_final_interaction(inter1, inter2):
    if ('Inhibit' in [inter1, inter2]) or ('Kill' in [inter1, inter2]):
        return 'Negative'
    return 'Positive'

def get_final_interaction_2(inter1, inter2):
    if inter1=='Secrete':
        return inter2
    else: 
        return inter1

one_step_filtered['Final_Interaction'] = one_step_filtered.apply(
    lambda row: get_final_interaction(row['action_1'], row['action_2']), axis=1
)

immune_processes = []
for i in range(len(one_step_filtered)):
    immune_processes_1 = str(one_step_filtered.iloc[i]['Immune_Process_1']).split(',')
    immune_processes_2 = str(one_step_filtered.iloc[i]['Immune_Process_2']).split(',')
    immune_processes_all = list(set(immune_processes_1+immune_processes_2))
    immune_processes.append(immune_processes_all)
one_step_filtered['Immune_Process'] = immune_processes
# Merge with direct interactions
final_network = one_step_filtered[['Source_1','Target_2','Final_Interaction','Target_1','Immune_Process']].rename(columns={'Source_1': 'Source', 'Target_2': 'Target', 'Final_Interaction': 'action','Target_1':'Mediator'})
final_network = final_network.groupby(["Source", "Target", "action", "Mediator"], as_index=False).agg({
    'Immune_Process': lambda x: sorted(set(sum(x, [])))  # Flatten and remove duplicates
})
print(final_network.to_string())

      Source                      Target    action     Mediator                                                                                                                                                                                               Immune_Process
0      41BBL                        BAFF  Positive            B                                                                                                                                                                                   [Antibody production, nan]
1      41BBL                        IL10  Positive            B                                                                                                                                                                                     [Immunosuppression, nan]
2      41BBL                        IL35  Positive            B                                                                                                                                  

In [4]:
filtered_network = pd.DataFrame(columns = ['Source','Target','action','Mediators'])

for cyt1_group_name, cyt1_group_data in final_network.groupby('Source'):
    for cyt2_group_name, cyt2_group_data in cyt1_group_data.groupby('Target'):
        if len(cyt2_group_data['action'].unique())==1:
            row = {'Source':cyt1_group_name,'Target':cyt2_group_name,'action':cyt2_group_data['action'].unique(),'Mediators':', '.join(cyt2_group_data['Mediator'].to_list())}
            filtered_network = pd.concat([filtered_network,pd.DataFrame(row)],ignore_index=True)
        else:
            row = {'Source':[cyt1_group_name],'Target':[cyt2_group_name]}
            data_neg = cyt2_group_data[cyt2_group_data['action']=='Negative']
            data_pos = cyt2_group_data[cyt2_group_data['action']=='Positive']
            if len(data_neg)>len(data_pos):
                row['action']=['Negative']
                row['Mediators'] = [', '.join(data_neg['Mediator'].to_list())]
                filtered_network = pd.concat([filtered_network,pd.DataFrame(row)],ignore_index=True)
            elif len(data_pos)>len(data_neg):
                row['action']=['Positive']
                row['Mediators'] = [', '.join(data_pos['Mediator'].to_list())]
                filtered_network = pd.concat([filtered_network,pd.DataFrame(row)],ignore_index=True)

print(filtered_network.to_string())
filtered_network.to_csv('../data_processed/filtered_cytokine_network.csv')

      Source                      Target    action                                   Mediators
0      41BBL                        BAFF  Positive                                           B
1      41BBL                        IL10  Positive                                           B
2      41BBL                        IL35  Positive                                           B
3      41BBL                         IL6  Positive                                           B
4      41BBL                         LTa  Positive                                           B
5      41BBL                         LTb  Positive                                           B
6      APRIL                        BAFF  Positive                                           B
7      APRIL                        IL10  Positive                                           B
8      APRIL                        IL35  Positive                                           B
9      APRIL                         IL6  Positive