In [3]:
import pandas as pd

# Load the edges from the standardized location
edges_orig = pd.read_csv("../../data_raw/immunoglobe_edges.csv")

# Filter only relevant node types
nodes = pd.read_csv("../../data_raw/immunoglobe_nodes.csv")
cell_nodes = nodes[nodes['Node_Type'] == 'Cell']['name'].tolist()
edges = pd.DataFrame()
# Function to parse the edge name
def parse_edge_name(edge_name):
    # Split the edge name by the parentheses and strip extra spaces
    parts = edge_name.split('(')
    source = parts[0].strip()
    action = parts[1].split(')')[0].strip()  # Action is between parentheses
    target = parts[1].split(')')[1].strip()  # Target is after the closing parenthesis
    return source, action, target

# Apply the function to the edge_name column
edges[['Source', 'Action', 'Target']] = edges_orig['name'].apply(lambda x: pd.Series(parse_edge_name(x)))
edges['Immune_Process'] = edges_orig['ImmuneProcesses']
edges = edges[edges['Action']!='Polarize']
# Now you have a new DataFrame with Source, action, and Target columns
print(edges[['Source', 'Action', 'Target','Immune_Process']].to_string())


                          Source         Action                                   Target                                                                                                                                                    Immune_Process
0                   Erythroblast  Differentiate                              Erythrocyte                                                                                                                                                     Hematopoiesis
1                          41BBL       Activate                                        B                                                                                                                                                               NaN
2                              B        Secrete                                     BAFF                                                                                                                                               Antibody product

In [3]:
print(edges['Action'].unique())
mask = [edges.iloc[i]['Action'] in ['Activate','Secrete','Survive','Inhibit','Kill'] for i in range(len(edges))]
edges_filtered = edges[mask]
cells_removed = ['Keratinocyte','Tumor','Smooth muscle','Hepatocyte','Epithelial','Endothelial']
mask_remove_cells = [(edges_filtered.iloc[i]['Target'] not in cells_removed)&(edges_filtered.iloc[i]['Source'] not in cells_removed) for i in range(len(edges_filtered))]
edges_filtered = edges_filtered[mask_remove_cells]
print(len(edges_filtered))
print(edges_filtered.to_string())

['Differentiate' 'Activate' 'Secrete' 'Survive' 'Inhibit' 'Kill' 'Recruit']
647
                          Source    Action                                   Target                                                                                                                                                    Immune_Process
1                          41BBL  Activate                                        B                                                                                                                                                               NaN
2                              B   Secrete                                     BAFF                                                                                                                                               Antibody production
3                              B   Survive                                       DC                                                                                                   

In [5]:
edges = edges_filtered
# Select direct cell-cell interactions
direct_interactions = edges[(edges['Source'].isin(cell_nodes)) & (edges['Target'].isin(cell_nodes))]
def rename_interactions (Interaction):
    if Interaction=='Inhibit' or Interaction=='Kill':
        return 'Negative'
    return 'Positive'
direct_interactions['Action'] = direct_interactions.apply(
    lambda row: rename_interactions(row['Action']), axis=1
)
direct_interactions['Immune_Process'] = [str(direct_interactions.iloc[i]['Immune_Process']).split(',') for i in range(len(direct_interactions))]
# Select one-step mediated interactions
one_step = edges[(edges['Source'].isin(cell_nodes)) & (~edges['Target'].isin(cell_nodes))]
one_step = one_step.merge(edges, left_on='Target', right_on='Source', suffixes=('_1', '_2'))

# Keep only cell-cell mediated interactions
one_step_filtered = one_step[one_step['Target_2'].isin(cell_nodes)][['Source_1', 'Target_2', 'Action_1', 'Action_2','Target_1','Immune_Process_1','Immune_Process_2']]
# Define final interactions
def get_final_interaction(inter1, inter2):
    print(inter1,inter2)
    if ('Inhibit' in [inter1, inter2]) or ('Kill' in [inter1, inter2]):
        return 'Negative'
    return 'Positive'

def get_final_interaction_2(inter1, inter2):
    if inter1=='Secrete':
        return inter2
    else: 
        return inter1

one_step_filtered['Final_Interaction'] = one_step_filtered.apply(
    lambda row: get_final_interaction_2(row['Action_1'], row['Action_2']), axis=1
)

immune_processes = []
for i in range(len(one_step_filtered)):
    immune_processes_1 = str(one_step_filtered.iloc[i]['Immune_Process_1']).split(',')
    immune_processes_2 = str(one_step_filtered.iloc[i]['Immune_Process_2']).split(',')
    immune_processes_all = list(set(immune_processes_1+immune_processes_2))
    immune_processes.append(immune_processes_all)
one_step_filtered['Immune_Process'] = immune_processes
# Merge with direct interactions
final_network = pd.concat([
    direct_interactions[['Source', 'Target', 'Action','Immune_Process']],
    one_step_filtered[['Source_1','Target_2','Final_Interaction','Target_1','Immune_Process']].rename(columns={'Source_1': 'Source', 'Target_2': 'Target', 'Final_Interaction': 'Action','Target_1':'Mediator',})
])

final_network = final_network.groupby(["Source", "Target", "Action", "Mediator"], as_index=False).agg({
    'Immune_Process': lambda x: sorted(set(sum(x, [])))  # Flatten and remove duplicates
})
final_network.to_csv("../data_processed/filtered_cell_network_filtered_actions.csv", index=False)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  direct_interactions['Action'] = direct_interactions.apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  direct_interactions['Immune_Process'] = [str(direct_interactions.iloc[i]['Immune_Process']).split(',') for i in range(len(direct_interactions))]


In [4]:
final_network['Action'].unique()

array(['Activate', 'Inhibit', 'Survive', 'Kill'], dtype=object)