In [1]:
## find all parents of GO:0010898, and the lineage all the way to the root 
# load a sif network, where showing child-parent relationship
import pandas as pd

def load_ont(filename):
    data = []
    with open(filename, 'r') as f:
        for line in f:
            columns = line.strip().split('\t')  # tab-separated values
            if len(columns) >= 3:
                parent = columns[0] # based on collapsed_go.symbol, the first column is parent
                child = columns[1] # second column is child
                interaction = columns[2] # third column is interaction type
                if interaction == 'default':
                    data.append({'child': child, 'parent': parent}) # Add child and parent to a dataframe
    df = pd.DataFrame(data)
    return df
             

def get_ancestors(child_node, edges):
    ancestors = []
    # Find the parents of the current node. In the 'edges' DataFrame, 
    # this is done by looking up rows where the 'child' column matches 
    # the current node, and then getting the corresponding values in the 'parent' column.
    parents = edges.loc[edges['child'] == child_node, 'parent'].values

    for parent in parents:
        if parent not in ancestors:  # Check if the parent is already in the ancestors list
            ancestors.append(parent)  # If not, add it to the list
            ancestors.extend(get_ancestors(parent, edges))  # Recursively find the parent's ancestors

    return ancestors


if __name__ == '__main__':
    GO_ID = 'GO:0010897'
    go_network = load_ont('./data/GO_BP/collapsed_go.symbol')
    ancestors = get_ancestors(GO_ID, go_network)
    print(ancestors)
    print(len(ancestors))
    filtered_network = go_network[(go_network['child'].isin(ancestors+[GO_ID]))]
    GO_ID_name = GO_ID.replace(':', '_')
    filtered_network.to_csv(f'./data/GO_term_analysis/{GO_ID_name}_subhierarchy.txt', sep= '\t', index=False)


['GO:0031330', 'GO:0031329', 'GO:0044248', 'GO:0044237', 'GO:0009987', 'GO:0008150', 'GO:0008152', 'GO:0008150', 'GO:0009056', 'GO:0008152', 'GO:0008150', 'GO:0009894', 'GO:0019222', 'GO:0050789', 'GO:0008150', 'GO:0065007', 'GO:0008150', 'GO:0008152', 'GO:0008150', 'GO:0009056', 'GO:0008152', 'GO:0008150', 'GO:0031323', 'GO:0044237', 'GO:0009987', 'GO:0008150', 'GO:0008152', 'GO:0008150', 'GO:0050794', 'GO:0009987', 'GO:0008150', 'GO:0050789', 'GO:0008150', 'GO:0065007', 'GO:0008150', 'GO:0019222', 'GO:0050789', 'GO:0008150', 'GO:0065007', 'GO:0008150', 'GO:0008152', 'GO:0008150', 'GO:0031324', 'GO:0044237', 'GO:0009987', 'GO:0008150', 'GO:0008152', 'GO:0008150', 'GO:0048523', 'GO:0009987', 'GO:0008150', 'GO:0048519', 'GO:0008150', 'GO:0050789', 'GO:0008150', 'GO:0065007', 'GO:0008150', 'GO:0050794', 'GO:0009987', 'GO:0008150', 'GO:0050789', 'GO:0008150', 'GO:0065007', 'GO:0008150', 'GO:0009892', 'GO:0048519', 'GO:0008150', 'GO:0050789', 'GO:0008150', 'GO:0065007', 'GO:0008150', 'GO:0

In [2]:
print(len(filtered_network), len(set(ancestors)))

97 41


In [3]:
# make a nodes attribute table for all nodes in the subnetwork
GO_ID = 'GO:0010897'
GO_ID_name = GO_ID.replace(':', '_')
# load the GO term annotation file
import pandas as pd
df_csv = pd.read_csv('./data/go_terms.csv', index_col=0)

terms_in_subnetwork = set(filtered_network['child'].tolist() + filtered_network['parent'].tolist())

sub_nodes = df_csv.loc[df_csv['GO'].isin(terms_in_subnetwork), :]

# sub_nodes.head()
print(sub_nodes.shape)

sub_nodes.to_csv(f'./data/GO_term_analysis/{GO_ID_name}_subhierarchy_nodes.txt', sep='\t', index=False)

(42, 4)


In [4]:
sub_nodes.head()

Unnamed: 0,GO,Genes,Gene_Count,Term_Description
121,GO:0071704,COMT ASPSCR1 CWF19L1 SERPINI2 ACTR6 ATP5IF1 CA...,10459,organic substance metabolic process
312,GO:0090207,NR1H2 SIRT1 KAT5 APOA5 APOA4 PANK2 CTDNEP1 CNE...,42,regulation of triglyceride metabolic process
516,GO:0044242,HAO1 PLA2G7 HEXA PHYH HAO2 HEXB ABCD4 MGLL SMP...,223,cellular lipid catabolic process
616,GO:0009892,TRIM33 ZNF256 ZNF692 SERPINI2 CBFA2T3 ACTR6 AT...,2806,negative regulation of metabolic process
1025,GO:0019216,HPGD PRKAG2 ADRA2A TREM2 APOA5 TWIST1 ATP1A1 A...,303,regulation of lipid metabolic process
