In [1]:
from __future__ import division
import pandas as pd
import numpy as np
import operator
import pickle

In [2]:
df = pd.read_csv("data/nodes.tsv", sep='\t')

In [3]:
df

Unnamed: 0,ID,Name,Depth
0,HP:0000001,All,0
1,HP:0012823,Clinical modifier,1
2,HP:0000118,Phenotypic abnormality,1
3,HP:0032223,Blood group,1
4,HP:0031797,Clinical course,1
...,...,...,...
15242,HP:0009952,Complete duplication of the middle phalanx of ...,15
15243,HP:0009951,Partial duplication of the distal phalanx of t...,15
15244,HP:0009979,Complete duplication of the middle phalanx of ...,15
15245,HP:0009978,Complete duplication of the distal phalanx of ...,15


In [4]:
df.loc[df['ID'] == 'HP:0012824']

Unnamed: 0,ID,Name,Depth
76,HP:0012824,Severity,2


In [5]:
with open('graph_ontology.pkl', 'rb') as input:
    graph_load = pickle.load(input)

In [6]:
Phenotypic_abnor_descendants = graph_load.get_descendants('HP:0000118')
df_new = df[df.ID.isin(Phenotypic_abnor_descendants)]
entry = df.loc[df['ID'] == 'HP:0000118']
df_new = pd.concat([entry,df_new])
df_new = df_new.rename(columns = {'ID': 'HPO_ID', 'Name': 'Description'}, inplace = False)
df_new['Remarks'] = 'Definition'

In [7]:
df_new

Unnamed: 0,HPO_ID,Description,Depth,Remarks
2,HP:0000118,Phenotypic abnormality,1,Definition
9,HP:0000478,Abnormality of the eye,2,Definition
11,HP:0000769,Abnormality of the breast,2,Definition
12,HP:0001574,Abnormality of the integument,2,Definition
13,HP:0003011,Abnormality of the musculature,2,Definition
...,...,...,...,...
15242,HP:0009952,Complete duplication of the middle phalanx of ...,15,Definition
15243,HP:0009951,Partial duplication of the distal phalanx of t...,15,Definition
15244,HP:0009979,Complete duplication of the middle phalanx of ...,15,Definition
15245,HP:0009978,Complete duplication of the distal phalanx of ...,15,Definition


In [8]:
type(graph_load.nodes['HP:0000118']['synonym'])

str

In [9]:
temp_data = pd.DataFrame(columns=['HPO_ID','Description','Depth', 'Remarks'])
i=0
for index, row in df_new.iterrows():
    try:
        synonyms = graph_load.nodes[row['HPO_ID']]['synonym']
        if (type(synonyms)==str):
            temp_data.loc[i, ['HPO_ID']] = row['HPO_ID']
            temp_data.loc[i, ['Description']] = synonyms
            temp_data.loc[i, ['Depth']] = row['Depth']
            temp_data.loc[i, ['Remarks']] = 'Synonym'
            i=i+1
        else:
            for item in synonyms:
                temp_data.loc[i, ['HPO_ID']] = row['HPO_ID']
                temp_data.loc[i, ['Description']] = item
                temp_data.loc[i, ['Depth']] = row['Depth']
                temp_data.loc[i, ['Remarks']] = 'Synonym'
                i=i+1
    except KeyError:
        pass

In [10]:
temp_data

Unnamed: 0,HPO_ID,Description,Depth,Remarks
0,HP:0000118,Organ abnormality,1,Synonym
1,HP:0000478,Abnormal eye,2,Synonym
2,HP:0000478,Abnormality of the eye,2,Synonym
3,HP:0000478,Eye disease,2,Synonym
4,HP:0000769,Abnormality of the breast,2,Synonym
...,...,...,...,...
19663,HP:0200159,Agenesis of deciduous mandibular lateral incisor,15,Synonym
19664,HP:0200159,Failure of development of deciduous mandibular...,15,Synonym
19665,HP:0200159,Failure of development of primary mandibular l...,15,Synonym
19666,HP:0200159,Missing deciduous mandibular lateral incisor,15,Synonym


In [11]:
df_new = pd.concat([df_new, temp_data])
df_new_sorted = df_new.sort_values(by='Depth', ascending=True)
df_new_sorted

Unnamed: 0,HPO_ID,Description,Depth,Remarks
2,HP:0000118,Phenotypic abnormality,1,Definition
0,HP:0000118,Organ abnormality,1,Synonym
11,HP:0000769,Abnormality of the breast,2,Definition
37,HP:0002086,Respiratory abnormality,2,Synonym
36,HP:0000119,Urogenital anomalies,2,Synonym
...,...,...,...,...
19582,HP:0009968,Partial duplication of the outermost bone of t...,15,Synonym
19581,HP:0009968,Notched outermost bone of the middle finger,15,Synonym
19580,HP:0009968,Bifid terminal phalanx of the 3rd finger,15,Synonym
19593,HP:0011046,Absence of upper front baby tooth,15,Synonym


In [17]:
df_drop = df_new_sorted.drop_duplicates(subset=['HPO_ID', 'Description','Depth'])

In [18]:
df_drop.to_csv("Phenotypic_descendants_synonym_depth_new.csv", index= False)

In [None]:
phenotype_data = pd.read_csv("Phenotypic_descendants_depth.csv")
phenotype_data

In [None]:
df_new1 = phenotype_data.loc[phenotype_data.Depth<3][['HPO_ID', 'Description']]
df_new1

In [None]:
len(graph_load.get_descendants('HP:0000118'))

In [None]:
Immediate_child= graph_load.get_immediate_descendants('HP:0000118')
for child in Immediate_child:
    print (child, graph_load.get_ancestors(child))

In [None]:
Immediate_descendants_list = graph_load.get_immediate_descendants('HP:0000118')
Immediate_descendants_list

In [None]:
result = pd.DataFrame(columns=['HPO_ID','Definition','Ancestor'])
i=0
for index, row in phenotype_data.iloc[29:].iterrows():
    ancestors_list = graph_load.get_ancestors(row['HPO_ID'])
    common_ancestors = ancestors_list.intersection(Immediate_descendants_list)
    if len(common_ancestors) ==1:
        print(row['HPO_ID'], ancestors_list, common_ancestors)
        try:
            result.loc[i, ['Definition']] = graph_load.nodes[row['HPO_ID']]['def']
            str_ancestors = " ".join(common_ancestors),
            result.loc[i, ['Ancestor']] = str_ancestors
            result.loc[i, ['HPO_ID']] = row['HPO_ID']
            i = i+1
        except KeyError:
            pass

In [None]:
result

In [None]:
result.to_csv("Single_ancestor.csv", index= False)

In [None]:
temp = int(result[result['HPO_ID']=='HP:0025461'].index.values)
print (temp)

In [None]:
for index, row in result.head(5).iterrows():
    print (row.loc['Definition'])