# Extract developmental stages from `HsapDv` ontology

In [1]:
import pandas
import networkx

import obo

In [2]:
# Parse the downloaded HsapDv OBO file into `hsapdv`.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's locale default (OBO flat files are UTF-8 encoded).
with open('download/hsapdv.obo', encoding='utf-8') as read_file:
    hsapdv = obo.read_obo(read_file)

In [3]:
# One (stage_id, stage_name) record per node of the ontology graph.
rows = [
    (stage_id, data['name'])
    for stage_id, data in hsapdv.nodes(data=True)
]
# Tabulate the records and order them by stage identifier.
adult_df = pandas.DataFrame(
    rows, columns=['stage_id', 'stage_name']
).sort_values('stage_id')

In [4]:
# Term that anchors the adult branch (presumably the "adult" stage term
# of HsapDv -- TODO confirm against the ontology).
adult_root_id = 'HsapDv:0000087'
# NOTE(review): this takes the graph *ancestors* of the root term; presumably
# `obo.read_obo` draws edges from child term to parent term, so these are the
# ontology's sub-terms of the root -- verify against the obo package.
# The root itself is not among its own ancestors, so it is added explicitly.
adult_stage_ids = networkx.ancestors(hsapdv, adult_root_id) | {adult_root_id}
# 1 when the stage is in the adult set, 0 otherwise.
adult_df['is_adult'] = adult_df['stage_id'].isin(adult_stage_ids).astype(int)

In [5]:
# Preview the first three rows of the stage table.
adult_df.iloc[:3]

Unnamed: 0,stage_id,stage_name,is_adult
24,HsapDv:0000001,Human developmental stage,0
118,HsapDv:0000002,embryonic stage,0
2,HsapDv:0000003,Carnegie stage 01,0


In [6]:
# Tally of terms per `is_adult` flag value (0 = non-adult, 1 = adult)
adult_df['is_adult'].value_counts()

0    121
1     70
Name: is_adult, dtype: int64

In [7]:
# Save the stage table as a tab-separated file, leaving out the row index.
adult_df.to_csv(
    'data/stages.tsv',
    index=False,
    sep='\t',
)