In [12]:
import bio2bel_creeds.parser
from bio2bel_creeds.parser import (
    hgnc_gene_symbol_update,mgi_gene_symbol_update,
)
import bio2bel_hgnc
import bio2bel_mgi
import bio2bel_rgd
import pybel
import pandas as pd

In [2]:
bio2bel_hgnc.get_version()

'0.2.3'

In [3]:
bio2bel_rgd.get_version()

'0.0.1-dev'

In [4]:
pybel.get_version()

'0.13.2'

In [5]:
hgnc_manager = bio2bel_hgnc.Manager()
if not hgnc_manager.is_populated():
    hgnc_manager.populate()
hgnc_gene_symbol_to_hgnc_id = hgnc_manager.build_hgnc_symbol_id_mapping()

In [6]:
mgi_manager = bio2bel_mgi.Manager()
if not mgi_manager.is_populated():
    mgi_manager.populate()
mgi_gene_symbol_to_mgi_id = mgi_manager.build_mgi_gene_symbol_to_mgi_id_mapping()

In [7]:
rgd_manager = bio2bel_rgd.Manager()
if not rgd_manager.is_populated():
    rgd_manager.populate()
rgd_gene_symbol_to_rgd_id = rgd_manager.build_rgd_gene_symbol_to_rgd_id_mapping()

In [8]:
df = bio2bel_creeds.parser.get_gene_perturbations_metadata_preprocessed_df()
df.head()

Unnamed: 0,experiment_id,cell_type,curator,geo_id,organism,pert_type,gene_namespace,gene_id,gene_symbol
0,gene:508,T reg,dsurujon,GSE40655,mouse,knockout,mgi,MGI:1890077,Foxo1
1,gene:509,heart,dsurujon,GSE44192,mouse,overexpression,mgi,MGI:1914218,Plin5
2,gene:2349,Inducible stable HEK 293 Flp-In T-REx cells ov...,gszeto,GSE40601,human,overexpressing a hyperactive mutant,hgnc,13280,ERO1A
3,gene:2348,Inducible stable HEK 293 Flp-In T-REx Ero1alph...,gszeto,GSE40601,human,overexpression,hgnc,13280,ERO1A
4,gene:2343,OT-I T cells from spleen,AliDot,GSE53388,mouse,knockdown,mgi,MGI:1289252,Ppp2r2d


In [9]:
j = bio2bel_creeds.parser.get_gene_perturbations_json()
j[0]

{'cell_type': 'T reg',
 'pert_ids': ['GSM998920', 'GSM998921'],
 'hs_gene_symbol': 'FOXO1',
 'curator': 'dsurujon',
 'geo_id': 'GSE40655',
 'platform': 'GPL1261',
 'version': '1.0',
 'ctrl_ids': ['GSM998922', 'GSM998923'],
 'down_genes': [['E430024C06Rik', -0.1486409455537796],
  ['Atp1b3', -0.10657449066638947],
  ['Sell', -0.09810968488454819],
  ['Pdcd4', -0.07999653369188309],
  ['Dgka', -0.07083979994058609],
  ['Sepp1', -0.06940750777721405],
  ['LOC665506', -0.06403633952140808],
  ['Btla', -0.05572621524333954],
  ['Smc4', -0.055546581745147705],
  ['A130040M12Rik', -0.05401214584708214],
  ['Crlf3', -0.04851319268345833],
  ['Rpl22l1', -0.04797324165701866],
  ['Tmem64', -0.04693342000246048],
  ['Nrp1', -0.046000074595212936],
  ['Rps19', -0.04567118361592293],
  ['Gm4076', -0.0431458055973053],
  ['Tmem71', -0.043116968125104904],
  ['Shisa5', -0.04304981231689453],
  ['Egr1', -0.042601458728313446],
  ['Clk1', -0.039856139570474625],
  ['Lclat1', -0.0391279011964798],
  ['P

In [13]:
rows = []
for entry in j:
    experiment_id = entry['id']
    organism = entry['organism']
    if organism == 'mouse':
        namespace = 'mgi'
        d = mgi_gene_symbol_to_mgi_id
        u = mgi_gene_symbol_update
    elif organism == 'rat':
        namespace = 'rgd'
        d = rgd_gene_symbol_to_rgd_id
        u = {}
    elif organism == 'human':
        namespace = 'hgnc'
        d = hgnc_gene_symbol_to_hgnc_id
        u = hgnc_gene_symbol_update
    
    for gene_symbol, exp in entry['down_genes']:
        gene_symbol = u.get(gene_symbol, gene_symbol)
        gene_id = d.get(gene_symbol)
        rows.append((
            experiment_id, 
            namespace,
            gene_id,
            gene_symbol,
            'down',
            exp,
        ))
    
    for gene_symbol, exp in entry['up_genes']:
        gene_symbol = u.get(gene_symbol, gene_symbol)
        gene_id = d.get(gene_symbol)
        rows.append((
            experiment_id, 
            namespace,
            gene_id,
            gene_symbol,
            'up',
            exp,
        ))
            
            
ex_df = pd.DataFrame(rows, columns=[
    'experiment_id', 'gene_namespace,'
    'gene_id', 'gene_name',
    'direction', 'value',
])

In [14]:
ex_df.head()

Unnamed: 0,0,1,2,3,4,5
0,gene:508,mgi,MGI:2442039,E430024C06Rik,down,-0.148641
1,gene:508,mgi,MGI:107788,Atp1b3,down,-0.106574
2,gene:508,mgi,MGI:98279,Sell,down,-0.09811
3,gene:508,mgi,MGI:107490,Pdcd4,down,-0.079997
4,gene:508,mgi,MGI:102952,Dgka,down,-0.07084
