In [None]:
# %load ../env.py
%load_ext autoreload
%autoreload 2
%pylab inline
%matplotlib inline

import matplotlib.text as text
import pandas as pd
import numpy as np
import pylab as pl
import scipy as sp
import sys
import rpy2 
import os 

#Set environment variables


# Set up the local source files.
# TOP is the directory two levels above the notebook's working directory,
# stored with a trailing '/' so that plain string concatenation (TOP+'lib') works.
TOP = '/'.join(os.getcwd().split('/')[:-2])+'/'

# Put the project's lib/ directory first on the import path of this kernel.
LIB = TOP+'lib'
if LIB not in sys.path:
    sys.path.insert(0,LIB)

# Also export it through the environment.
# NOTE(review): this replaces any PYTHONPATH that was already set — confirm that is intended.
os.environ['PYTHONPATH']=LIB

# TOP already ends with '/', so join the sub-directory names instead of
# prepending another '/' (the previous TOP + '/data/' produced '...//data/').
# The empty last component keeps the trailing '/' that callers concatenate onto.
DAT_DIR = os.path.join(TOP, 'data', '')
FIG_DIR = os.path.join(TOP, 'figs', '')

# Create the data and figure directories on first use.
if not os.path.exists(DAT_DIR): os.mkdir(DAT_DIR)
if not os.path.exists(FIG_DIR): os.mkdir(FIG_DIR)


# NOTE(review): star import; `openMongo` used below is presumably provided by
# db.mongo (it is not defined anywhere else in this notebook) — confirm and
# consider importing it explicitly.
from db.mongo import *

# DB is whatever openMongo returns for db='genra_dev_v4'; later cells reach
# collections through it (e.g. DB.compounds, DB.chm_fp, DB.pred_is_v1).
DB = openMongo(db='genra_dev_v4')

In [None]:
# Display the resolved project root as a sanity check.
TOP

## Load the GenRA prediction module

In [None]:
# NOTE(review): star import; presumably the source of saveRunGenRA and runGenRA
# used in the cells below — confirm and consider importing them by name.
from rax.genrapred import *

## Find an example chemical 

In [None]:
# Fetch the compound record named 'PFOS'; the second argument sets the
# `_id` and `synonyms` fields to 0 in the projection.
C0 = DB.compounds.find_one(
    {'name': 'PFOS'},
    {'_id': 0, 'synonyms': 0},
)
# Keep the record's identifier for the prediction calls that follow.
sid = C0['dsstox_sid']
C0

## Run the GenRA Prediction

In [None]:
# Run GenRA for the example chemical; col_save names 'pred_is_v1'
# (presumably the collection the result is stored in — confirm in rax.genrapred).
saveRunGenRA(
    sid,
    col_save='pred_is_v1',
    DB=DB,
    s0=0.05,
    k0=10,
    dbg=False,
    sel_by='toxp_txrf',
)

In [None]:
# Show how many documents pred_is_v1 holds (the name passed as col_save above).
# NOTE(review): if DB is a PyMongo database, Collection.count() is deprecated
# since PyMongo 3.7 and removed in 4.0 — consider count_documents({}) on upgrade.
DB.pred_is_v1.count()

In [None]:
# Run GenRA for `sid` without col_save and tabulate what it returns,
# using the 'out' field as the row index.
# NOTE(review): k0=20 here, while the saveRunGenRA calls use k0=10 — confirm intentional.
Hits = (
    pd.DataFrame(
        runGenRA(sid, DB=DB, s0=0.05, k0=20, dbg=False, sel_by='toxp_txrf')
    )
    .set_index('out')
)

In [None]:
# Order the hits by ascending auc (ties broken by descending p_val), then keep
# only the rows with auc above 0.1 and p_val below 0.5.
hits_sorted = Hits.sort_values(by=['auc', 'p_val'], ascending=[True, False])
H0 = hits_sorted.query("auc>0.1 and p_val<0.5")
H0

## Visualize predictions

In [None]:
# Draw both columns of H0 as horizontal bars on one explicit Axes:
# wide translucent grey bars for 'auc', thin red bars for 'p_val' on top.
fig, ax = pl.subplots()
H0['auc'].plot.barh(ax=ax, color='grey', alpha=0.4, stacked=True)
H0['p_val'].plot.barh(ax=ax, color='red', alpha=1, width=0.2, stacked=True)
ax.set_title("GenRA Predictions for %s" % C0['name'])
ax.legend()

# Parallelize

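Start the ipyparallel cluster from a shell before running the cells below; the client in the next cell connects to the `pb_parallel` profile:

`ipcluster start --profile=pb_parallel --n=50`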


In [None]:
import ipyparallel as PP

# Connect to the cluster running under the 'pb_parallel' profile.
RC=PP.Client(profile='pb_parallel')
# View over all engines, used to push the setup code to every one of them.
d_view=RC[:]
# Load-balanced view used for the bulk prediction run; make its calls blocking.
lb_view = RC.load_balanced_view()
lb_view.block = True

# Read the engine setup script with a context manager so the handle is closed
# (the previous `file(...)` builtin exists only in Python 2 and leaked the handle).
# NOTE(review): ../parenv.py presumably defines DB and the GenRA imports on the
# engines, since later cells use those names inside remote lambdas — confirm.
with open("../parenv.py",'r') as parenv_file:
    x=parenv_file.read()

# Block until every engine has run the script so that a failure in it surfaces
# here rather than as a NameError in a later remote call.
d_view.execute(x, block=True)

In [None]:
# Smoke test of the engines: call DB.chm_fp.count() once per element of
# range(10); the element itself is ignored by the lambda.
d_view.map_sync(lambda _unused: DB.chm_fp.count(), range(10))

In [None]:
# List the collections available through DB.
# NOTE(review): if DB is a PyMongo database, collection_names() is deprecated
# since PyMongo 3.7 in favour of list_collection_names() — confirm on upgrade.
DB.collection_names()

In [None]:
# Collect the dsstox_sid of every document in chm_fp; the projection requests
# only that field.
ALL_SID = [doc['dsstox_sid'] for doc in DB.chm_fp.find({}, {'dsstox_sid': 1})]

In [None]:
# Rebind `sid` (previously the PFOS identifier) to the entry at position 1000
# of ALL_SID for a single trial run; raises IndexError if ALL_SID is shorter.
sid = ALL_SID[1000]
sid

In [None]:
# Trial run for the single sampled chemical, with the same settings as the
# bulk run below.
saveRunGenRA(
    sid,
    col_save='pred_is_v1',
    DB=DB,
    s0=0.05,
    k0=10,
    dbg=False,
    sel_by='toxp_txrf',
)

In [None]:
import datetime

# Timestamp layout shared by the start/end progress lines.
TS_FMT = "%Y-%m-%d %H:%M:%S"

# Work list: one task per chemical identifier.
WORK=ALL_SID
# Uncomment to clear earlier predictions before a full re-run.
#DB.pred_is_v1.delete_many({})

# Single-argument print(...) runs under both Python 2 and 3; the text emitted
# is the same as the former print statements.
print("Start ...  %s tasks =  %s" % (datetime.datetime.now().strftime(TS_FMT), len(WORK)))
print("DB.pred_is_v1 %s" % DB.pred_is_v1.count())

# Fan the predictions out over the engines and wait for all of them.
# - map_sync blocks regardless of lb_view.block, so the "End" line and the
#   final count are only printed after the work has finished.
# - Results go to 'pred_is_v1', the collection that is counted here and indexed
#   afterwards; this call previously wrote to 'pred_GH', so the counts never moved.
# - The names inside the lambda (saveRunGenRA, DB) are looked up on the engines,
#   so the collection name stays a literal instead of a notebook-level variable.
lb_view.map_sync(lambda sid:
            saveRunGenRA(sid,col_save='pred_is_v1',DB=DB,s0=0.05,k0=10,
                         dbg=False,sel_by='toxp_txrf'),
            WORK)

print("DB.pred_is_v1 %s" % DB.pred_is_v1.count())
print("End  %s tasks =  %s" % (datetime.datetime.now().strftime(TS_FMT), len(WORK)))



In [None]:
# Fields of pred_is_v1 to index, one single-field index each.
PRED_INDEX_FIELDS = [
    u'fp', u'n_neg', u'auc', u'a_t', u'dsstox_sid', u'a_p', u'pred',
    u's0', u't0', u'n_pos', u'k0', u'p_val', u'a_s', u'out',
]

for field in PRED_INDEX_FIELDS:
    # Announce each field before its index is created.
    print(field+"...")
    DB.pred_is_v1.create_index(field)
    