In [None]:
import os
import re
import io
import sys
import glob
import enum
import json
import dask
import xlrd
import base64
import time
import shutil
import imageio
import requests
import datetime
import urllib
import psycopg2
import numpy as np
import pandas as pd
import skimage

import cProfile
import io
import pstats
import contextlib

import tifffile
import seaborn as sns
import matplotlib as mpl

import dask.diagnostics
import sqlalchemy as db
import sqlalchemy.orm
import sqlalchemy.ext.declarative
from matplotlib import pyplot as plt

%load_ext autoreload
%autoreload 1

sys.path.append('../..')
%aimport opencell.imaging.managers
%aimport opencell.imaging.processors

from opencell.database import models, operations
from opencell.database import utils as db_utils
from opencell.api import payloads
from opencell.api.resources import PulldownInteractions

In [None]:
# Choose the target database by pointing at the matching credentials file
# (exactly one of these lines should be active).
# url = db_utils.url_from_credentials('../../db-credentials-test.json')
url = db_utils.url_from_credentials('../../db-credentials-dev.json')
# url = db_utils.url_from_credentials('../../db-credentials-cap.json')

# Build the engine and a scoped session that the query cells below share.
engine = db.create_engine(url, echo=False)
session_factory = db.orm.sessionmaker(bind=engine)
Session = db.orm.scoped_session(session_factory)

# Display the engine's URL object instead of the raw `url`: its repr masks the
# password, so the saved cell output cannot leak database credentials.
# NOTE(review): assumes `url` may embed a password - confirm against db_utils.
engine.url

In [None]:
# Smoke-test the /data/ location (proxied to the S3 bucket) by fetching a
# small test file and showing the HTTP status code.

# Alternative targets:
# url = 'http://localhost/data/test.txt'
# url = 'http://opencell.s3.us-west-2.amazonaws.com/opencell-microscopy/test.txt'
url = 'http://opencell.ds.czbiohub.org/data/test.txt'

# The request is sent with a Referer header; switch to None to send without it.
headers = dict(Referer='http://opencell.ds.czbiohub.org/')
# headers = None

r = requests.get(url=url, headers=headers)
r.status_code

In [None]:
@contextlib.contextmanager
def profiled():
    """
    Profile the enclosed block with cProfile and print the statistics,
    sorted by cumulative time, to stdout when the block exits.

    The profiler is always stopped and the report always printed, even when
    the block raises; the exception then propagates to the caller unchanged.
    """
    pr = cProfile.Profile()
    pr.enable()
    try:
        yield
    finally:
        # Runs on both normal exit and exceptions, so the profiler is never
        # left enabled in the kernel after a failing cell.
        pr.disable()
        s = io.StringIO()
        ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
        ps.print_stats()
        # uncomment this to see who's calling what
        # ps.print_callers()
        print(s.getvalue())

### pulldown endpoint

In [None]:
# Baseline (slow) pulldown query: load the pulldown with the given id and
# eager-load all of its hits, each hit's protein group, and the protein
# group's CRISPR designs and UniProt metadata.
pulldown_id = 562

query = Session.query(models.MassSpecPulldown).options(
    (
        db.orm.joinedload(models.MassSpecPulldown.hits)
        .joinedload(models.MassSpecHit.protein_group)
        .joinedload(models.MassSpecProteinGroup.crispr_designs)
    ),
    (
        db.orm.joinedload(models.MassSpecPulldown.hits)
        .joinedload(models.MassSpecHit.protein_group)
        .joinedload(models.MassSpecProteinGroup.uniprot_metadata)
    ),
)
query = query.filter(models.MassSpecPulldown.id == pulldown_id)

# Time the query; the cell displays (elapsed seconds, number of rows).
start = time.time()
result = query.all()
end = time.time()
(end - start, len(result))

In [None]:
# Query the hits directly (rather than the pulldown) and keep only the minor
# or significant hits, eager-loading the same protein-group relationships.
pulldown_id = 562

query = Session.query(models.MassSpecHit).options(
    (
        db.orm.joinedload(models.MassSpecHit.protein_group, innerjoin=True)
        .joinedload(models.MassSpecProteinGroup.crispr_designs)
    ),
    (
        db.orm.joinedload(models.MassSpecHit.protein_group, innerjoin=True)
        .joinedload(models.MassSpecProteinGroup.uniprot_metadata)
    ),
)
query = query.filter(models.MassSpecHit.pulldown_id == pulldown_id)
query = query.filter(
    db.or_(
        models.MassSpecHit.is_minor_hit == True,  # noqa: E712
        models.MassSpecHit.is_significant_hit == True,  # noqa: E712
    )
)

# Time the query; the cell displays (elapsed seconds, number of rows).
start = time.time()
result = query.all()
end = time.time()
(end - start, len(result))

In [None]:
# For the hits that are neither minor nor significant, fetch only the two
# columns `pval` and `enrichment` instead of full ORM objects.
pulldown_id = 562

query = Session.query(
    models.MassSpecHit.pval,
    models.MassSpecHit.enrichment,
).filter(
    # multiple criteria passed to one filter() call are ANDed together
    models.MassSpecHit.pulldown_id == pulldown_id,
    models.MassSpecHit.is_minor_hit == False,  # noqa: E712
    models.MassSpecHit.is_significant_hit == False,  # noqa: E712
)

# Time the query; the cell displays (elapsed seconds, number of rows).
start = time.time()
result = query.all()
end = time.time()
(end - start, len(result))

In [None]:
# Profile the execution of the most recently built `query`.
# `.all()` is used rather than `.one()`: when the notebook is run top to
# bottom, `query` selects many rows, and `.one()` would raise
# MultipleResultsFound instead of producing a useful profile.
with profiled():
    query.all()

In [None]:
# Display the URL of the engine the queries above ran against.
engine.url

In [None]:
# Time a request to the pulldown endpoint of the local API server;
# the cell displays (elapsed seconds, HTTP status code).
start = time.time()
r = requests.get(url='http://localhost:5000/lines/701/pulldown')
end = time.time()
(end - start, r.status_code)

### fovs endpoint

In [None]:
# Display the URL of the engine used by the FOV queries in this section.
engine.url

In [None]:
# Time a request to the fovs endpoint (with ROIs) of the local API server;
# the cell displays (elapsed seconds, HTTP status code).
start = time.time()
r = requests.get(url='http://localhost:5000/lines/408/fovs?fields=rois')
end = time.time()
(end - start, r.status_code)

In [None]:
# Same fovs request as for the local server, but against the cap.czbiohub.org
# deployment; the cell displays (elapsed seconds, HTTP status code).
start = time.time()
r = requests.get(url='http://cap.czbiohub.org/api/lines/408/fovs?fields=rois')
end = time.time()
(end - start, r.status_code)

In [None]:
# Build the FOV query for one cell line: only FOVs that have an annotation,
# with dataset, results, annotation and ROIs all eager-loaded.
query = (
    Session.query(models.MicroscopyFOV)
    .options(
        db.orm.joinedload(models.MicroscopyFOV.dataset, innerjoin=True),
        db.orm.joinedload(models.MicroscopyFOV.results, innerjoin=True),
        db.orm.joinedload(models.MicroscopyFOV.annotation),
    )
    .filter(models.MicroscopyFOV.cell_line_id == 701)
    .filter(models.MicroscopyFOV.annotation != None)  # noqa
    # the ROIs are added as a separate options() call, after the filters
    .options(db.orm.joinedload(models.MicroscopyFOV.rois, innerjoin=True))
)

In [None]:
# Execute the current `query` and time it;
# the cell displays (elapsed seconds, number of rows).
start = time.time()
result = query.all()
end = time.time()
(end - start, len(result))

### lines endpoint (without filters)

In [None]:
# Time a request to the lines endpoint of the local API server; the cell
# displays (elapsed seconds, HTTP status code, length of the JSON payload).
start = time.time()
r = requests.get(url='http://localhost:5000/lines?publication_ready=true')
end = time.time()
(end - start, r.status_code, len(r.json()))

In [None]:
# Load every cell line, eager-loading its CRISPR design (plus that design's
# UniProt metadata), FACS dataset, sequencing dataset and annotation.
lines = Session.query(models.CellLine).options(
    (
        db.orm.joinedload(models.CellLine.crispr_design, innerjoin=True)
        .joinedload(models.CrisprDesign.uniprot_metadata, innerjoin=True)
    ),
    db.orm.joinedload(models.CellLine.facs_dataset),
    db.orm.joinedload(models.CellLine.sequencing_dataset),
    db.orm.joinedload(models.CellLine.annotation),
).all()

In [None]:
# Raw-SQL count of FOVs and annotated FOVs per cell line.
# The statement is wrapped in `db.text` and executed through the session,
# because bare SQL is not valid Python and would stop a Run All at this cell.
fov_counts_sql = db.text(
    '''
    select cell_line.id, count(fov.id), count(ant.id) from cell_line
    LEFT JOIN microscopy_fov AS fov ON cell_line.id = fov.cell_line_id
    LEFT JOIN microscopy_fov_annotation AS ant ON fov.id = ant.fov_id
    group by cell_line.id
    order by cell_line.id desc
    '''
)
Session.execute(fov_counts_sql).fetchall()

In [None]:
# ORM aggregate: for every cell line, count its FOVs and its FOV annotations.
# Outer joins are used, so the query does not drop cell lines that have no
# matching FOV or annotation rows.
num_fovs = db.func.count(models.MicroscopyFOV.id).label('num_fovs')
num_annotated_fovs = (
    db.func.count(models.MicroscopyFOVAnnotation.id).label('num_annotated_fovs')
)
result = (
    Session.query(models.CellLine.id, num_fovs, num_annotated_fovs)
    .outerjoin(models.CellLine.fovs)
    .outerjoin(models.MicroscopyFOV.annotation)
    .group_by(models.CellLine.id)
    .all()
)

In [None]:
# Fetch a single hit and check that its `enrichment` value converts to float.
float(
    Session.query(models.MassSpecHit)
    .limit(1)
    .one()
    .enrichment
)

### Pulldown interactions endpoint

In [None]:
# Time a request to the pulldown interactions endpoint of the deployed API;
# the cell displays (elapsed seconds, HTTP status code).
start = time.time()
r = requests.get(
    url='http://opencell.czbiohub.org/api/pulldowns/679/interactions?analysis_type=new&subcluster_type=core-complexes'
)
end = time.time()
(end - start, r.status_code)

In [None]:
# Number of entries under the 'nodes' key in the JSON body of the most
# recent response `r`.
len(r.json()['nodes'])

In [None]:
# Load the single pulldown with id 559; `.one()` raises unless exactly one
# row matches.
target_pulldown = Session.query(models.MassSpecPulldown).filter(
    models.MassSpecPulldown.id == 559
).one()
target_pulldown

In [None]:
# Minor or significant hits of pulldown 559 whose protein group is associated
# with the CRISPR design of the target pulldown's own cell line.
# `MassSpecHit` is referenced through `models` because the bare name is only
# imported in a later cell, so a fresh-kernel Run All would fail here.
bait_hits = (
    Session.query(models.MassSpecHit)
    .join(models.MassSpecProteinGroup)
    .join(models.ProteinGroupCrisprDesignAssociation)
    .join(models.CrisprDesign)
    .filter(models.MassSpecHit.pulldown_id == 559)
    .filter(db.or_(
        models.MassSpecHit.is_minor_hit == True,  # noqa
        models.MassSpecHit.is_significant_hit == True  # noqa
    ))
    .filter(models.CrisprDesign.id == target_pulldown.cell_line.crispr_design.id)
    .all()
)
bait_hits

In [None]:
from opencell.database.models import MassSpecHit, MassSpecPulldown 

In [None]:
# Ids of the protein groups attached to the CRISPR design of the target
# pulldown's cell line.
pg_ids = [
    protein_group.id
    for protein_group in target_pulldown.cell_line.crispr_design.protein_groups
]

In [None]:
# Find the other pulldowns (any id except 559) that have a minor or
# significant hit on one of the protein groups in `pg_ids`, keeping only
# pulldowns whose manual display flag is unset or true.
query = Session.query(MassSpecPulldown).join(MassSpecHit)
query = query.filter(
    db.or_(
        MassSpecPulldown.manual_display_flag == None,  # noqa: E711
        MassSpecPulldown.manual_display_flag == True,  # noqa: E712
    )
)
query = query.filter(MassSpecPulldown.id != 559)
query = query.filter(MassSpecHit.protein_group_id.in_(pg_ids))
query = query.filter(
    db.or_(
        MassSpecHit.is_minor_hit == True,  # noqa: E712
        MassSpecHit.is_significant_hit == True,  # noqa: E712
    )
)

# Time the query; the cell displays (elapsed seconds, number of rows).
start = time.time()
result = query.all()
end = time.time()
(end - start, len(result))