In [None]:
import os
import re
import io
import sys
import glob
import json
import dask
import requests
import datetime
import urllib
import psycopg2
import numpy as np
import pandas as pd

import dask.diagnostics
import sqlalchemy as db
from matplotlib import pyplot as plt

%load_ext autoreload
%autoreload 1

sys.path.append('../..')
%aimport opencell.database.operations
%aimport opencell.database.uniprot_utils
from opencell.database import models, operations, ms_utils, uniprot_utils
from opencell.database import utils as db_utils

In [None]:
# `db_utils.url_from_credentials` is given the path of a credentials JSON file;
# whatever it returns is handed to SQLAlchemy's `create_engine` as the database URL.
url = db_utils.url_from_credentials('../../db-credentials-docker.json')
# `engine` is the connection object passed to every `pd.read_sql` call in this notebook.
engine = db.create_engine(url)
# ORM session machinery bound to the same engine: a session factory wrapped in a scoped-session registry.
# NOTE(review): `db.orm` is reached as an attribute of the top-level `sqlalchemy` package,
# which relies on `sqlalchemy.orm` already having been imported elsewhere — confirm.
session_factory = db.orm.sessionmaker(bind=engine)
Session = db.orm.scoped_session(session_factory)

### List all plates imaged more recently than a given date

In [None]:
# Inclusive lower bound on the dataset date, written as a 'YYYY-MM-DD' string.
earliest_date = '2020-06-12'

# One row per distinct (pml_id, plate_design_id, date) among the FOVs whose dataset
# date is on or after `earliest_date`, ordered by plate design.
# The date is sent as a bound parameter (:earliest_date) rather than being formatted
# into the SQL text, so quoting/escaping is left to the database driver.
datasets = pd.read_sql(
    db.text(
        '''
        select fov.pml_id, ds.date, plate_design_id
        from microscopy_fov fov
        left join microscopy_dataset ds on fov.pml_id = ds.pml_id
        left join cell_line cl on cl.id = fov.cell_line_id
        left join crispr_design cd on cd.id = cl.crispr_design_id
        where ds.date >= :earliest_date
        group by (fov.pml_id, plate_design_id, ds.date)
        order by plate_design_id;
        '''
    ),
    engine,
    params={'earliest_date': earliest_date},
)

datasets

### List all targets imaged more recently than a given date

In [None]:
# Inclusive lower bound on the dataset date, written as a 'YYYY-MM-DD' string.
earliest_date = '2020-06-12'

# One row per distinct (pml_id, plate_design_id, well_id, date) among the FOVs whose
# dataset date is on or after `earliest_date`; `target` is max(target_name) within
# each of those groups.
# The date is sent as a bound parameter (:earliest_date) rather than being formatted
# into the SQL text, so quoting/escaping is left to the database driver.
targets = pd.read_sql(
    db.text(
        '''
        select fov.pml_id, ds.date, plate_design_id, well_id, max(target_name) as target
        from microscopy_fov fov
        left join microscopy_dataset ds on fov.pml_id = ds.pml_id
        left join cell_line cl on cl.id = fov.cell_line_id
        left join crispr_design cd on cd.id = cl.crispr_design_id
        where ds.date >= :earliest_date
        group by (fov.pml_id, plate_design_id, well_id, ds.date)
        order by (plate_design_id, well_id);
        '''
    ),
    engine,
    params={'earliest_date': earliest_date},
)

# Summary: number of result rows and number of distinct plate designs among them.
print(
    'Found %s newly imaged targets on %s plates'
    % (targets.shape[0], len(targets.plate_design_id.unique()))
)

In [None]:
# Preview the first five rows of the per-target listing.
targets.head(n=5)

In [None]:
# Write the per-target listing to a CSV file whose name embeds `earliest_date`.
targets.to_csv(
    path_or_buf='/gpfsML/ML_group/opencell-microscopy/targets-imaged-since-%s.csv' % earliest_date
)

### List all datasets containing FOVs for a given target

In [None]:
# Target whose datasets should be listed (matched exactly against crispr_design.target_name).
target_name = 'MTOR'

# One row per distinct (pml_id, plate_design_id, well_id, date) among the FOVs whose
# CRISPR design has this target name, newest pml_id first.
# The name is sent as a bound parameter (:target_name) rather than being formatted
# into the SQL text, so a name containing a quote character cannot break the statement.
datasets = pd.read_sql(
    db.text(
        '''
        select fov.pml_id, ds.date, plate_design_id, well_id, max(target_name) as target_name
        from microscopy_fov fov
        left join microscopy_dataset ds on fov.pml_id = ds.pml_id
        left join cell_line cl on cl.id = fov.cell_line_id
        left join crispr_design cd on cd.id = cl.crispr_design_id
        where cd.target_name = :target_name
        group by (fov.pml_id, plate_design_id, well_id, ds.date)
        order by pml_id desc
        '''
    ),
    engine,
    params={'target_name': target_name},
)

datasets

### Plot the number of FOVs in each dataset

In [None]:
# Count the FOV rows in each (pml_id, plate_design_id, date) group, newest pml_id first.
d = pd.read_sql(
    con=engine,
    sql='''
    select fov.pml_id, plate_design_id, d.date, count(*) as num_fovs from microscopy_fov fov
    left join microscopy_dataset d on fov.pml_id = d.pml_id
    left join cell_line cl on cl.id = fov.cell_line_id
    left join crispr_design cd on cd.id = cl.crispr_design_id
    group by (fov.pml_id, plate_design_id, d.date)
    order by pml_id desc
    ''',
)

In [None]:
# Preview the first five rows of the FOV counts.
d.head(n=5)

In [None]:
# Parse the date column as datetimes, then use it as the index (the column itself is kept)
# and order the rows chronologically.
d['date'] = pd.to_datetime(d['date'])
d = d.set_index(d['date']).sort_index()

In [None]:
# FOV count of each row of `d` against its date index.
# Axis labels and a title make the figure readable on its own; the trailing
# semicolon suppresses the Line2D repr that would otherwise be echoed by the cell.
plt.plot(d.index, d.num_fovs)
plt.xlabel('Date')
plt.ylabel('Number of FOVs')
plt.title('Number of FOVs in each dataset');

In [None]:
# Sum of FOV counts over a trailing 7-day window, evaluated at each row's date.
# The time-based window ('7d') operates on the datetime index of `d`.
# Axis labels and a title make the figure readable on its own; the trailing
# semicolon suppresses the Line2D repr that would otherwise be echoed by the cell.
plt.plot(d.index, d.num_fovs.rolling('7d').sum())
plt.xlabel('Date')
plt.ylabel('Number of FOVs (7-day rolling sum)')
plt.title('Number of FOVs imaged per 7-day window');