In [None]:
import os
import re
import io
import sys
import glob
import enum
import json
import dask
import xlrd
import base64
import time
import imageio
import requests
import datetime
import psycopg2
import numpy as np
import pandas as pd
import skimage

import dask.diagnostics
import sqlalchemy as db
import sqlalchemy.orm
import sqlalchemy.ext.declarative
from matplotlib import pyplot as plt

%load_ext autoreload
%autoreload 1

sys.path.append('../..')
%aimport opencell.imaging.managers
%aimport opencell.imaging.processors
%aimport opencell.file_utils

from opencell import constants, file_utils
from opencell.cli import database_cli
from opencell.cli import fov_cli
from opencell.database import models
from opencell.database import operations
from opencell.database import utils as db_utils
from opencell.imaging import utils as im_utils
from opencell.imaging import images, managers, processors, viz

In [None]:
# Build the database URL from the local (dev) credentials file.
# Swap in the commented-out line below to target the 'cap' credentials instead.
url = db_utils.url_from_credentials('../../db-credentials-dev.json')
# url = db_utils.url_from_credentials('../../db-credentials-cap.json')

# Display the URL with the password masked, so that credentials are not written
# into the saved notebook output (the repr of a sqlalchemy URL hides the password).
# NOTE(review): assumes `url` is a URL string or a sqlalchemy URL object - confirm.
db.engine.url.make_url(url)

In [None]:
# Create the engine and a scoped session registry used for all ORM queries below.
engine = db.create_engine(url)
session_factory = db.orm.sessionmaker(bind=engine)
Session = db.orm.scoped_session(session_factory)
session = Session()

# Reflect the cell_line_metadata view and the fov_rank relation from the database.
# `autoload_with` alone triggers reflection (the separate `autoload=True` flag is
# deprecated), and `extend_existing=True` lets this cell be re-run without raising
# "Table ... is already defined for this MetaData instance".
clm = db.Table(
    "cell_line_metadata", models.Base.metadata, autoload_with=engine, extend_existing=True
)
fov_rank = db.Table(
    "fov_rank", models.Base.metadata, autoload_with=engine, extend_existing=True
)

### Append FACS grade and sequencing results to Manu's annotations CSV

This export was generated on 2020-02-26 for Hera.

In [None]:
# Load the cell-line annotations CSV (the file named in the section heading above)
# from a hard-coded local path.
d_raw = pd.read_csv('/Users/keith.cheveralls/2020-02-21-17-05-12_cell-line-annotations_ML.csv')

In [None]:
# target names of the rows flagged with no_gfp == 1
d_raw.loc[d_raw.no_gfp == 1, 'target_name'].tolist()

In [None]:
# Parse the FACS grades JSON file into `facs_grades`
# (used below as a mapping queried with '<plate_id>-<well_id>' keys).
with open('/Users/keith.cheveralls/projects/opencell-vis/src/demo/data/facs_grades.json', 'r') as handle:
    facs_grades = json.loads(handle.read())

In [None]:
# inspect the loaded FACS grades
facs_grades

In [None]:
# `d` was never defined in this notebook (only `d_raw` was loaded), so this cell
# failed on a fresh kernel. Work on a copy so that `d_raw` stays untouched.
# NOTE(review): assumes `d` is meant to be the raw annotations table - confirm.
d = d_raw.copy()

# Look up each row's FACS grade by its '<plate_id>-<well_id>' key;
# rows without an entry in `facs_grades` get None.
d['facs_grade'] = [
    facs_grades.get('%s-%s' % (plate_id, well_id))
    for plate_id, well_id in zip(d.plate_id, d.well_id)
]

In [None]:
# Append the 'hdr_all' and 'hdr_modified' sequencing scalars for each cell line.
# Rows without sequencing results keep None in both columns.
d['hdr_all'] = None
d['hdr_modified'] = None

for ind, row in d.iterrows():
    try:
        sequencing = (
            Session.query(models.SequencingDataset)
            .filter(models.SequencingDataset.cell_line_id == row.cell_line_id)
            .first()
        )

        # a missing dataset is an expected case, not an error
        if sequencing is None or sequencing.scalars is None:
            print('%s: no sequencing results' % row.cell_line_id)
            continue

        d.at[ind, 'hdr_all'] = sequencing.scalars.get('hdr_all')
        d.at[ind, 'hdr_modified'] = sequencing.scalars.get('hdr_modified')

    except Exception as error:
        # Still best-effort (the loop continues), but report what went wrong and
        # roll back so that a failed statement does not break every later query.
        Session.rollback()
        print('%s: %r' % (row.cell_line_id, error))

In [None]:
# Write the augmented annotations table to the local cache directory.
# Note: `index=False` is not passed, so the dataframe index is written as the first column.
d.to_csv('/Users/keith.cheveralls/projects/opencell/cache/2020-02-26_annotations-w-facs-seq.csv')

### FOV flags for Hiro

This export was generated on 2020-04-30. It is a list of flags for each FOV. The flags are:
- the 'no_gfp' and 'publication_ready' target annotations
- whether the FOV has a manually annotated ROI
- whether there are nuclei in the FOV

In [None]:
def all_results_fast(kind):
    '''
    Load every row of microscopy_fov, left-joined to its microscopy_fov_result
    row of the given kind, as a dataframe.

    Because this is a left join, FOVs without a result of this kind are still
    returned, with null `kind` and `data` columns.

    kind : the value of microscopy_fov_result.kind to select (e.g. 'fov-features')

    Returns a pandas DataFrame with all microscopy_fov columns
    plus the `kind` and `data` columns of the result.
    '''
    # `kind` is passed as a bound parameter rather than interpolated into the SQL,
    # so quoting is handled by the database driver
    query = db.text(
        '''
        select fov.*, res.kind as kind, res.data as data from microscopy_fov fov
        left join (select * from microscopy_fov_result where kind = :kind) res
        on fov.id = res.fov_id;'''
    )
    return pd.read_sql(query, engine, params={'kind': kind})

In [None]:
# Load all FOVs with their 'fov-features' results (one row per FOV from the left join)
# and show the shape of the resulting dataframe.
df = all_results_fast('fov-features')
df.shape

In [None]:
# inspect the joined FOV/result table
df

In [None]:
# Grab an arbitrary FOV (the first one returned) and build its processor,
# to explore the attributes used in the cells below.
fov = (
    Session.query(models.MicroscopyFOV)
    .first()
)
p = processors.FOVProcessor.from_database(fov)

In [None]:
# inspect the annotation categories of the example FOV's cell line
fov.cell_line.annotation.categories

In [None]:
# Extract the 'score' entry from each FOV's result data.
# The left join leaves `data` null for FOVs without a 'fov-features' result,
# so those rows get a score of None instead of raising AttributeError.
# NOTE(review): assumes `data` is a dict whenever a result exists - confirm.
df['score'] = [
    data.get('score') if isinstance(data, dict) else None
    for data in df.data
]

In [None]:
# Initialize every column that the per-FOV loop below fills in.
# The names must match the ones the loop writes: it sets 'target_is_annotated'
# and 'target_is_publication_quality' (not 'target_is_pub_ready'), which were
# previously created lazily while 'target_is_pub_ready' stayed empty.
for col in [
    'filepath',
    'fov_has_nuclei',
    'fov_is_annotated',
    'target_name',
    'target_is_annotated',
    'target_is_gfp_negative',
    'target_is_publication_quality',
]:
    df[col] = None

In [None]:
for ind, row in df.iterrows():
    # progress marker every 1000 rows
    if ind % 1000 == 0:
        print(ind)

    # load the ORM object for this FOV and build its processor
    fov = (
        Session.query(models.MicroscopyFOV)
        .filter(models.MicroscopyFOV.id == row.id)
        .one()
    )
    p = processors.FOVProcessor.from_database(fov)

    # the TIFF filepath relative to the clean/ directory
    df.at[ind, 'filepath'] = p.dst_filepath(kind='clean', ext='tif')

    # whether there are any nuclei in the FOV (a missing score means none)
    df.at[ind, 'fov_has_nuclei'] = not pd.isna(row.score)

    # whether the FOV has an annotated ROI
    df.at[ind, 'fov_is_annotated'] = bool(fov.annotation)

    # the target name
    df.at[ind, 'target_name'] = fov.cell_line.crispr_design.target_name

    # target annotation flags (the category flags are only set for annotated targets)
    target_annotation = fov.cell_line.annotation
    has_target_annotation = target_annotation is not None
    df.at[ind, 'target_is_annotated'] = has_target_annotation
    if has_target_annotation:
        categories = target_annotation.categories
        df.at[ind, 'target_is_gfp_negative'] = 'no_gfp' in categories
        df.at[ind, 'target_is_publication_quality'] = 'publication_ready' in categories

In [None]:
# the flag that determines whether the FOV should be included in the training data:
# the FOV has nuclei and its target is annotated as publication-ready.
# Both inputs are object columns that can hold None/NaN (targets without an annotation),
# so compare against True explicitly to get a strict boolean mask
# rather than relying on how `&` coerces missing values.
df['final_fov_flag'] = (
    df.fov_has_nuclei.eq(True) & df.target_is_publication_quality.eq(True)
)

In [None]:
# Drop the columns that are not needed in the export
# and expose the FOV primary key as 'fov_id'.
dff = (
    df
    .drop(columns=['pml_id', 'imaging_round_id', 'site_num', 'raw_filename', 'kind', 'data'])
    .rename(columns={'id': 'fov_id'})
)

In [None]:
# order the rows by cell line so that the FOVs of each cell line are contiguous
dff = dff.sort_values(by='cell_line_id')

In [None]:
# write the full table of FOV flags to the ml_group volume (with a header row, without the index)
dff.to_csv('/Volumes/ml_group/opencell-microscopy/2020-04-30_fov-flags.csv', header=True, index=False)

In [None]:
# write the same table to the current working directory
dff.to_csv('2020-04-30_fov-flags.csv', header=True, index=False)

In [None]:
# CSV of only the FOVs to include in the training data
# (rows where final_fov_flag is set, restricted to the identifying columns)
dff.loc[
    dff.final_fov_flag,
    ['cell_line_id', 'target_name', 'fov_id', 'filepath']
].to_csv(
    '/Volumes/ml_group/opencell-microscopy/2020-04-30_good-fovs.csv', header=True, index=False
)

In [None]:
# number of FOVs with nuclei that are also (a) from a publication-quality target
# and (b) manually annotated, in that order
tuple(
    (dff.fov_has_nuclei & dff[flag]).sum()
    for flag in ('target_is_publication_quality', 'fov_is_annotated')
)