### Aggregating top-scoring FOVs from the `PlateMicroscopy` directory
__Keith Cheveralls__<br>
__Fall 2019__

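This notebook selects the top-scoring FOV for each target from the z-projections of all FOVs in the `PlateMicroscopy` directory, and copies the GFP z-projection of each selected FOV to a separate directory.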

It is built on the output of the `fov-aggregation.ipynb` notebook. See that notebook for further notes. Like that notebook, this notebook has been superseded by the methods for managing the `PlateMicroscopy` directory found in the `opencell-process` repo.

In [None]:
import os
import re
import git
import sys
import glob
import json
import joblib
import sklearn
import skimage
import shutil
import tifffile
import numpy as np
import pandas as pd
import seaborn as sns

import sklearn.cluster
import sklearn.ensemble
import sklearn.model_selection

from scipy import ndimage
from skimage import feature
from skimage import morphology
import matplotlib
from matplotlib import pyplot as plt
from matplotlib import colors as mplcolors

In [None]:
def printr(s):
    """Overwrite the current stdout line with ``s``.

    Writes a carriage return followed by the string form of ``s`` and no
    trailing newline, so successive calls redraw the same output line
    (e.g. a progress counter inside a loop).

    Parameters
    ----------
    s : object
        Value to display; it is converted to text with ``%s`` formatting.
    """
    # Wrap in a 1-tuple so that a tuple argument is displayed as-is instead of
    # being unpacked into multiple format arguments (which raises TypeError).
    sys.stdout.write('\r%s' % (s,))
    # No newline is written, so flush explicitly to make the update visible.
    sys.stdout.flush()

In [None]:
# Make the local opencell-process checkout importable (machine-specific absolute path).
sys.path.append('/Users/keith.cheveralls/projects/opencell-process/')
from opencell.imaging import utils, viz

# Likewise for the local dragonfly-automation checkout, which provides FOVClassifier.
sys.path.append('/Users/keith.cheveralls/projects/dragonfly-automation/')
import dragonfly_automation.utils
from dragonfly_automation.fov_classifier import FOVClassifier

In [None]:
# Enable IPython's autoreload extension in mode 2 (reload modules before each cell runs).
%load_ext autoreload
%autoreload 2

In [None]:
# Directory passed to load_and_merge; it is expected to hold the dated
# '<timestamp>-labels.csv' and '<timestamp>-features.csv' files (machine-specific path).
root = '/Users/keith.cheveralls/image-data/PlateMicroscopyProjections-uint8-all-DAPI-sorted/'

In [None]:
def load_and_merge(dirpath, labels_timestamp, features_timestamp):
    """Read a labels table and a features table and join them on ``filename``.

    Parameters
    ----------
    dirpath : str
        Directory containing both CSV files.
    labels_timestamp : str
        Prefix of the labels file, ``<dirpath>/<labels_timestamp>-labels.csv``.
    features_timestamp : str
        Prefix of the features file, ``<dirpath>/<features_timestamp>-features.csv``.

    Returns
    -------
    pandas.DataFrame
        The two tables merged on their ``filename`` columns, using the
        default join type of ``DataFrame.merge``.

    Notes
    -----
    The shapes of the labels, features, and merged tables are printed as one
    tuple, which makes it easy to see whether rows were lost in the join.
    """
    labels_path = '%s/%s-labels.csv' % (dirpath, labels_timestamp)
    features_path = '%s/%s-features.csv' % (dirpath, features_timestamp)

    labels_df = pd.read_csv(labels_path)
    features_df = pd.read_csv(features_path)

    merged = labels_df.merge(features_df, left_on='filename', right_on='filename')

    shapes = (labels_df.shape, features_df.shape, merged.shape)
    print(shapes)
    return merged

In [None]:
# Labels and features tables (both dated 2019-10-07) from `root`, merged on filename.
d = load_and_merge(root, '2019-10-07', '2019-10-07')

In [None]:
# Build a regression-type FOV classifier in training mode, then load, train and validate it.
# NOTE(review): presumably `load` reads cached training data from this directory and
# `train` refits the model on it — confirm against FOVClassifier in dragonfly-automation.
fc = FOVClassifier(mode='training', model_type='regression')
fc.load('/Users/keith.cheveralls/Box/KCC-box-projects/confluency-annotator/models/2019-10-08/')
fc.train()
fc.validate()

In [None]:
# predicted scores for unsorted FOVs
# Feature columns are selected in the order given by fc.feature_order before
# being passed to the classifier's model.
X = d[list(fc.feature_order)].values
yp = fc.model.predict(X)
# store the predictions alongside the labels/features as column 'yp'
d['yp'] = yp

In [None]:
# highest-scoring FOV for each target
# The target name is taken to be the third underscore-delimited field of the
# filename (assumes every filename has at least three fields — TODO confirm).
d['target_name'] = [name.split('_')[2] for name in d.filename]

# Keep the single top-scoring *row* per target. `GroupBy.first()` is avoided
# because it returns the first non-null value of each column independently, so
# a NaN in the top-scoring row would be silently filled in from a lower-scoring
# FOV of the same target. `head(1)` keeps whole rows; `set_index` + `sort_index`
# reproduce the target_name-indexed, name-sorted layout that `first()` gave.
d_best = (
    d.sort_values(by='yp', ascending=False)
    .groupby('target_name')
    .head(1)
    .set_index('target_name')
    .sort_index()
)

In [None]:
# Source directory of GFP projections and destination for the per-target top FOV
# (machine-specific absolute paths).
src_dir = '/Users/keith.cheveralls/image-data/PlateMicroscopyProjections-uint8-all-GFP/'
dst_dir = '/Users/keith.cheveralls/image-data/PlateMicroscopyProjections-uint8-all-GFP-top1/'

# shutil.copy does not create missing directories, so make sure the
# destination exists before copying (no-op if it is already there).
os.makedirs(dst_dir, exist_ok=True)

for dapi_filename in d_best.filename:
    # the scored filenames carry a '_DAPI_' tag; the matching GFP projection
    # has the same name with that tag swapped for '_GFP_'
    filename = dapi_filename.replace('_DAPI_', '_GFP_')
    src_filepath = os.path.join(src_dir, filename)
    dst_filepath = os.path.join(dst_dir, filename)
    shutil.copy(src_filepath, dst_filepath)