In [None]:
import os
import re
import sys
import shutil
import numpy as np

from PIL import Image
from matplotlib import pyplot as plt
from scipy.ndimage import gaussian_filter, imread, median_filter, gaussian_laplace, sobel
from skimage.feature import peak_local_max
from scipy.spatial import kdtree
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.cross_validation import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.externals import joblib
import pickle

%matplotlib inline
# Default figure size applied to every matplotlib figure created in this notebook.
plt.rcParams["figure.figsize"] = [6, 6]

In [None]:
def mkdir_if_necessary(path):
    """Create the directory ``path`` (and any missing parents) if it is absent.

    ``exist_ok=True`` replaces a separate ``os.path.exists`` check, so a
    directory that appears between the check and the creation can no longer
    make the call fail.

    Parameters:
        path: directory to create.

    Raises:
        FileExistsError: if ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

In [None]:
def exists_with_postfix(path, postfix=".jpg"):
    """Tell whether ``path`` with ``postfix`` appended exists on disk.

    Parameters:
        path: path prefix; ``postfix`` is concatenated to it unchanged.
        postfix: suffix to append, ``".jpg"`` by default.

    Returns:
        The result of ``os.path.exists`` for the concatenated path.
    """
    candidate = path + postfix
    return os.path.exists(candidate)

In [None]:
def sort_overviews(d):
    """Move the files of ``d`` that are not 'field ... sted' files into ``d/overviews``.

    Only files located directly in ``d`` are considered. A file stays where it
    is when its name matches the pattern ``.*?field.*?sted.*?`` (i.e. contains
    'field' followed later by 'sted'); every other file is moved into the
    ``overviews`` subfolder, which is created when missing.

    Parameters:
        d: directory to tidy up.
    """
    # Take the listing before the subfolder is created.
    top_level_files = next(os.walk(d))[2]

    overview_dir = os.path.join(d, 'overviews')
    mkdir_if_necessary(overview_dir)

    keep_pattern = re.compile('.*?field.*?sted.*?')

    to_move = [name for name in top_level_files if keep_pattern.match(name) is None]
    for name in to_move:
        shutil.move(os.path.join(d, name), os.path.join(overview_dir, name))

In [None]:
def recommend_quality(im, thresh_brightest = 10, max_dist=25):
    """Rate a two-channel image with simple peak heuristics.

    Channels ``im[:,:,0]`` and ``im[:,:,1]`` are analysed. Local maxima are
    located on a Gaussian-smoothed (sigma=1) copy of each channel and ranked
    by the raw intensity at their position. The checks run in this order:

    1. no peak at all, or brightest peak below ``thresh_brightest / 2`` times
       the channel mean -> ``'b'`` (channel 1 is checked before channel 2)
    2. brightest peak below ``thresh_brightest`` times the channel mean -> ``'m'``
    3. more than 5 peaks (or none) brighter than 67% of the brightest -> ``'b'``
    4. smallest distance between the candidate peaks of the two channels
       larger than ``max_dist`` -> ``'m'``
    5. otherwise ``'g'``

    The reason for every verdict is printed.

    Parameters:
        im: array indexable as ``im[:,:,0]`` and ``im[:,:,1]``.
        thresh_brightest: factor applied to the channel mean in the darkness checks.
        max_dist: largest accepted distance (in pixels of the peak coordinates).

    Returns:
        ``'g'``, ``'m'`` or ``'b'``.
    """
    i1 = im[:,:,0]
    i2 = im[:,:,1]
    p1 = peak_local_max(gaussian_filter(i1, 1), min_distance=2)
    p2 = peak_local_max(gaussian_filter(i2, 1), min_distance=2)

    # (raw intensity, index into p1/p2), brightest first
    p1i = sorted([(i1[r, c], idx) for idx, (r, c) in enumerate(p1)], key=lambda x: x[0], reverse=True)
    p2i = sorted([(i2[r, c], idx) for idx, (r, c) in enumerate(p2)], key=lambda x: x[0], reverse=True)

    channels = ((1, i1, p1i), (2, i2, p2i))

    for number, raw, ranked in channels:
        # Without this guard ranked[0] raises IndexError for a channel without peaks.
        if not ranked:
            print('BAD: no peaks found in channel ' + str(number))
            return 'b'
        if ranked[0][0] < thresh_brightest / 2 * np.mean(raw):
            print('BAD: channel ' + str(number) + ' dark')
            return 'b'

    for number, raw, ranked in channels:
        if ranked[0][0] < thresh_brightest * np.mean(raw):
            print('MEDIOCRE: channel ' + str(number) + ' dark')
            return 'm'

    # Number of peaks brighter than 67% of the brightest one; because the
    # lists are sorted these are exactly the first `halflife` entries.
    halflife1 = int(sum(value > 0.67 * p1i[0][0] for value, _ in p1i))
    halflife2 = int(sum(value > 0.67 * p2i[0][0] for value, _ in p2i))

    print('Found ' + str(halflife1) + ' candidate peaks in channel 1')
    print('Found ' + str(halflife2) + ' candidate peaks in channel 2')

    for number, count in ((1, halflife1), (2, halflife2)):
        if count > 5:
            print('BAD: found too many peaks in channel ' + str(number))
            return 'b'

    for number, count in ((1, halflife1), (2, halflife2)):
        # A non-positive brightest peak yields zero candidates; the KD-tree
        # below cannot be built or queried with an empty point list.
        if count == 0:
            print('BAD: no candidate peaks in channel ' + str(number))
            return 'b'

    p1good = [p1[idx] for _, idx in p1i[:halflife1]]
    p2good = [p2[idx] for _, idx in p2i[:halflife2]]
    tree = kdtree.KDTree(p1good)

    # distance from every channel-2 candidate to its nearest channel-1 candidate
    mindist = np.min(tree.query(p2good)[0])
    print('approximate minimal distance: ' + str(mindist))

    if (mindist > max_dist):
        print('MEDIOCRE: minimal distance too high')
        return 'm'

    print('GOOD')
    return 'g'

In [None]:
def getfeatures(img):
    """Compute the feature vector of a two-channel image for the classifier.

    Candidate peaks are found per channel exactly as in ``recommend_quality``
    (local maxima of the sigma=1 smoothed channel, ranked by raw intensity,
    kept while brighter than 67% of the brightest). The closest pair of
    candidates between channel 1 and channel 2 is selected and the filter
    responses are sampled at these two positions.

    Feature layout (31 values):
        [0]      number of candidate peaks in channel 1
        [1]      number of candidate peaks in channel 2
        [2]      distance between the selected pair
        [3:27]   for each sigma in (0.7, 1, 1.5, 2.25, 3.5, 5):
                 gaussian(ch1), gaussian(ch2), laplace-of-gaussian(ch1),
                 laplace-of-gaussian(ch2)
        [27:29]  sobel(ch1), sobel(ch2)
        [29:31]  raw intensity ch1, ch2

    Parameters:
        img: array indexable as ``img[:,:,0]`` and ``img[:,:,1]``.

    Returns:
        list of 31 Python floats.

    Raises:
        ValueError: if a channel has no candidate peak.
    """
    # The original body read the notebook-global `im` here and silently
    # ignored the argument; use the parameter.
    i1 = img[:,:,0]
    g1 = gaussian_filter(i1, 1)
    i2 = img[:,:,1]
    g2 = gaussian_filter(i2, 1)
    p1 = peak_local_max(g1, min_distance=2)
    p2 = peak_local_max(g2, min_distance=2)

    # (raw intensity, index into p1/p2), brightest first
    p1i = sorted([(i1[p1[i,0], p1[i,1]], i) for i in range(len(p1))], key=lambda x: x[0], reverse=True)
    p2i = sorted([(i2[p2[i,0], p2[i,1]], i) for i in range(len(p2))], key=lambda x: x[0], reverse=True)

    halflife1 = sum([p1i[i][0] > 0.67 * p1i[0][0] for i in range(len(p1i))])
    halflife2 = sum([p2i[i][0] > 0.67 * p2i[0][0] for i in range(len(p2i))])

    if halflife1 == 0 or halflife2 == 0:
        # The KD-tree below cannot be built/queried with an empty point list.
        raise ValueError('getfeatures: no candidate peaks found in at least one channel')

    p1good = [p1[p1i[i][1]] for i in range(halflife1)]
    p2good = [p2[p2i[i][1]] for i in range(halflife2)]
    tree = kdtree.KDTree(p1good)

    # nearest channel-1 candidate for every channel-2 candidate
    q = tree.query(p2good)

    m2 = np.argmin(q[0])   # channel-2 candidate of the closest pair
    m1 = q[1][m2]          # its partner in channel 1
    d = q[0][m2]

    pos1 = tuple(p1good[m1])
    pos2 = tuple(p2good[m2])

    features = [halflife1, halflife2, d]

    # NOTE(review): the filters run on the channels as given; for integer
    # images the responses are presumably stored in that integer dtype --
    # confirm this is intended before changing it (it alters the features).
    for sigma in [0.7, 1 , 1.5 , 2.25 , 3.5, 5]:
        features.append(gaussian_filter(i1,sigma)[pos1])
        features.append(gaussian_filter(i2,sigma)[pos2])
        features.append(gaussian_laplace(i1,sigma)[pos1])
        features.append(gaussian_laplace(i2,sigma)[pos2])

    features.append(sobel(i1)[pos1])
    features.append(sobel(i2)[pos2])
    features.append(i1[pos1])
    features.append(i2[pos2])

    return [float(f) for f in features]

In [None]:
def predict_ml(img, sc, cls):
    """Classify one image and return the label name.

    Parameters:
        img: image passed on to ``getfeatures``.
        sc: fitted scaler; its ``transform`` is applied to the feature row.
        cls: fitted classifier; its ``predict`` must return one integer label
            per row, indexing into ``['good', 'bad', 'mediocre']``.

    Returns:
        ``'good'``, ``'bad'`` or ``'mediocre'``.
    """
    feat = np.array(getfeatures(img)).reshape(1,-1)
    # predict() yields an array with one entry per row; a list cannot be
    # indexed with that array, so pull out the single label as a plain int.
    label = int(cls.predict(sc.transform(feat))[0])
    return ['good', 'bad', 'mediocre'][label]

In [None]:
# Start with empty training data: one feature vector per image goes into
# `features`, the matching label index into `classes`.
features, classes = [], []

In [None]:
# Calculate features and classes from a list of directories to use as
# training data. Every .jpg located in a folder named 'good', 'bad' or
# 'mediocre' contributes one feature vector; its class is the position of
# the folder name in label_names (good=0, bad=1, mediocre=2).
ds = ['/Users/david/Desktop/9th_shipment_20170216/mixed_HS2_HBG2_A/K562/']

label_names = ['good', 'bad', 'mediocre']

for d in ds:
    for di, _ , fl in os.walk(d):
        folder = di.split(os.sep)[-1]
        for f in fl:
            if not (f.endswith('.jpg') and folder in label_names):
                continue
            im = imread(os.path.join(di, f))
            features.append(getfeatures(im))
            classes.append(label_names.index(folder))

In [None]:
# Generate the scaler and the random forest classifier.
# The forest is trained on a binary target: class 0 stays 0 and every other
# class is mapped to 1.

sc = StandardScaler()
sc.fit(features)

binary_classes = [int(x != 0) for x in classes]

cls = RandomForestClassifier(n_estimators=100)
cls.fit(sc.transform(features), binary_classes)

In [None]:
# Save the fitted scaler and classifier as pickle files (scaler first).

for pickle_path, fitted in (('/Users/david/Desktop/scaler2.pks', sc),
                            ('/Users/david/Desktop/goodbadclassifier2.pks', cls)):
    with open(pickle_path, 'wb') as fd:
        pickle.dump(fitted, fd)

In [None]:
# Restore the scaler AND the classifier written by the save cell. Loading
# only the classifier left `sc` undefined on a fresh kernel (or paired the
# loaded classifier with whatever scaler happened to be in memory).
# NOTE: pickle.load can execute arbitrary code -- only load files that were
# created by this notebook.
with open('/Users/david/Desktop/scaler2.pks', 'rb') as fd:
    sc = pickle.load(fd)

with open('/Users/david/Desktop/goodbadclassifier2.pks', 'rb') as fd:
    cls = pickle.load(fd)

In [None]:
### 1: set the directory to process
# The immediate subfolders of this directory are tidied up in step 2 and one
# of them is selected in step 3.
# Alternative: the 'AutomatedAcquisitions' folder of the current working directory:
#dir_to_process = os.path.join(os.getcwd(), 'AutomatedAcquisitions')
dir_to_process = '/Users/david/Desktop/6th_shipment_20161219/mixed_HS1345_HS2_B/'

In [None]:
### 2: move all the overview files into a separate folder
# Every non-hidden immediate subfolder of dir_to_process is handed to
# sort_overviews; the folder names are printed first.
top_level_dirs = next(os.walk(dir_to_process))[1]
dirs = [name for name in top_level_dirs if not name.startswith('.')]
print(dirs)

for d in dirs:
    sort_overviews(os.path.join(dir_to_process, d))

In [None]:
### 3: set subfolder to process
# `d` is the folder whose .msr/.jpg pairs are sorted by steps 4 and 4a.
d = os.path.join(dir_to_process, 'K562_180sec_B')

In [None]:
### 4: SORTING into good/bad/mediocre
# Shows the .jpg preview of every .msr file in `d` and asks for a verdict.
# The .msr file and its preview are moved into the 'good', 'bad' or
# 'mediocre' subfolder according to the first letter of the answer; any
# other answer (including just pressing Enter) leaves the pair in place.


gd_d = os.path.join(d, 'good')
bd_d = os.path.join(d, 'bad')
md_d = os.path.join(d, 'mediocre')

for target_dir in (gd_d, bd_d, md_d):
    mkdir_if_necessary(target_dir)

# first letter of the answer -> destination folder
destinations = {"G": gd_d, "B": bd_d, "M": md_d}

files = [f for f in next(os.walk(d))[2] if f.endswith('.msr')]

for fi in files:
    f = os.path.join(d,fi)
    im = imread(f + ".jpg")

    # Optional: show the classifier's recommendation next to the image.
    #rec = predict_ml(im, sc, cls)
    plt.imshow(im)
    #print(rec.upper())
    plt.show()
    print('-----')
    sys.stdout.flush()
    decision = input("ISGOOD? [(g)ood/(b)ad/(m)ediocre] :") #or rec
    # [:1] instead of [0]: an empty answer gives '' instead of raising
    # IndexError and aborting the whole loop.
    dec = decision.strip().upper()[:1]

    destination = destinations.get(dec)
    if destination is None:
        continue

    shutil.move(os.path.join(d, fi), os.path.join(destination, fi))
    shutil.move(os.path.join(d, fi + ".jpg"), os.path.join(destination, fi + ".jpg"))

In [None]:
### 4a: SORTING into good/bad/(mediocre) AUTOMATED
# Same as step 4, but the verdict comes from predict_ml instead of the user.
# Each .msr file and its .jpg preview are moved into the subfolder selected
# by the first letter of the predicted label.


gd_d = os.path.join(d, 'good')
bd_d = os.path.join(d, 'bad')
md_d = os.path.join(d, 'mediocre')

for target_dir in (gd_d, bd_d, md_d):
    mkdir_if_necessary(target_dir)

# first letter of the prediction -> destination folder
destinations = {"G": gd_d, "B": bd_d, "M": md_d}

files = [f for f in next(os.walk(d))[2] if f.endswith('.msr')]

for fi in files:
    f = os.path.join(d,fi)
    im = imread(f + ".jpg")

    rec = predict_ml(im, sc, cls)
    dec = rec.upper()[0]

    if dec in destinations:
        for name in (fi, fi + ".jpg"):
            shutil.move(os.path.join(d, name), os.path.join(destinations[dec], name))