In [1]:
import numpy as np
import pandas as pd
import os
import diff_classifier.aws as aws
import diff_classifier.features as ft
import diff_classifier.msd as msd
import diff_classifier.heatmaps as hm
import diff_classifier.imagej as ij
import boto3
import os.path as op
import cloudknot as ck

In [2]:
# --- Run configuration ---------------------------------------------------
# Placeholders; each name is reassigned by the cell that launches the Cloudknot job.
to_track, knot, result_futures = {}, {}, {}

# Numeric suffix of the Knot name. Must be unique number for every run on Cloudknot.
start_knot = 918

# Every slice/region/video combination is turned into one video prefix later on.
slices = [str(slice_number) for slice_number in (1, 2)]
regions = list(range(1, 4))
videos = list(range(1, 6))

# Folder in AWS S3 containing files to be analyzed
folder = 'Tissue_Studies/04_23_18_Registration_Test/tracking'



In [3]:
def download_split_track_msds(prefix):
    """
    Split, track and featurize one video stored in S3.

    The function is deliberately self-contained (imports and settings live
    inside the body) because it is passed to ``ck.DockerImage(func=...)``.

    Workflow:

    1. If ``features_<prefix>.csv`` already exists in the remote folder,
       nothing is recomputed.
    2. Otherwise the 16 tiles ``<prefix>_<row>_<col>.tif`` are downloaded.
       If that raises, the full ``<prefix>.tif`` is downloaded, passed to
       ``ij.partition_im`` and the tiles are uploaded.
    3. For every tile, ``Traj_<tile>.csv`` is downloaded. If that raises,
       the tile is tracked with ``ij.track`` and the result is uploaded.
    4. Unless a trajectory file is larger than the size limit, MSDs and
       features are computed over all tiles, written to CSV, uploaded, and
       the summary plots of ``diff_classifier.heatmaps`` are produced.

    Local files matching ``./100*`` are deleted before returning.

    Parameters
    ----------
    prefix : str
        Name of the video without extension, e.g. ``'100nm_S1_XY2_5'``.
        Tile row and column are read from underscore-separated fields 4
        and 5 of the tile name, so the prefix must consist of four fields.

    Returns
    -------
    None
    """

    # NOTE(review): several of these imports are not referenced in the body.
    # They are kept because the function source is shipped to Cloudknot and
    # the imports presumably feed its dependency detection -- confirm before
    # pruning.
    import matplotlib as mpl
    #mpl.use('Agg')
    import diff_classifier.aws as aws
    import diff_classifier.utils as ut
    import diff_classifier.msd as msd
    import diff_classifier.features as ft
    import diff_classifier.imagej as ij
    import diff_classifier.heatmaps as hm

    from scipy.spatial import Voronoi
    import scipy.stats as stats
    from shapely.geometry import Point
    from shapely.geometry.polygon import Polygon
    import matplotlib.cm as cm
    import os
    import os.path as op
    import glob
    import numpy as np
    import numpy.ma as ma
    import pandas as pd
    import boto3

    def _traj_name(tile_name):
        """Return the trajectory CSV name that belongs to a tile image name."""
        return 'Traj_' + tile_name.split('.')[0] + '.csv'

    def _tile_index(tile_name):
        """Return (row, col) parsed from fields 4 and 5 of a tile image name."""
        fields = tile_name.split('.')[0].split('_')
        return int(fields[4]), int(fields[5])

    #Settings
    ###############################################################################################
    bucket = 'ccurtis.data'
    remote_folder = "Tissue_Studies/04_23_18_Registration_Test/tracking"
    local_folder = os.getcwd()
    ires = 512       # per-tile pixel offset applied when stitching X/Y coordinates
    frames = 651     # forwarded to msd.all_msds2
    size_limit = 10  # MB; larger trajectory files abort the MSD step
    filename = '{}.tif'.format(prefix)
    remote_name = remote_folder+'/'+filename
    local_name = local_folder+'/'+filename

    msd_file = 'msd_{}.csv'.format(prefix)
    ft_file = 'features_{}.csv'.format(prefix)

    s3 = boto3.client('s3')

    # The 4x4 tile names, built once and reused by every section below.
    names = ['{}_{}_{}.tif'.format(prefix, i, j) for i in range(0, 4) for j in range(0, 4)]

    # A failing head_object is interpreted as "features not computed yet".
    try:
        s3.head_object(Bucket=bucket, Key=remote_folder+'/'+ft_file)
        features_exist = True
    except Exception:
        features_exist = False

    if not features_exist:

        #Splitting section
        ###########################################################################################
        try:
            for name in names:
                aws.download_s3(remote_folder+'/'+name, name, bucket_name=bucket)
        except Exception:
            # Tiles are not available remotely: fetch the full image and split it.
            aws.download_s3(remote_name, local_name, bucket_name=bucket)
            ij.partition_im(local_name)

            for name in names:
                aws.upload_s3(name, remote_folder+'/'+name, bucket_name=bucket)
                print("Done with splitting.  Should output file of name {}".format(remote_folder+'/'+name))

        #Tracking section
        ###########################################################################################
        for name in names:
            outfile = _traj_name(name)
            local_im = op.join(local_folder, name)
            row, _ = _tile_index(name)

            try:
                aws.download_s3(remote_folder+'/'+outfile, outfile, bucket_name=bucket)
            except Exception:
                # No trajectory file remotely: track this tile now.
                test_intensity = ij.mean_intensity(local_im)
                quality = 245 if test_intensity > 500 else 0.1

                # The last tile row is tracked with a smaller y value.
                y = 485 if row == 3 else 511

                ij.track(local_im, outfile, template=None, fiji_bin=None, radius=5.0, threshold=0.01,
                         do_median_filtering=True, quality=quality, x=511, y=y, ylo=1, median_intensity=300.0, snr=0.0,
                         linking_max_distance=8.0, gap_closing_max_distance=15.0, max_frame_gap=9,
                         track_displacement=0.0)

                aws.upload_s3(outfile, remote_folder+'/'+outfile, bucket_name=bucket)
            print("Done with tracking.  Should output file of name {}".format(remote_folder+'/'+outfile))

        #MSD and features section
        ###########################################################################################
        # The guard inspects the trajectory CSVs, as its message states. It
        # previously measured the tile images and set a misspelled flag, so it
        # could never trigger.
        files_to_big = any(
            op.getsize(op.join(local_folder, _traj_name(name)))/1000000 > size_limit
            for name in names)

        if files_to_big:
            print('One or more of the {} trajectory files exceeds {}MB in size.  Will not continue with MSD calculations.'.format(
                  prefix, size_limit))
        else:
            merged = None
            for name in names:
                row, col = _tile_index(name)
                traj_path = local_folder+'/'+"Traj_{}_{}_{}.csv".format(prefix, row, col)

                to_add = ut.csv_to_pd(traj_path)
                # Shift tile-local coordinates into the frame of the full image.
                to_add['X'] = to_add['X'] + ires*col
                to_add['Y'] = ires - to_add['Y'] + ires*(3-row)
                # Keep track IDs unique across tiles.
                if merged is not None and merged.shape[0] > 0:
                    to_add['Track_ID'] = to_add['Track_ID'] + max(merged['Track_ID']) + 1

                tile_msds = msd.all_msds2(to_add, frames=frames)
                # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
                merged = tile_msds if merged is None else pd.concat([merged, tile_msds])
                print('Done calculating MSDs for row {} and col {}'.format(row, col))

            merged.to_csv(msd_file)
            aws.upload_s3(msd_file, remote_folder+'/'+msd_file, bucket_name=bucket)
            merged_ft = ft.calculate_features(merged)
            merged_ft.to_csv(ft_file)

            aws.upload_s3(ft_file, remote_folder+'/'+ft_file, bucket_name=bucket)

            #Plots
            hm.plot_trajectories(prefix, remote_folder=remote_folder, bucket=bucket)
            try:
                hm.plot_histogram(prefix, remote_folder=remote_folder, bucket=bucket)
            except ValueError:
                print("Couldn't plot histogram.")
            hm.plot_particles_in_frame(prefix, remote_folder=remote_folder, bucket=bucket)
            gmean1, gSEM1 = hm.plot_individual_msds(prefix, alpha=0.05, remote_folder=remote_folder, bucket=bucket)

    # Remove local working files whose names start with '100'.
    for leftover in glob.glob('./100*'):
        os.remove(leftover)

In [None]:
def test_cloudknot(prefix):
    print('The run {} was successful'.format(prefix))

In [4]:
# Repository to install from GitHub inside the image. This is a plain string:
# parentheses around a single value do not create a tuple.
github_installs = 'https://github.com/ccurtis7/diff_classifier.git'

# Wrap the pipeline function in a Cloudknot Docker image definition.
my_image = ck.DockerImage(
    func=download_split_track_msds,
    base_image='arokem/python3-fiji:0.3',
    github_installs=github_installs,
)

In [5]:
# Read the Dockerfile that belongs to `my_image` (kept in `docker_string` for inspection).
with open(my_image.docker_path) as docker_file:
    docker_string = docker_file.read()

# requirements.txt sits next to the Dockerfile.
req_path = op.join(op.split(my_image.docker_path)[0], 'requirements.txt')
with open(req_path) as req:
    req_string = req.read()

# Overwrite the last four characters of the FIRST requirement line with '5.28'.
# NOTE(review): this assumes the first line ends in a four-character version
# fragment that should become '5.28' -- confirm against the generated file.
# Re-running is harmless once the line already ends in '5.28'.
first_line_end = req_string.find('\n')
if first_line_end == -1:
    # Single line without a trailing newline: treat the whole text as the first line.
    first_line_end = len(req_string)
new_req = req_string[0:first_line_end-4]+'5.28'+ req_string[first_line_end:]

with open(req_path, 'w') as req_overwrite:
    req_overwrite.write(new_req)

In [None]:
# Display the patched requirements text produced by the previous cell.
new_req

In [1]:
#Test Docker Image
# Requires `my_image` from the DockerImage cell. The NameError recorded below
# comes from running this cell in a kernel where that cell had not been run.
my_image.build("0.1", image_name="test_image")

NameError: name 'my_image' is not defined

In [None]:
# Presumably tears down the resources of an earlier run, addressed by knot name.
# NOTE(review): the name is hard-coded to run 904 rather than derived from
# `start_knot` -- confirm it is the run you intend to remove.
ck.clobber(name='diff_classifier_904')

In [6]:
# Build one video prefix per slice/region/video combination.
to_track = []
for slic in slices:
    for region in regions:
        for video in videos:
            prefix = '100nm_S{}_XY{}_{}'.format(slic, region, video)
            to_track.append(prefix)

test_length = len(to_track)
print('Number of nodes to be loaded: {}'.format(test_length))

# Create the Knot for this run; its name carries the current `start_knot` value.
knot = ck.Knot(name='diff_classifier_{}'.format(start_knot),
               docker_image = my_image,
               memory = 32000,
               resource_type = "SPOT",
               bid_percentage = 100,
               image_id = 'ami-0de34a0a338c1051b',
               pars_policies=('AmazonS3FullAccess',))
               
# Submit every prefix to the knot; the returned futures are kept in `result_futures`.
result_futures = knot.map(to_track)
# Bump the counter so that re-running this cell uses a fresh knot name.
start_knot = start_knot + 1
print('Next knot name: {}'.format(start_knot))

Number of nodes to be loaded: 30
Next knot name: 919


In [None]:
# Presumably removes the resources created for `knot` by the launch cell -- run only when the job is finished.
knot.clobber()

In [None]:
# Run the whole pipeline in this kernel for a single video (no Cloudknot involved).
prefix = '100nm_S1_XY2_5'
download_split_track_msds(prefix)

In [None]:
# Load a features CSV from local disk into `features`.
# NOTE(review): `folder` is an absolute, machine-specific path and overwrites
# the S3 `folder` set in the configuration cell.
folder = '/home/ubuntu/source/diff-classifier/notebooks'
fname = 'features_P1_S1_L_0000.csv'
file = '{}/{}'.format(folder, fname)
# The first CSV column ('Unnamed: 0') is used as the index.
features = pd.read_csv(file, index_col='Unnamed: 0')

In [None]:
import xml.etree.ElementTree as ET

In [None]:
# Same slice/region/video grid as the configuration cell, redefined here.
slices = ['1', '2']
regions = [1, 2, 3]
videos = [1, 2, 3, 4, 5]

# Process every video sequentially in this kernel by calling the pipeline
# function directly instead of submitting it through Cloudknot.
for slic in slices:
    for region in regions:
        for video in videos:
            prefix = '100nm_S{}_XY{}_{}'.format(slic, region, video)
            download_split_track_msds(prefix)

In [None]:
# Top-level copy of the "Splitting section" of download_split_track_msds, useful
# for stepping through the download/partition logic by hand.
# Requires `prefix` to be defined by an earlier cell.
remote_folder = "Tissue_Studies/04_23_18_Registration_Test/tracking"
local_folder = os.getcwd()
ires = 512
frames = 651
filename = '{}.tif'.format(prefix)
remote_name = remote_folder+'/'+filename
local_name = local_folder+'/'+filename

msd_file = 'msd_{}.csv'.format(prefix)
ft_file = 'features_{}.csv'.format(prefix)

s3 = boto3.client('s3')

# Names of the 4x4 tiles belonging to `prefix`.
names = []
for i in range(0, 4):
    for j in range(0, 4):
        names.append('{}_{}_{}.tif'.format(prefix, i, j))

# A failing head_object is interpreted as "features not computed yet".
# `except Exception` (instead of a bare except) lets KeyboardInterrupt through.
try:
    obj = s3.head_object(Bucket='ccurtis.data', Key=remote_folder+'/'+ft_file)
except Exception:

    try:
        for name in names:
            aws.download_s3(remote_folder+'/'+name, name, bucket_name='ccurtis.data')
    except Exception:
        # Tiles are not available remotely: fetch the full image and split it.
        aws.download_s3(remote_name, local_name, bucket_name='ccurtis.data')
        # The return value is not needed; `names` already lists the tile names.
        ij.partition_im(local_name)
        for name in names:
            aws.upload_s3(name, remote_folder+'/'+name, bucket_name='ccurtis.data')
            print("Done with splitting.  Should output file of name {}".format(remote_folder+'/'+name))

In [None]:
# Shell cleanup: delete files in the working directory whose names start with '100'.
!rm 100*

In [None]:
# Shell cleanup: delete files in the working directory whose names start with
# 'feat', 'msd' or 'Traj'.
!rm feat*
!rm msd*
!rm Traj*

In [None]:
# Manual tracking experiment on one tile (row 1, col 1 of video 100nm_S1_XY2_5).
# The tile image is expected in the current directory.
prefix = '100nm_S1_XY2_5_1_1'
local_folder = '.'
name = "{}.tif".format(prefix)
outfile = 'Traj_' + name.split('.')[0] + '.csv'
local_im = op.join(local_folder, name)
quality = 0
y=511
# Differs from the call inside download_split_track_msds in threshold (0.1),
# quality (0), linking_max_distance (10.0) and gap_closing_max_distance (12.0).
ij.track(local_im, outfile, template=None, fiji_bin=None, radius=5, threshold=0.1,
                         do_median_filtering=True, quality=quality, x=511, y=y, ylo=1, median_intensity=300.0, snr=0.0,
                         linking_max_distance=10.0, gap_closing_max_distance=12.0, max_frame_gap=9,
                         track_displacement=0.0)

In [None]:
# Display the path of the tile image used by the tracking cell.
local_im

In [None]:
# Download one tile (row 1, col 2 of video 100nm_S1_XY2_5) from S3 into the
# current directory under the same file name.
prefix = '100nm_S1_XY2_5_1_2'
filename = '{}.tif'.format(prefix)
fname = "Tissue_Studies/04_23_18_Registration_Test/tracking/{}".format(filename)

aws.download_s3(fname, filename, bucket_name='ccurtis.data')

In [None]:
# Manual tracking experiment on the tile named by `prefix`, which must be set
# by an earlier cell.
local_folder = '.'
name = "{}.tif".format(prefix)
outfile = 'Traj_' + name.split('.')[0] + '.csv'
local_im = op.join(local_folder, name)
quality = 0.1
y=511
# Same arguments as the call inside download_split_track_msds with quality 0.1,
# except that do_median_filtering is False here.
ij.track(local_im, outfile, template=None, fiji_bin=None, radius=5.0, threshold=0.01,
                         do_median_filtering=False, quality=quality, x=511, y=y, ylo=1, median_intensity=300.0, snr=0.0,
                         linking_max_distance=8.0, gap_closing_max_distance=15.0, max_frame_gap=9,
                         track_displacement=0.0)