In [86]:
%matplotlib inline
from ndreg import *
import matplotlib
import ndio.remote.neurodata as neurodata
import numpy as np
import cv2

import plotly
import plotly.graph_objs as graphobjs

import nibabel as nib

import image_utils

In [56]:
import cs475 
reload(cs475_types)
from cs475_types import Predictor

In [87]:
import math
from copy import deepcopy
from cs475_types import Predictor

class LambdaMeans(Predictor):
    def __init__(self, cluster_lambda, clustering_training_iterations, instances, max_index):
        self.cluster_iterations = clustering_training_iterations
        self.max_index = max_index          # max index of a feature in our data
        average_instance = [0] * max_index  # create the first cluster
        for i in range(max_index):
            average_instance[i] = instances[0]._feature_vector.get(i)
        for i in range(1, len(instances)):
            curr_instance = instances[i]
            for feature in curr_instance._feature_vector.keys():
                average_instance[feature] += curr_instance._feature_vector.get(feature)
        for i in range(max_index): 
            average_instance[i] /= len(instances)
        self.cluster_means = []             # stores clusters
        self.cluster_means.append(average_instance)
        self.num_clusters = 1               # number of clusters
        if cluster_lambda > 0:
            self.cluster_lambda = cluster_lambda
        else:
            cluster_lambda = 0
            for instance in instances:
                cluster_lambda += self.distance(average_instance, instance._feature_vector) ** 2
            self.cluster_lambda = cluster_lambda / len(instances)
        self.instance_best_dist = -1        # stores the distance to cluster for last instance looked at


    def train(self, instances):
        for i in range(self.cluster_iterations):
            r_nk = [] # stores cluster each instance is in
            cluster_count = [0] * self.num_clusters
            # E Step
            for a in range(len(instances)):
                instance = instances[a]
                k = self.predict(instance)
                square_dist = self.instance_best_dist ** 2
                if square_dist <= self.cluster_lambda:
                    r_nk.append(k)
                    cluster_count[k] += 1
                else:
                    new_cluster = []
                    for i in range(self.max_index):
                        new_cluster.append(instance._feature_vector.get(i))
                    self.cluster_means.append(new_cluster)
                    r_nk.append(self.num_clusters)
                    self.num_clusters += 1
                    cluster_count.append(1)
            # M Step
            for m in range(self.num_clusters):
                self.cluster_means[m] = [0] * self.max_index
            for x in range(len(instances)):
                cluster = r_nk[x]
                cluster_mean = self.cluster_means[cluster]
                instance = instances[x]
                for feature in instance._feature_vector.keys():
                    cluster_mean[feature] += instance._feature_vector.get(feature) / float(cluster_count[cluster])


    def predict(self, instance):
        best_dist = -1
        for x in range (self.num_clusters):
            cluster_mean = self.cluster_means[x]
            dist = self.distance(cluster_mean, instance._feature_vector)
            if dist < best_dist or best_dist == -1:
                best_dist = dist
                best_cluster = x
        self.instance_best_dist = best_dist  # store best dist for this instance
        return best_cluster


    def distance(self, list_a, vector_b):
        total_sum = 0
        for i in range(self.max_index):
            total_sum += (list_a[i] - vector_b.get(i)) ** 2
        return math.sqrt(total_sum)


In [88]:
from cs475_types import ClassificationLabel, FeatureVector, Instance, Predictor

def load_data(data):
    instances = []
    for point in data:

        label = ClassificationLabel(0)
        feature_vector = FeatureVector()
            
        for i in range(3):
            if point[i] != 0.0:
                feature_vector.add(i, point[i])

        instance = Instance(feature_vector, label)
        instances.append(instance)

    return instances

def do_lambda_means_clustering(cluster_lambda, clustering_training_iterations, instances, max_index):
    predictor = LambdaMeans(cluster_lambda, clustering_training_iterations, instances, max_index)
    predictor.train(instances)
    return predictor.cluster_means

In [93]:
# plotting no histeq
# full image
temp_file_path = "img/" + "Fear199" + ".nii"

# Downsampled image
# temp_file_path = "img/" + inToken + "_ds.nii"

# Histogram Equilized image
# temp_file_path = "histeq/" + inToken + "_histeq.nii"

num_points = 7500

temp_img = nib.load(temp_file_path)

## Sanity check for shape
temp_img.shape

## Convert into np array (or memmap in this case)
temp_data = temp_img.get_data()
temp_shape = temp_img.shape
temp_max = np.max(temp_data)

print('shape:')
print temp_data.shape
print type(temp_data)

print('max:')
print(temp_max)

# print(temp_data)

temp_threshold = 0.01
filt = temp_data > temp_threshold * temp_max
# filt = temp_data

data_points = np.where(filt)
x = data_points[0]
y = data_points[1]
z = data_points[2]

intens = temp_data[filt]
intens = np.int16(255 * (np.float32(intens) / np.float32(temp_max)))

intens_shape = intens.shape

total_points = intens.shape[0]

fraction = num_points / float(total_points)

if fraction < 1.0:
    # np.random.random returns random floats in the half-open interval [0.0, 1.0)
    filt = np.random.random(size=intens_shape) < fraction
    print('v.shape:')
    print(intens_shape)
    print('x.size before filter: %d' % x.size)
    print('y.size before filter: %d' % y.size)
    print('z.size before filter: %d' % z.size)
    print('v.size before filter: %d' % intens.size)
    x = x[filt]
    y = y[filt]
    z = z[filt]
    intens = intens[filt]
    print('x.size after filter: %d' % x.size)
    print('y.size after filter: %d' % y.size)
    print('z.size after filter: %d' % z.size)
    print('v.size after filter: %d' % intens.size)


temp_points = np.vstack([x, y, z, intens])
temp_points = np.transpose(temp_points)
print("Num Points: %d"%(temp_points.shape[0]))


shape:
(405, 272, 537)
<class 'numpy.core.memmap.memmap'>
max:
3969
v.shape:
(5423879,)
x.size before filter: 5423879
y.size before filter: 5423879
z.size before filter: 5423879
v.size before filter: 5423879
x.size after filter: 7408
y.size after filter: 7408
z.size after filter: 7408
v.size after filter: 7408
Num Points: 7408


In [94]:
image_utils.array_to_plot(temp_points, point_size=1.5, outfile_name='Fear199_og')

In [92]:
resolution = (0.01872, 0.01872, 0.005)
thedata = temp_points
#     print(thedata)

# Set tupleResolution to resolution input parameter
tupleResolution = resolution;

# EG: for Aut1367, the spacing is (0.01872, 0.01872, 0.005).
xResolution = tupleResolution[0]
yResolution = tupleResolution[1]
zResolution = tupleResolution[2]
# Now, to get the mm image size, we can multiply all x, y, z
# to get the proper mm size when plotting.

#     print('asdf')
#     x = [x * xResolution for x in thedata[:, 0]]
#     print(x)

trace1 = graphobjs.Scatter3d(
    x = [x * xResolution for x in thedata[:, 0]],
    y = [x * yResolution for x in thedata[:, 1]],
    z = [x * zResolution for x in thedata[:, 2]],
    mode='markers',
    marker=dict(
        size=1.2,
        color='cyan',                # set color to an array/list of desired values
        colorscale='Viridis',   # choose a colorscale
        opacity=0.15
    )
)

data = [trace1]
layout = graphobjs.Layout(
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0
    ),
    paper_bgcolor='rgb(0,0,0)',
    plot_bgcolor='rgb(0,0,0)'
)

fig = graphobjs.Figure(data=data, layout=layout)
#     print(self._token + "plotly")

plotly.offline.plot(fig, filename= 'plots/Fear199_og2_plot.html')


'file:///root/cell-segmentation-mlfinal/plots/Fear199_og2_plot.html'

In [95]:
instances = load_data(temp_points)

In [97]:
result_pts = do_lambda_means_clustering(100, 10, instances, 3)

print('num points: %d' % len(result_pts))
print(result_pts)


num points: 2826
[[235.0, 141.0, 241.0], [55.0, 151.5, 222.0], [54.0, 138.0, 223.0], [56.0, 98.0, 238.0], [60.5, 125.5, 199.5], [58.0, 103.0, 258.0], [61.0, 147.66666666666669, 204.66666666666669], [60.333333333333329, 152.66666666666666, 194.33333333333334], [61.200000000000003, 121.40000000000001, 222.59999999999997], [61.5, 132.0, 238.5], [63.0, 109.0, 244.5], [64.0, 158.0, 204.0], [64.0, 174.5, 217.0], [61.0, 76.0, 312.0], [61.0, 80.0, 299.0], [63.0, 112.5, 201.5], [61.0, 141.0, 255.0], [63.5, 147.0, 225.25], [65.0, 171.0, 198.0], [64.25, 92.75, 261.0], [66.0, 106.33333333333331, 233.66666666666669], [63.666666666666671, 130.0, 210.66666666666669], [65.0, 154.0, 215.0], [62.0, 166.0, 242.0], [66.75, 81.25, 271.0], [66.0, 120.0, 240.5], [66.333333333333329, 91.333333333333343, 275.0], [65.333333333333343, 104.33333333333331, 221.0], [68.0, 106.5, 267.5], [65.0, 140.0, 190.0], [66.0, 73.0, 282.0], [68.799999999999997, 144.0, 234.80000000000001], [70.0, 160.66666666666669, 223.0], [66

In [98]:
result_pts = np.array(result_pts)
print result_pts

[[ 235.   141.   241. ]
 [  55.   151.5  222. ]
 [  54.   138.   223. ]
 ..., 
 [ 322.   164.   300. ]
 [ 335.   117.   307. ]
 [  96.   144.   163. ]]


In [99]:
image_utils.array_to_plot(result_pts, point_size=1.5, outfile_name='Fear199_lambda_means')