### Imports

In [None]:
# import necessary functions and libraries
from helpers.readers import Reader
from helpers.visualization import Visualization3D
from sklearn.cluster import DBSCAN 
import open3d as o3d
import numpy as np 
import pandas as pd

### Read desired file

In [None]:
# Point the project Reader at the training-data directory, then call
# get_folders() last so the notebook echoes whatever it returns.
train_dir = '/home/dim26fa/data/imod_models/mitochondria/train/'
reader = Reader(train_dir)
reader.get_folders()

In [None]:
# Select the folder at index 2 (presumably an index into the get_folders()
# listing — confirm in helpers.readers).
reader.set_folder(2)

In [None]:
# Echo the result of get_files_from_folder() for the currently selected folder
# (presumably the list of files in it — verify in helpers.readers).
reader.get_files_from_folder()

In [None]:
# Select the file at index 0 (presumably an index into the file listing of the
# selected folder — confirm in helpers.readers).
reader.set_file(0)

In [None]:
# Read the selected text file through the Reader helper.
# NOTE(review): `columns=4` is presumably the expected column count — confirm.
reader.read_txt(columns=4)

In [None]:
# Extract the coordinate columns, addressed here by the labels 0, 1 and 2.
reader.extract_xyz(column_names=[0,1,2])

In [None]:
# Load the localization table directly with pandas and hand it to the reader,
# then pull the three position columns out by name.
localizations_csv = '/home/dim26fa/data/imod_models/mitochondria/train/sample_0/mitochondriaLocalizations.txt'
reader.data = pd.read_csv(localizations_csv)
data_xyz = reader.extract_xyz(column_names=['Pos_x', 'Pos_y', 'Pos_z'])

In [None]:
# Draw the reader's xyz frame as a blue 3D scatter plot.
# NOTE(review): `viz` is bound to the return value of .show(), not to the
# Visualization3D object itself — confirm that is intended.
viz = Visualization3D(reader.df_xyz).get_3d_scatter(color='blue').show()

In [None]:
# Inspect how the selected file path splits on '.'.
reader.file.split(sep='.')

In [None]:
# NumPy array of the coordinates; this is what the Open3D point cloud is built from.
xyz = data_xyz.to_numpy()

### Point cloud downsampling

In [None]:
# Wrap the coordinate array in an Open3D point cloud.
xyz_vectors = o3d.utility.Vector3dVector(xyz)
pcd = o3d.geometry.PointCloud()
pcd.points = xyz_vectors

In [None]:
# Echo the point cloud's repr.
pcd

In [None]:
# Cluster the cloud with Open3D's DBSCAN (eps=1, min_points=50) and keep the
# per-point cluster ids as a NumPy array. Negative ids are treated as noise
# when the colours are assigned further down.
cluster_ids = pcd.cluster_dbscan(eps=1, min_points=50)
labels = np.array(cluster_ids)

In [None]:
# Number of per-point labels returned by the clustering.
len(labels)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Map each cluster id onto the "tab20" colormap. Ids are scaled by the largest
# id, falling back to 1 when that is not positive, to avoid dividing by zero.
max_label = labels.max()
label_scale = max_label if max_label > 0 else 1
tab20 = plt.get_cmap("tab20")
colors = tab20(labels / label_scale)

In [None]:
# Zero out the colour rows of points with a negative label, then attach the
# first three colour channels to the cloud.
negative_label_mask = labels < 0
colors[negative_label_mask] = 0
rgb = colors[:, :3]
pcd.colors = o3d.utility.Vector3dVector(rgb)

In [None]:
# Echo the point cloud's repr again, now that colours are attached.
pcd

In [None]:
# Voxel-grid downsampling with a voxel size of 100; the surviving points are
# copied into a DataFrame with x/y/z columns.
down_pcd = pcd.voxel_down_sample(voxel_size=100)
voxel_points = np.asarray(down_pcd.points)
downpc_array = pd.DataFrame(voxel_points, columns=['x', 'y', 'z'])

In [None]:
# Show the voxel-downsampled cloud in Open3D's viewer.
o3d.visualization.draw_geometries([down_pcd])

In [None]:
# Read the reader's currently selected file straight into Open3D as an 'xyz' point cloud.
o3d_pc = o3d.io.read_point_cloud(reader.file, format = 'xyz')

In [None]:
# Uniform downsampling of the in-memory cloud with a step of 100. This
# rebinds `downpc_array` to the uniformly sampled points.
downpc = pcd.uniform_down_sample(100)
sampled_points = np.asarray(downpc.points)
downpc_array = pd.DataFrame(sampled_points, columns=['x', 'y', 'z'])

In [None]:
# Echo the downsampled cloud's repr.
downpc

In [None]:
# Echo the path of the file the reader currently points at.
reader.file

In [None]:
import os

# Re-read the selected file as an Open3D 'xyz' cloud, forcing a '.txt' suffix.
# os.path.splitext swaps only the final extension; splitting on the first '.'
# would truncate any path with a dot in a directory name or a leading './'.
txt_path = os.path.splitext(reader.file)[0] + '.txt'
o3d_pc = o3d.io.read_point_cloud(txt_path, format='xyz')

# Uniform downsampling with a step of 100, exported with unit-tagged column names.
downpc = o3d_pc.uniform_down_sample(100)
downpc_array = pd.DataFrame(np.asarray(downpc.points), columns=['x [nm]', 'y [nm]', 'z [nm]'])

In [None]:
# Echo the repr of the cloud read from disk.
o3d_pc

In [None]:
# Scatter plot of the downsampled cloud with marker size 1.
# NOTE(review): `viz2` holds the return value of .show(), not the plot object.
viz2 = Visualization3D(downpc_array).get_3d_scatter(size=1).show()

### Find optimal eps for DBSCAN

In [None]:
from sklearn.neighbors import NearestNeighbors
import numpy as np
import matplotlib.pyplot as plt 

# k-distance plot for choosing a DBSCAN eps: for every downsampled point, look
# up its nearest neighbours in the full cloud and plot the sorted distances.
#
# Both inputs are passed as plain arrays. The full cloud and the downsampled
# frame carry different column names, and scikit-learn validates DataFrame
# feature names between fit() and kneighbors().
full_points = np.asarray(data_xyz)
query_points = np.asarray(downpc_array)

nbrs = NearestNeighbors(n_neighbors=20).fit(full_points)
distances, indices = nbrs.kneighbors(query_points)
# NOTE(review): only column 2 (the third-closest match) is used although 20
# neighbours are requested — confirm which k should drive the eps choice.
distances = distances[:, 2]
distances = np.sort(distances, axis=0)
plt.plot(distances)

### Try out HDBSCAN for denoising

In [None]:
import hdbscan

# Fit HDBSCAN on the full coordinate table. The fitted estimator is the cell's
# last expression, so the notebook echoes it.
clusterer = hdbscan.HDBSCAN(
    min_cluster_size=40,
    min_samples=None,
    algorithm='best',
    alpha=0.7,
    metric='euclidean',
)
clusterer.fit(data_xyz)

In [None]:
# Per-point cluster labels assigned by HDBSCAN.
clusterer.labels_

In [None]:
# Number of labelled points.
len(clusterer.labels_)

In [None]:
# Check which container type the labels come in.
type(clusterer.labels_)

In [None]:
# Distinct label values that were assigned.
np.unique(clusterer.labels_)

In [None]:
import seaborn as sns

In [None]:
# Flag the points whose HDBSCAN outlier score lies above the 80th percentile.
# `outliers` holds their positional indices into the fitted data.
outlier_scores = clusterer.outlier_scores_
threshold = pd.Series(outlier_scores).quantile(0.8)
outliers = np.flatnonzero(outlier_scores > threshold)

In [None]:
# One palette entry per cluster id. The palette is sized from the labels
# actually present, not a fixed count, so indexing cannot run past its end
# when the number of clusters changes.
n_clusters = int(clusterer.labels_.max()) + 1 if len(clusterer.labels_) else 0
color_palette = sns.color_palette('deep', max(n_clusters, 1))

# Negative labels get a neutral grey; every other label gets its palette colour.
cluster_colors = [color_palette[x] if x >= 0
                  else (0.5, 0.5, 0.5)
                  for x in clusterer.labels_]
vis = Visualization3D(data_xyz).get_3d_scatter(color = cluster_colors, size = 1).show()

### Extract clusters

In [None]:
# Spot-check the label of the point at position 200.
clusterer.labels_[200]

In [None]:
# Positions of all points that share the label of the point at position 1.
# np.where returns a 1-tuple of index arrays here.
itemindex = np.where(clusterer.labels_ == clusterer.labels_[1])

In [None]:
# Distinct label values that were assigned.
np.unique(clusterer.labels_)

In [None]:
# Rows of the coordinate table that belong to the cluster selected in `itemindex`.
cluster = data_xyz.iloc[itemindex]

In [None]:
# Check which container type the labels come in. The attribute is `labels_`;
# `labels_s_` is not an attribute used anywhere else in this notebook.
type(clusterer.labels_)

In [None]:
# Attach the HDBSCAN label of every point as a "label" column. Column
# assignment appends the column the first time and overwrites it afterwards,
# so the cell can be re-run (DataFrame.insert refuses an existing column name).
data_xyz["label"] = clusterer.labels_

In [None]:
# Persist the labelled table. The DataFrame index is written as well, because
# index=False is not passed (the per-cluster export loop does pass it).
# NOTE(review): the target file name refers to a ries_lab_data / Nup96 dataset,
# while the table was loaded from the imod_models/mitochondria folder — confirm the path.
data_xyz.to_csv('/home/dim26fa/data/ries_lab_data/labeled/labeled_201008_U2OS-Nup96-SNAP-AF647-anti-Elys-CF660C_cell3_sml.csv')

In [None]:
# Echo the rows of the selected cluster.
cluster

In [None]:
# Output directory for the per-cluster CSV files. It ends with '/', because the
# export loop builds file names by string concatenation.
# NOTE(review): this directory is named "..._cell1_sml", whereas the labelled
# table is saved as "..._cell3_sml" — confirm which cell this run belongs to.
path = '/home/dim26fa/data/ries_lab_data/extracted_clusters/hdbscan40_201008_U2OS-Nup96-SNAP-AF647-anti-Elys-CF660C_cell1_sml/'

In [None]:
import os

# Write one CSV per cluster id, named "<label>.csv" inside `path`.
# The loop runs over the distinct labels. Looping over the per-point label
# array would rewrite every cluster file once for each of its member points.
os.makedirs(path, exist_ok=True)  # make sure the output directory exists
for label in np.unique(clusterer.labels_):
    member_rows = np.flatnonzero(clusterer.labels_ == label)
    cluster = data_xyz.iloc[member_rows]
    cluster.to_csv(os.path.join(path, str(label) + '.csv'), index=False)

In [None]:
# Echo the table currently held by the reader.
reader.data

In [None]:
# Load one exported cluster (label 1356) back through the reader and extract
# its x/y/z columns.
cluster_csv = '/home/dim26fa/data/ries_lab_data/extracted_clusters/hdbscan40_201008_U2OS-Nup96-SNAP-AF647-anti-Elys-CF660C_cell1_sml/1356.csv'
reader.data = pd.read_csv(cluster_csv)
data_xyz_cluster = reader.extract_xyz(column_names=['x', 'y', 'z'])

In [None]:
# Echo the table currently held by the reader (now the single-cluster CSV).
reader.data

In [None]:
# Red scatter plot (marker size 2) of the single extracted cluster.
# NOTE(review): `vis_cluster` holds the return value of .show(), not the plot object.
vis_cluster = Visualization3D(data_xyz_cluster).get_3d_scatter(size=2, color='red').show()

### Denoise point cloud

In [None]:
# Positional indices of the points flagged as outliers from the HDBSCAN outlier scores.
outliers

In [None]:
# Remove the flagged outliers. `outliers` holds positions into the table that
# HDBSCAN was fitted on (data_xyz), so the rows must be dropped from that
# table; applying them to the much smaller downsampled frame would address
# rows that do not exist or belong to different points.
data_without_outliers = data_xyz.drop(index=data_xyz.index[outliers])
# Keep the result coordinates-only even if the "label" column was added to data_xyz.
data_without_outliers = data_without_outliers.drop(columns=['label'], errors='ignore')

In [None]:
# Red scatter plot (marker size 2) of the cloud after outlier removal.
# NOTE(review): `viz4` holds the return value of .show(), not the plot object.
viz4 = Visualization3D(data_without_outliers).get_3d_scatter(size=2, color='red').show()

### Construct graphs 

In [None]:
from sklearn.neighbors import KNeighborsTransformer
import networkx as nx
from pyvis.network import Network
import matplotlib.pyplot as plt

In [None]:
# Number of rows left after outlier removal.
pc_size = len(data_without_outliers)

In [None]:
# Fit a 25-nearest-neighbour transformer (kd-tree search) on the single
# extracted cluster. The transformed output is the cell's last expression and
# is echoed by the notebook.
transformer = KNeighborsTransformer(
    n_neighbors=25,
    algorithm='kd_tree',
)
transformer.fit_transform(data_xyz_cluster)

In [None]:
# Turn the fitted neighbour structure into a NetworkX graph via its dense
# adjacency matrix, then report the number of edges.
graph = transformer.kneighbors_graph()
dense_adjacency = graph.toarray()
nx_graph = nx.from_numpy_array(dense_adjacency)
len(nx_graph.edges)

In [None]:
# Show the dense adjacency matrix as an image using the 'jet' colormap.
plt.imshow(graph.toarray(), cmap='jet')

In [None]:
# Render the whole neighbour graph with pyvis in notebook mode.
net = Network(notebook = True)
# Expose only the 'physics' group of the pyvis option buttons.
net.show_buttons(filter_=['physics'])
net.from_nx(nx_graph)
# Writes the visualisation to graph.html (the same file name is reused by the
# later component plot).
net.show('graph.html')

In [None]:
# Independent copies of every connected component of the neighbour graph.
A = [nx_graph.subgraph(component).copy()
     for component in nx.connected_components(nx_graph)]

In [None]:
# Render only the first connected component (A[0]) with pyvis in notebook mode.
net = Network(notebook = True)
# Expose only the 'physics' group of the pyvis option buttons.
net.show_buttons(filter_=['physics'])
net.from_nx(A[0])
# NOTE(review): this writes to graph.html again, replacing the full-graph
# rendering saved under the same name — use a different file name if both are needed.
net.show('graph.html')