In [None]:
# import necessary functions and libraries
import os

import hdbscan
import numpy as np
import open3d as o3d
import pandas as pd
import plotly.express as px
from sklearn.cluster import DBSCAN

from helpers.readers import Reader
from helpers.visualization import Visualization3D

# Create a reader rooted at the simulated model directory and look up its folders.
# NOTE(review): absolute, user-specific path — consider a configurable data root.
model_dir = '/Users/dianamindroc/Desktop/PhD/Data/suresim_simulations/IMOD_models/model_0'
reader = Reader(model_dir)
reader.get_folders('0')

In [None]:
# Select the input file through the project Reader helper.
# NOTE(review): presumably lists the files of folder 0, keeps only the
# '.txt' ones and selects the entry at index 1 — confirm against helpers.readers.
reader.get_files_from_folder(0)
reader.filter('.txt')
reader.set_file(1)

In [None]:
# Load the selected text file and pull out the coordinate columns.
# NOTE(review): assumes the file has 4 columns and that columns 0, 1, 2 hold
# x, y, z — confirm against the data format and helpers.readers.
data = reader.read_txt(columns = 4)
data_xyz = reader.extract_xyz(column_names=[0,1,2])

In [None]:
# Number of entries in the full-resolution data (displayed as the cell output).
len(data_xyz)

In [None]:
# 3D scatter of the full-resolution point cloud, all points in red.
raw_scatter = Visualization3D(data_xyz).get_3d_scatter(color='red')
viz = raw_scatter.show()

In [None]:
# Voxel-grid downsampling of the selected point cloud file.
# The path is built with os.path.join (as in the uniform-downsampling cell)
# instead of manual '/' concatenation.
o3d_pc = o3d.io.read_point_cloud(os.path.join(reader.folder, reader.file), format='xyz')
# voxel_size is expressed in the point cloud's own coordinate units.
downpc = o3d_pc.voxel_down_sample(voxel_size=100)
# Expose the downsampled points as a DataFrame with named coordinate columns.
downpc_array = pd.DataFrame(np.asarray(downpc.points), columns=['x', 'y', 'z'])

In [None]:
import os
# Uniform downsampling: re-read the cloud and keep every 100th point.
cloud_path = os.path.join(reader.folder, reader.file)
o3d_pc = o3d.io.read_point_cloud(cloud_path, format='xyz')
downpc = o3d_pc.uniform_down_sample(100)
sampled_points = np.asarray(downpc.points)
downpc_array = pd.DataFrame(sampled_points, columns=['x', 'y', 'z'])

In [None]:
# Number of points left after downsampling (displayed as the cell output).
len(downpc_array)

In [None]:
from helpers.visualization import Visualization3D

In [None]:
# 3D scatter of the downsampled point cloud with the helper's default styling.
downsampled_scatter = Visualization3D(downpc_array).get_3d_scatter()
viz2 = downsampled_scatter.show()

### Find optimal eps for DBSCAN

In [None]:
from sklearn.neighbors import NearestNeighbors
import numpy as np
import matplotlib.pyplot as plt 

# k-distance plot used to choose DBSCAN's eps (look for the "elbow" of the curve).
# The neighbours are searched in the same data DBSCAN is later run on
# (downpc_array), with k equal to the min_samples value used there; a curve
# computed on the full-resolution cloud would describe a different point density.
min_samples_k = 6  # keep in sync with min_samples passed to DBSCAN
nbrs = NearestNeighbors(n_neighbors=min_samples_k).fit(downpc_array)
distances, indices = nbrs.kneighbors(downpc_array)
# Column 0 is each point's zero distance to itself; the last column is the
# distance to its (min_samples_k - 1)-th other neighbour, i.e. the smallest
# radius at which the point would count as a DBSCAN core point.
distances = np.sort(distances[:, -1], axis=0)
plt.plot(distances)
plt.xlabel('points sorted by k-distance')
plt.ylabel('distance to k-th nearest neighbour');

In [None]:
import hdbscan
# Fit HDBSCAN on the full-resolution coordinates.
clusterer = hdbscan.HDBSCAN(
    min_cluster_size=10,
    gen_min_span_tree=True,  # keep the minimum spanning tree for later inspection
    algorithm='best',
    alpha=0.7,
    metric='euclidean',
)
clusterer.fit(data_xyz)

In [None]:
# Plot the minimum spanning tree built during the HDBSCAN fit.
mst_plot_options = dict(
    edge_cmap='viridis',
    edge_alpha=0.6,
    node_size=10,
    edge_linewidth=2,
)
clusterer.minimum_spanning_tree_.plot(**mst_plot_options)

In [None]:
# Per-point outlier scores computed by the fitted HDBSCAN model (displayed as the cell output).
clusterer.outlier_scores_

In [None]:
import seaborn as sns
# Distribution of the outlier scores, ignoring non-finite values.
outlier_scores = clusterer.outlier_scores_
finite_scores = outlier_scores[np.isfinite(outlier_scores)]
sns.displot(finite_scores)

In [None]:
# Threshold at the 0.5 quantile (median) of the outlier scores; `outliers`
# holds the positional indices of the points scoring above it.
score_series = pd.Series(clusterer.outlier_scores_)
threshold = score_series.quantile(0.5)
outliers = np.flatnonzero(clusterer.outlier_scores_ > threshold)

In [None]:
# Colour every point by whether it was flagged as an outlier.
# `outliers` is an array of positional indices (shorter than data_xyz), so it
# cannot be used directly as a per-point colour; expand it to one 0/1 flag per
# point, matching how label arrays are passed as `color` elsewhere in this notebook.
outlier_flags = np.zeros(len(data_xyz), dtype=int)
outlier_flags[outliers] = 1
visualizer = Visualization3D(data_xyz).get_3d_scatter(color=outlier_flags, size=2).show()

### Denoise point cloud

In [None]:
# Cluster the downsampled cloud with DBSCAN; points labelled -1 are noise.
dbscan_model = DBSCAN(eps=100, min_samples=6)
pc = dbscan_model.fit(downpc_array)  # fit() returns the fitted estimator itself
print(pc)
type(pc)

In [None]:
# Rows of the downsampled cloud that DBSCAN labelled as noise (-1).
noise_mask = pc.labels_ == -1
downpc_array.loc[noise_mask]

In [None]:
# 3D scatter of the downsampled cloud, coloured by DBSCAN cluster label.
labelled_scatter = Visualization3D(downpc_array).get_3d_scatter(size=2, color=pc.labels_)
viz3 = labelled_scatter.show()

In [None]:
# Remove the rows DBSCAN labelled as noise (-1) from the downsampled cloud.
noise_index = downpc_array.index[pc.labels_ == -1]
data_without_outliers = downpc_array.drop(index=noise_index)

In [None]:
# 3D scatter of the denoised cloud; the noise label (-1) is filtered out of the
# labels as well so they stay aligned with the remaining rows.
kept_labels = pc.labels_[pc.labels_ != -1]
denoised_scatter = Visualization3D(data_without_outliers).get_3d_scatter(size=2, color=kept_labels)
viz4 = denoised_scatter.show()

### Construct graphs 

In [None]:
from sklearn.neighbors import KNeighborsTransformer
import networkx as nx
from pyvis.network import Network
import matplotlib.pyplot as plt

In [None]:
# Fit a 6-nearest-neighbour transformer (ball tree search) on the denoised
# cloud; the transformed neighbour graph is displayed as the cell output.
transformer = KNeighborsTransformer(
    algorithm='ball_tree',
    n_neighbors=6,
)
transformer.fit_transform(data_without_outliers)

In [None]:
# Build a networkx graph from the k-NN graph of the fitted points and report
# how many edges it has.
graph = transformer.kneighbors_graph()
adjacency = graph.toarray()
nx_graph = nx.from_numpy_array(adjacency)
nx_graph.number_of_edges()

In [None]:
# Show the dense adjacency matrix of the k-NN graph as an image.
adjacency_dense = graph.toarray()
plt.imshow(adjacency_dense, cmap='jet')

In [None]:
# Render the full k-NN graph as an interactive pyvis network inside the notebook.
net = Network(notebook = True)
# Only expose the physics controls in the generated page.
net.show_buttons(filter_=['physics'])
# Copy nodes and edges from the networkx graph into the pyvis network.
net.from_nx(nx_graph)
# Writes the visualisation to graph.html and displays it.
net.show('graph.html')

In [None]:
# Split the graph into its connected components, each as an independent copy.
A = [
    nx_graph.subgraph(component_nodes).copy()
    for component_nodes in nx.connected_components(nx_graph)
]

In [None]:
# Render a single connected component as an interactive pyvis network.
# A[0] is the first component returned by networkx, which is not necessarily
# the largest one.
net = Network(notebook = True)
# Only expose the physics controls in the generated page.
net.show_buttons(filter_=['physics'])
net.from_nx(A[0])
# Write to a dedicated file: reusing 'graph.html' would overwrite the page
# generated for the full graph, which that cell's output still points at.
net.show('graph_component_0.html')