In [None]:
import pandas as pd 
import plotly
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
from sklearn.cluster import DBSCAN
import os
import sys
from sklearn.metrics import silhouette_score
import open3d as o3d
from sklearn.decomposition import PCA



In [145]:

# Sanity-check the working directory and the project layout before importing local code.
print(os.listdir("../data"))
print(os.getcwd())
print(os.listdir("../src"))

# Make ../src importable. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate entry to sys.path each time.
src_dir = os.path.abspath("../src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

['lidar_cable_points_extrahard.parquet', 'lidar_cable_points_medium.parquet', 'lidar_cable_points_hard.parquet', 'lidar_cable_points_easy.parquet']
/Users/grace/catenary-wire-modeling/notebooks
['clustering.py', 'models.py', '__pycache__']


In [146]:
# Import the local clustering module (clustering.py is listed in ../src above) and
# reload it so edits to the file are picked up without restarting the kernel.
import clustering
from importlib import reload
reload(clustering)

# NOTE(review): the star import hides which names this notebook depends on.
# Cells below call cluster_points_hdbscan and plot_clusters_3d, and a later cell uses
# `hdbscan`, which is not imported anywhere in this notebook — presumably all of them
# arrive through this line; confirm before replacing it with explicit imports.
from clustering import *

In [147]:
# Dataset variants available under ../data, and the filename template they share.
difficulties = ['easy', 'medium', 'hard', 'extrahard']
base_path = "../data/lidar_cable_points_{}.parquet"

# Read every variant once; the resulting dict maps variant name -> DataFrame.
cable_points = {}
for level in difficulties:
    cable_points[level] = pd.read_parquet(base_path.format(level))

In [148]:
# Display the 'medium' DataFrame (columns x, y, z) as the cell output.
cable_points['medium']

Unnamed: 0,x,y,z
5117,8.927091,-17.271705,7.724551
3574,-0.514603,3.713301,6.604667
245,6.141806,-11.188338,10.438597
3258,6.998778,-10.146784,6.937106
5803,-7.537887,12.818930,7.195763
...,...,...,...
999,-11.987950,21.935129,11.501804
389,2.621246,-4.823306,10.109608
2330,3.355156,-8.010307,10.201061
1476,1.368493,-0.655320,9.983076


In [149]:
# Raw coordinate array for the 'medium' scene: one row per point, columns in DataFrame order.
points3d = cable_points['medium'].to_numpy()


In [150]:
# Cluster the 'medium' points with cluster_points_hdbscan (not defined in this notebook;
# presumably provided by `from clustering import *` and returning one label per point — confirm).
hdbscan_labels = cluster_points_hdbscan(points3d)


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



In [151]:

# Visualise the points with their HDBSCAN labels via plot_clusters_3d
# (not defined in this notebook; presumably comes from the clustering star import — confirm).
plot_clusters_3d(points3d, hdbscan_labels)

In [116]:
# Wrap the coordinate array in an Open3D point cloud.
pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(points3d))

In [117]:
# Thin the cloud with Open3D's voxel downsampling; voxel_size is the value handed to it.
voxel_size = 0.05
downpcd = pcd.voxel_down_sample(voxel_size)


In [118]:
# Show the downsampled cloud in Open3D's viewer.
# NOTE(review): this likely opens a native window and blocks until it is closed —
# confirm before relying on Restart & Run All or running headless.
o3d.visualization.draw_geometries([downpcd])

In [119]:
# NOTE(review): this import sits mid-notebook; it depends on ../src having been added to
# sys.path by the setup cell. Consider moving it next to the other imports.
from models import project_to_2d

# Keep only the first element of project_to_2d's return value
# (presumably the projected 2D coordinates; what the other elements are is not visible here).
points2d = project_to_2d(points3d)[0]

In [120]:
# Display the projected 2D array as the cell output.
points2d


array([[-19.25452338,  -0.98836299],
       [  3.68914626,  -2.03274525],
       [-12.59025261,   1.7476432 ],
       ...,
       [ -8.46318735,   1.52407365],
       [ -1.05801724,   1.33008199],
       [-12.90056452,   1.79950369]])

In [121]:
# Scatter of the two projected coordinates, one axis per column of points2d.
fig = px.scatter(
    x=points2d[:, 0],
    y=points2d[:, 1],
    title="2D PCA Projection",
).update_layout(xaxis_title="PCA 1", yaxis_title="PCA 2")
fig.show()

In [122]:
# Display the second column of the projected coordinates.
points2d[:, 1]

array([-0.98836299, -2.03274525,  1.7476432 , ...,  1.52407365,
        1.33008199,  1.79950369])

In [134]:
# Third column of the point array (z), plotted against each point's row position.
z = points3d[:, 2]

df = pd.DataFrame({'z': z}).assign(index=range(len(z)))

fig = px.scatter(
    df,
    x='index',
    y='z',
    title='Projection onto Z-Axis',
).update_layout(xaxis_title='Point Index', yaxis_title='Z Value')
fig.show()

In [143]:
# Histogram of the z values computed in the Z-axis cell above.
# The duplicate `import plotly.express as px` was removed: px is already imported
# with the other imports in the first cell.
fig = px.histogram(x=z, nbins=100, title="Z Distribution Histogram")
fig.update_layout(xaxis_title='Z Value', yaxis_title='Count')
fig.show()

In [185]:
# First pass: run HDBSCAN with default settings on the full 3D points to obtain coarse
# "major" clusters, then store the labels next to the coordinates in a DataFrame.
# NOTE(review): `hdbscan` is not imported anywhere in this notebook; it presumably leaks in
# through `from clustering import *` — confirm, and prefer an explicit import.
major_clusterer = hdbscan.HDBSCAN()
major_labels = major_clusterer.fit_predict(points3d)
# This rebinds `df`, which an earlier cell used for the z-by-index table.
df = pd.DataFrame(points3d, columns=["x", "y", "z"])
df["major_label"] = major_labels


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.


'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.



In [186]:
# List the distinct major-cluster labels that were assigned.
df["major_label"].unique()

array([1, 0])

In [None]:
# Second pass: split every major cluster into individual wires.
#
# For each major cluster the points are reduced to their X-Y coordinates, rotated with a
# 2-component PCA, and DBSCAN is run on the second principal component only (the original
# author's note: the axis orthogonal to the wire direction). Per-cluster wire ids are then
# shifted by `wire_counter` so ids are unique across major clusters.
#
# Results:
#   final_labels -- one integer per row of `df`; -1 means noise / unassigned.
#   wire_counter -- number of distinct wire ids handed out.
final_labels = np.full(len(df), -1)  # start with everything marked as noise
wire_counter = 0  # next free global wire id

for major_label in np.unique(major_labels):
    if major_label == -1:
        continue  # points the major clustering marked as noise stay at -1

    cluster_mask = (df["major_label"] == major_label).to_numpy()
    cluster_points = points3d[cluster_mask]
    print(cluster_points.shape)

    # --- Project to 2D (X-Y axis selection) ---
    points2d = cluster_points[:, [0, 1]]
    print(points2d.shape)
    df_xy = pd.DataFrame(points2d, columns=["x", "y"])

    # Plot the raw X-Y footprint of this major cluster
    fig = px.scatter(df_xy, x="x", y="y", title="Cluster X-Y Projection")
    fig.update_layout(xaxis_title="X", yaxis_title="Y")
    fig.show()

    # --- Rotate with PCA, then cluster in 1D along the second principal component ---
    pca = PCA(n_components=2)
    points2d = pca.fit_transform(points2d)
    wire_labels = DBSCAN(eps=0.05, min_samples=5).fit_predict(points2d[:, 1].reshape(-1, 1))

    # --- Assign final labels globally ---
    # Only real wires are offset. DBSCAN reports noise as -1; adding wire_counter to it
    # would yield wire_counter - 1, i.e. the id of the previous major cluster's last wire.
    final_labels[cluster_mask] = np.where(wire_labels == -1, -1, wire_labels + wire_counter)
    wire_counter += wire_labels.max() + 1  # all-noise cluster: max is -1, so no ids are consumed


(1230, 3)
(1230, 2)


(1573, 3)
(1573, 2)


In [189]:
# Coordinates plus the global wire id of every point, ready for plotting.
df_plot = pd.DataFrame(points3d, columns=["x", "y", "z"]).assign(wire_label=final_labels)

# 3D scatter coloured by wire id; markers are shrunk to size 1.
fig = px.scatter_3d(
    df_plot,
    x="x",
    y="y",
    z="z",
    color="wire_label",
    title="All Clustered Wires",
)
fig.update_traces(marker={"size": 1})
fig.show()

In [206]:

# Switch to the 'easy' scene. This rebinds points3d / points2d for the cells that follow.
points3d = cable_points['easy'].values
print(points3d.shape)

# Keep only the first two columns (x, y).
points2d = points3d[:, [0, 1]]
print(points2d.shape)

# Rotate the X-Y footprint onto its principal axes.
pca = PCA(n_components=2)
points2d = pca.fit_transform(points2d)

# Cluster in 1D along the second principal component, passed as a single-column array.
second_axis = points2d[:, 1].reshape(-1, 1)
labels = DBSCAN(eps=0.05, min_samples=5).fit_predict(second_axis)


(1502, 3)
(1502, 2)


In [207]:
# Table of the PCA-rotated coordinates together with each point's DBSCAN label.
df = pd.DataFrame(
    {
        'pca1': points2d[:, 0],
        'pca2': points2d[:, 1],
        'wire_label': labels,
    }
)

fig = px.scatter(
    df,
    x='pca1',
    y='pca2',
    color='wire_label',
    title='Wire Separation via PCA + DBSCAN',
)
fig.show()

In [None]:
"""works for all beside medium"""
# labels = hdbscan.HDBSCAN(min_cluster_size=10).fit_predict(points2d)
# labels = DBSCAN(eps=0.05, min_samples=5).fit_predict(points2d[:, 1].reshape(-1, 1))

labels = DBSCAN(eps=0.03, min_samples=5).fit_predict(points3d[:, 2].reshape(-1, 1))


In [129]:
# Split the 2D coordinates into their two columns and collect the distinct labels.
x, y = points2d[:, 0], points2d[:, 1]
unique_labels = np.unique(labels)

# One scatter trace per label; the -1 label is shown as 'Noise' in the legend.
traces = [
    go.Scatter(
        x=x[labels == label],
        y=y[labels == label],
        mode='markers',
        name='Noise' if label == -1 else f'Cluster {label}',
        marker=dict(size=6),
    )
    for label in unique_labels
]

# Assemble the figure, label it, and render it.
fig = go.Figure(data=traces)
fig.update_layout(
    title='2D DBSCAN Clusters',
    xaxis_title='PCA 1',
    yaxis_title='PCA 2',
    legend_title='Cluster',
)
fig.show()
