# 🧠 Notebook 05 – Clustering in the Symbolic Manifold

In [1]:
import pandas as pd
import numpy as np
import networkx as nx
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import seaborn as sns
import umap
from pathlib import Path

# Load the symbolic metrics, drop non-answers, standardize the three metric
# columns, project them to 2-D with PCA and persist both results.
#
# Every file this cell reads or writes lives in one data directory, so it is
# defined once here instead of being repeated in each path.
# NOTE(review): "Mainfold" is kept exactly as spelled in the original path —
# presumably it matches the directory name on disk; confirm before renaming.
DATA_DIR = Path("/Users/demetriosagourakis/Library/Mobile Documents/com~apple~CloudDocs/Biologia Fractal/entropic-symbolic-society/NHB_Symbolic_Mainfold/data")

# Load symbolic metrics
metrics_path = DATA_DIR / "symbolic_metrics.csv"
metrics_df = pd.read_csv(metrics_path)

# Filter invalid entries: explicit "no response" markers (case-insensitive),
# blank strings, and entries made up solely of non-word characters.
# NOTE(review): rows whose 'word' is missing (NaN) are not flagged by any of
# these tests (na=False / comparison with ''), so they stay in valid_df —
# confirm this is intended.
invalid_mask = metrics_df['word'].str.contains(r"no more responses|NORESP|no response|I don't know", case=False, na=False)
invalid_mask |= metrics_df['word'].str.strip().eq('') | metrics_df['word'].str.contains(r'^\W+$', na=False)
valid_df = metrics_df[~invalid_mask].copy()

# Standardize the three metric columns before PCA.
X = valid_df[['anchoring_alpha', 'curvature_kappa', 'entropy_Er']].values
X_scaled = StandardScaler().fit_transform(X)

# PCA: project the standardized metrics onto two components.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Save for future clustering. Row i of X_pca.npy corresponds to row i of
# filtered_metrics.csv, since both are derived from valid_df in the same order.
np.save(DATA_DIR / "X_pca.npy", X_pca)
valid_df.to_csv(DATA_DIR / "filtered_metrics.csv", index=False)

print("✅ PCA completed and saved.")


  from .autonotebook import tqdm as notebook_tqdm
  import pkg_resources


✅ PCA completed and saved.
