# 04_stylo_classification.ipynb

**Stylo‑like Hierarchical Clustering on Stylometric Features**

Builds a Delta-style dendrogram in Python with pandas, scikit-learn, SciPy and matplotlib, without requiring the external `stylo` package.

In [None]:
from pathlib import Path

def setup_project_paths():
    """Resolve the project root and its input/output directories.

    The root is the current working directory, or its parent when the
    working directory is named ``codigo``.

    Returns:
        A ``(base_path, input_path, output_path)`` tuple of ``Path`` objects,
        where ``input_path`` is ``<root>/corpus/tei`` and ``output_path`` is
        ``<root>/resultados/computational-analysis``.
    """
    here = Path.cwd()
    if here.name == 'codigo':
        # Running from inside the code folder: the project root is one level up.
        root = here.parent
    else:
        root = here
    return (
        root,
        root.joinpath('corpus', 'tei'),
        root.joinpath('resultados', 'computational-analysis'),
    )

# Resolve the project directories once at module level; OUTPUT_PATH is read
# below to locate the features CSV and the visualizations folder.
BASE_PATH, INPUT_PATH, OUTPUT_PATH = setup_project_paths()

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import linkage, dendrogram
import matplotlib.pyplot as plt

# Delta-like hierarchical clustering of the stylometric feature table:
# load the features CSV, z-score the numeric columns, compute pairwise
# Manhattan distances, build the linkage tree, then save and show the
# dendrogram.
from scipy.spatial.distance import pdist

# Load stylometric features CSV
stylopath = OUTPUT_PATH / 'corpus_summary' / 'csv' / 'corpus_stylometric_features.csv'
df_stylo = pd.read_csv(stylopath)

# Identify key column: the first CSV column supplies the dendrogram leaf labels
key = df_stylo.columns[0]

# Select numeric stylometric features. The label column is dropped first so
# that a numeric identifier (e.g. a saved row index) is never clustered as if
# it were a feature; when the label column is textual this changes nothing.
X = df_stylo.drop(columns=[key]).select_dtypes(include='number')

# Fail early with explicit messages instead of an opaque ValueError raised
# later by the scaler or the linkage step.
if X.shape[1] == 0:
    raise ValueError(f"No numeric feature columns found in {stylopath}")
if X.shape[0] < 2:
    raise ValueError(
        f"Need at least 2 rows to cluster, found {X.shape[0]} in {stylopath}"
    )
nan_columns = X.columns[X.isna().any()].tolist()
if nan_columns:
    raise ValueError(
        f"Missing values in feature columns {nan_columns}; "
        "clean or impute them before clustering"
    )

# Standardize features (z-scores)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Compute pairwise distances using Manhattan (Delta-like).
# pdist returns the condensed distance vector that linkage() expects.
dist_matrix = pdist(X_scaled, metric='cityblock')

# Hierarchical clustering
# NOTE(review): SciPy documents 'ward' linkage as correctly defined only for
# Euclidean distances, while the distances here are Manhattan. The method is
# left unchanged to keep the existing tree; confirm this combination is
# intended.
Z = linkage(dist_matrix, method='ward')

# Plot and save dendrogram
plt.figure(figsize=(10, 6))
dendrogram(Z, labels=df_stylo[key].tolist(), leaf_rotation=90)
plt.title("Hierarchical Clustering on Stylometric Features")
plt.tight_layout()
vis_path = OUTPUT_PATH / 'individual_analyses' / 'visualizations'
vis_path.mkdir(parents=True, exist_ok=True)
# Save before show(): showing may release the figure, leaving nothing to write.
plt.savefig(vis_path / 'delta_like_tree.png')
plt.show()