# PrEvelOp - Exploration Demo

Automatisierte Datenaufbereitung, explorative Analyse, Clusterbildung und Evaluierung
für gemischte Datentypen (numerische und kategorische Merkmale).


In [None]:
from prevelop.data import generate_toy_dataset
from prevelop.preparation import preprocessing
from prevelop.exploration import boxplots, distributions, heatmap, tsne_visualization
from prevelop.clustering import gower_distance, agglomerative_clustering, kmedoids_clustering
from prevelop.evaluation import evaluate_clustering, plot_results_2d, feature_importance


## 1. Daten laden


In [None]:
### generate synthetic manufacturing dataset
# generate_toy_dataset hands back three values: the data itself plus the
# numerical and the categorical column collections used by later cells.
data, num_columns, cat_columns = generate_toy_dataset(n_samples=300)

# One print call, one summary line per argument (sep="\n" keeps the layout).
print(
    f"Dataset: {data.shape[0]} samples, {data.shape[1]} features",
    f"Numerical: {num_columns}",
    f"Categorical: {cat_columns}",
    sep="\n",
)

# Final expression of the cell, so the notebook displays the first 10 rows.
data.head(10)


## 2. Explorative Analyse


In [None]:
### boxplots for numerical features
# Only num_columns (from the data-loading cell) is passed, so the categorical
# columns are not part of this call. Runs on the raw, not yet preprocessed data.
boxplots(data, num_columns)


In [None]:
### feature distributions
# Same inputs as the boxplot cell: the raw data and num_columns only.
# NOTE(review): the plot type (histogram, KDE, ...) is decided inside
# prevelop.exploration.distributions and is not visible here.
distributions(data, num_columns)


In [None]:
### correlation heatmap
# Restricted to num_columns; the correlation measure is selected explicitly
# through method="pearson".
# NOTE(review): which other method values are accepted depends on
# prevelop.exploration.heatmap — check its documentation before changing it.
heatmap(data, num_columns, method="pearson")


## 3. Preprocessing


In [None]:
### scale numerical and encode categorical features
# preprocessing receives the raw data together with both column collections.
# The result is bound to a new name, data_preprocessed, which every later
# cell (distance, evaluation, plots) works on.
data_preprocessed = preprocessing(data, num_columns, cat_columns)
print(f"Preprocessed shape: {data_preprocessed.shape}")
# Final expression of the cell, so the notebook displays the leading rows
# (presumably a pandas DataFrame — confirm against preprocessing's return type).
data_preprocessed.head()


## 4. Clustering


In [None]:
### compute Gower distance matrix
# The resulting matrix is the shared input of both clustering cells below.
# NOTE(review): the distance is computed on data_preprocessed (already scaled
# and encoded) rather than on the raw mixed-type data — confirm this is the
# input gower_distance expects.
distance = gower_distance(data_preprocessed)
print(f"Distance matrix shape: {distance.shape}")


In [None]:
### agglomerative clustering
# Clusters on the precomputed distance matrix and requests 5 clusters.
labels_agg = agglomerative_clustering(distance, nr_cluster=5)
# Reports the number of distinct label values actually returned, which acts
# as a sanity check against the requested cluster count.
print(f"Agglomerative clusters: {len(set(labels_agg))}")


In [None]:
### k-medoids clustering
# Uses the same distance matrix and the same requested cluster count (5) as
# the agglomerative run, so the two label sets can be compared directly.
labels_kmed = kmedoids_clustering(distance, nr_cluster=5)
# Counts the distinct label values actually returned.
print(f"K-Medoids clusters: {len(set(labels_kmed))}")


## 5. Evaluierung


In [None]:
### evaluate agglomerative clustering
# The features are passed via .values (presumably the underlying NumPy array
# of a DataFrame — confirm) together with the agglomerative labels.
# NOTE(review): the scores are computed from the preprocessed features, not
# from the Gower distance matrix used for clustering — confirm this is intended.
scores_agg = evaluate_clustering(data_preprocessed.values, labels_agg)
print("Agglomerative Clustering:")
# scores_agg is consumed as (metric name, score) pairs via .items(); every
# score must support the ".4f" float format, otherwise this line raises.
for metric, score in scores_agg.items():
    print(f"  {metric}: {score:.4f}")


In [None]:
### evaluate k-medoids clustering
# Same evaluation call as for the agglomerative labels, on the same feature
# values, so both result sets are reported in the same form.
# NOTE(review): scored on the preprocessed features rather than on the Gower
# distance matrix used for clustering — confirm this is intended.
scores_kmed = evaluate_clustering(data_preprocessed.values, labels_kmed)
print("K-Medoids Clustering:")
# Each (metric name, score) pair is printed with four decimals; the score
# must support the ".4f" float format.
for metric, score in scores_kmed.items():
    print(f"  {metric}: {score:.4f}")


In [None]:
### visualize clusters in 2D
# Only the agglomerative labels are plotted here; pass labels_kmed instead to
# inspect the k-medoids result.
# NOTE(review): how the 2D projection is obtained is decided inside
# prevelop.evaluation.plot_results_2d and is not visible in this notebook.
plot_results_2d(data_preprocessed, labels_agg)


In [None]:
### feature importance analysis
# Runs on the preprocessed features with the agglomerative labels; the
# k-medoids labels are not analysed in this cell.
# NOTE(review): the importance measure itself is defined inside
# prevelop.evaluation.feature_importance — see its documentation.
feature_importance(data_preprocessed, labels_agg)
