In [None]:
import os

from prevelop import preparation, exploration, clustering, evaluation

__Data Preparation and Preprocessing__

In [None]:
# Location of the CSV file handed to preparation.load_simus_data below.
# Set the PREVELOP_SIMUS_CSV environment variable to point at a local copy;
# the original machine-specific path stays as the fallback so existing setups
# keep working unchanged. The value is kept as a plain string.
file = os.environ.get(
    'PREVELOP_SIMUS_CSV',
    '/Users/kaspar/Documents/FIR/Prevelop/data/Simus/csvfolding-2025-03-19_13-23-25.csv',
)

In [None]:
### Load the CAD data
# Columns to be treated as numerical features.
num_columns = [
    'Volumen',
    'L',
    'B',
    'H',
    'Da max.',
    'Di min.',
    'Lrot',
    'Gesamtanzahl Bohrungen',
    'Anzahl Außenabsätze',
    'Gesamtanzahl Eindrehungen außen',
    'Gesamtanzahl Eindrehungen innen',
    'Fasenbreite rechts',
    'Endenwinkel rechts',
    'Anzahl Innenabsätze',
]
# Columns to be treated as categorical features.
cat_columns = [
    'Klasse',
    'Eindrehungsart außen',
    'Eindrehungsanordnung außen',
    'Absatzform',
    'Bohrungsanordnung',
    'Anbringung Bohrungsanordnung',
    'Bohrungsart',
    'Ende rechts',
    'Innenform',
]
# The loader returns the data together with the column lists; both lists are
# rebound to whatever the loader hands back.
data, num_columns, cat_columns = preparation.load_simus_data(
    file,
    num_columns,
    cat_columns,
)

In [None]:
# Display the loaded data for a quick visual check.
data

In [None]:
# Manual preprocessing: scale the numerical features and keep the categorical
# features next to them in a single frame.
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, MaxAbsScaler

# NOTE(review): OneHotEncoder is imported but not used in this cell; the
# categorical columns are carried over exactly as loaded — confirm that the
# downstream functions are meant to receive them unencoded.

# Split the loaded data into its categorical and numerical parts.
df_cat = data[cat_columns]
df_num = data[num_columns]

# Fit a MaxAbsScaler on the numerical part, then rebuild a DataFrame that keeps
# the original row index and column labels.
scaler = MaxAbsScaler()
scaler.fit(df_num)
df_num_scaled = pd.DataFrame(
    scaler.transform(df_num),
    index=df_num.index,
    columns=df_num.columns,
)

# Join the scaled numerical and the categorical columns side by side.
data_preprocessed = pd.concat([df_num_scaled, df_cat], axis='columns')

In [None]:
# Display the preprocessed frame (scaled numerical + categorical columns).
data_preprocessed

__Exploration__

In [None]:
# Show box plots of the numerical columns (uses the unscaled `data`).
exploration.boxplots(data, num_columns)

In [None]:
# Show violin plots of the numerical columns (uses the unscaled `data`).
exploration.violinplots(data, num_columns)

In [None]:
# Show distributions for the columns listed in num_columns.
# NOTE(review): the previous comment said "categorical columns", but the call
# passes num_columns — confirm which column set is intended here.
exploration.distributions(data, num_columns)

In [None]:
# show bar plots of the categorical columns (currently disabled; this cell was previously labelled "parallel_coordinates_plot")
# exploration.barplots(data, cat_columns)

In [None]:
# Show a heatmap for the numerical columns of the data.
exploration.heatmap(data, num_columns)

In [None]:
# Run the z-score analysis on the numerical columns of the data.
exploration.z_score_analysis(data, num_columns)

In [None]:
# Apply an isolation forest to the numerical columns to detect outliers.
exploration.isolation_forest(data, num_columns)

In [None]:
# Visualize the data with t-SNE. Unlike the other exploration cells, this one
# receives the preprocessed (scaled) frame rather than the raw `data`.
exploration.tsne_visualization(data_preprocessed, num_columns)

__Adjustments based on EDA__

In [None]:
# Remove the columns 'Lrot', 'H' and 'Da max.' from both frames.
dropped_columns = ['Lrot', 'H', 'Da max.']

# errors='ignore' keeps this cell idempotent: re-running it after the columns
# are already gone no longer raises a KeyError.
data = data.drop(columns=dropped_columns, errors='ignore')
data_preprocessed = data_preprocessed.drop(columns=dropped_columns, errors='ignore')

# Keep the numerical column list in sync with the frames, so any cell that
# indexes with `num_columns` afterwards does not reference removed columns.
num_columns = [column for column in num_columns if column not in dropped_columns]

__Clustering__

In [None]:
### Calculate the Gower distance matrix.
# Note: the distance is computed from `data` (unscaled, after the column
# removal above), not from `data_preprocessed`.
distance_matrix = clustering.gower_distance(data)

In [None]:
### Plot the elbow plot for agglomerative clustering.
# The last argument is 80 (the previous comment said 20) — presumably the
# maximum number of clusters to evaluate; TODO confirm against prevelop.
clustering.elbow_plot_agglomerative(data_preprocessed, distance_matrix, 80)

In [None]:
### Plot the dendrogram from the distance matrix, labelled with the index of
### `data` and drawn with orientation 'left'.
clustering.plot_dendrogram(distance_matrix, labels=data.index, orientation='left')

In [None]:
### Find clusters with k-medoids and apply the elbow and silhouette methods.
# The second argument is 70 — presumably the maximum number of clusters to
# evaluate; TODO confirm against prevelop.
clustering.elbow_plot_kmedoids(data_preprocessed, 70)

__Evaluation__

In [None]:
# Agglomerative clustering on the precomputed distance matrix.
# The second argument is 66 — presumably the number of clusters; TODO confirm.
labels = clustering.agglomerative_clustering(distance_matrix, 66)

In [None]:
# # export the results to an Excel file: a dataframe with columns 'ID' and 'Cluster'
# df = pd.DataFrame(data={'ID': data.index, 'Cluster': labels})
# df.to_excel('results/results_roemheld_aggl_66.xlsx', index=False)

In [None]:
### Evaluate the agglomerative clustering result held in `labels`.
evaluation.evaluate_clustering(data_preprocessed, labels)

In [None]:
### Visualize the feature importance for the agglomerative clustering labels.
evaluation.feature_importance(data_preprocessed, labels)

In [None]:
### Visualize the agglomerative clustering result in 2D.
# Passes the underlying array (`.values`) rather than the DataFrame itself.
evaluation.plot_results_2d(data_preprocessed.values, labels)

In [None]:
# K-medoids clustering on the precomputed distance matrix.
# The second argument is 50 — presumably the number of clusters; TODO confirm.
# NOTE: this rebinds `labels`, replacing the agglomerative result; the
# evaluation cells below therefore refer to the k-medoids labels.
labels = clustering.kmedoids_clustering(distance_matrix, 50)

In [None]:
# # export the results to an Excel file: a dataframe with columns 'ID' and 'Cluster'
# df = pd.DataFrame(data={'ID': data.index, 'Cluster': labels})
# df.to_excel('results/results_roemheld_kmedoids_50.xlsx', index=False)

In [None]:
### Evaluate the k-medoids clustering result held in `labels`.
evaluation.evaluate_clustering(data_preprocessed, labels)

In [None]:
### Visualize the feature importance for the k-medoids clustering labels.
evaluation.feature_importance(data_preprocessed, labels)

In [None]:
### Visualize the k-medoids clustering result in 2D.
# Passes the underlying array (`.values`) rather than the DataFrame itself.
evaluation.plot_results_2d(data_preprocessed.values, labels)