In [56]:
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from helper.database_connection_helper import get_collection
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline


collection = get_collection()

In [57]:
collection_filter = {
    'configuration': 'Africa'
}

In [None]:
documents = collection.find(collection_filter)
    
all_data = []

for doc in documents:
    print(doc['_id'])
    
    # Hinzufügen der Daten aus jedem Dokument zur Liste
    for defect_channel, distance, magnetization, timestamp in zip(doc['defect_channel'], doc['distance'], doc['magnetization'], doc['timestamp']):
        all_data.append({
            'defect_channel': defect_channel,
            'distance': distance,
            'magnetization': magnetization,
            'timestamp': timestamp
        })
        
df = pd.DataFrame(all_data)

if 'timestamp' in df.columns:
    df = df.drop(columns=['timestamp'])
    
# Datenzauberei
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
pipeline = make_pipeline(imputer, scaler)
df_preprocessed = pipeline.fit_transform(df)

# Aktualisieren der Perplexität basierend auf der Anzahl der Samples
n_samples = df_preprocessed.shape[0]
perplexity_value = min(30, max(5, int(n_samples / 3)))  

# Anwenden von t-SNE auf die vorverarbeiteten Daten
tsne = TSNE(n_components=2, perplexity=perplexity_value, random_state=42)
tsne_results = tsne.fit_transform(df_preprocessed)

# Durchführen des KMeans Clustering auf den Ergebnissen von t-SNE
kmeans = KMeans(n_clusters=2, random_state=42)
clusters = kmeans.fit_predict(tsne_results)

# Visualisierung der Clustering-Ergebnisse
plt.figure(figsize=(8, 6))
plt.scatter(tsne_results[:, 0], tsne_results[:, 1], c=clusters, cmap='viridis')
plt.title("Clustering Results")
plt.xlabel("TSNE-1")
plt.ylabel("TSNE-2")
plt.show()

000bc7ea-8918-420c-bd13-30e1a6e8dd00
00fa1d39-bdd9-45e3-81f6-6f52a1ca57b3
0245cbf0-faec-4708-a1f2-3ccf7ebec1e1
02498907-b394-4b7c-a0ab-068692746591
03733bc2-dc08-4ce0-8553-f2f0f26323f8
038e3e8f-0693-43ea-abaa-6357c078f87a
03c9fad5-b891-4386-8f96-b1609e293bbc
057cbcdd-e626-4703-8848-da770f70f92a
06425d17-14d3-424d-afe4-ad487f1b31c5
077b5e9d-4050-403f-b6c3-9142605ecb35
08094259-5442-4852-9a79-ea47ce08084f
0849cadc-b74c-4823-a3c8-b11d45e8e647
097a5f5e-afc6-4f6b-ad40-6732c15b4e8f
0a30c4ec-d646-464b-aa97-39c84273cba7
0a67c432-c4ca-4254-aed8-78010b2032c2
0ae254f6-4f99-4926-8ace-094183f2509a
0d5e353c-88bc-4fe4-b843-6de25b89b931
0fa9dc7d-bc3f-4105-86ae-0e6e77e9c846
0fe6204d-9215-4c3d-8c69-363495c34a63
11b07a59-36f3-44e3-a3a1-61db22710d00
11b9825d-4110-4e2e-a357-336e7401310f
12c01245-4760-4add-bf87-56b9a5eeb085
13351c05-2155-48d9-9dfd-a8b0663184bf
158eb82a-9e78-42d0-a27e-57ce5f564118
15e645e2-0a59-4dd0-8e07-6e29b5dba7fe
1629474b-19bc-43ed-9d30-0f09aa755509
16cb7b61-2e44-4ee8-aa0a-396007ffdab6
1