In [17]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import plotly.express as px
from faker import Faker
import random

# Faker für Dummy-Daten
fake = Faker()

In [18]:
# Dummy-Daten erstellen
customers = [f"CUST_{i:04d}" for i in range(1, 201)]
data = {
    "Customer_ID": customers,
    "Total_Revenue": [random.randint(50, 5000) for _ in range(200)],
    "Purchase_Count": [random.randint(1, 20) for _ in range(200)],
    "Pref_Category": [random.choice(["Electronics", "Clothing"]) for _ in range(200)]
}
df = pd.DataFrame(data)
df.to_csv("../data/customer_data.csv", index=False)
print("Daten erstellt: customer_data.csv")

Daten erstellt: customer_data.csv


In [19]:
# Daten laden
df = pd.read_csv("../data/customer_data.csv")

# Features für Clustering
X = df[["Total_Revenue", "Purchase_Count"]]

# K-Means anwenden
kmeans = KMeans(n_clusters=3, random_state=42)
df["Cluster"] = kmeans.fit_predict(X)

# Cluster beschreiben
df["Cluster_Name"] = df["Cluster"].map({
    0: "High-Spenders",
    1: "Gelegenheitskäufer",
    2: "Mittlere Kunden"
})
print(df.head())

  Customer_ID  Total_Revenue  Purchase_Count Pref_Category  Cluster  \
0   CUST_0001           3844               5   Electronics        0   
1   CUST_0002           1726              12      Clothing        2   
2   CUST_0003           3763              16      Clothing        0   
3   CUST_0004           2508              10   Electronics        2   
4   CUST_0005           4858               5   Electronics        0   

      Cluster_Name  
0    High-Spenders  
1  Mittlere Kunden  
2    High-Spenders  
3  Mittlere Kunden  
4    High-Spenders  


In [None]:
# 3D-Plot mit Plotly
fig = px.scatter_3d(df, x="Total_Revenue", y="Purchase_Count", z="Cluster", 
                    color="Cluster_Name", 
                    title="Kundensegmentierung: High-Spenders, Gelegenheitskäufer & mehr",
                    labels={"Total_Revenue": "Umsatz (CHF)", "Purchase_Count": "Käufe", "Cluster": "Segment"},
                    size_max=10, opacity=0.7)

fig.update_layout(
    scene=dict(
        xaxis_title="Umsatz (CHF)",
        yaxis_title="Anzahl Käufe",
        zaxis_title="Cluster"
    ),
    title_font_size=20,
    template="plotly_dark"
)

fig.write_html("../visualizations/customer_segments_3d.html")
fig.write_image("../visualizations/customer_segments_3d.png", scale=3)
fig.show()

ValueError: 
Image export using the "kaleido" engine requires the kaleido package,
which can be installed using pip:
    $ pip install -U kaleido
