In [2]:
import pandas as pd
from rapidfuzz import fuzz, distance
from collections import Counter
import re
from tqdm import tqdm
import os

# === 1. Load CSV ===
csv_file = "20250903_Extrait_Constatations_F2.csv"
df = pd.read_csv(csv_file, delimiter=';', dtype=str)
df.columns = df.columns.str.strip()
comment_col = "Commentaire"
df = df.dropna(subset=[comment_col])

# === 2. Text preprocessing helpers ===
def clean_words(text):
    if pd.isna(text):
        return []
    text = text.lower()
    text = re.sub(r'[^a-z0-9\s]', ' ', text)
    words = text.split()
    words = [w for w in words if len(w) > 1]  # ignore 1-letter words
    return words

def generate_clusters(words, min_len=2, max_len=5):
    clusters = []
    for n in range(min_len, min(max_len+1, len(words)+1)):
        for i in range(len(words)-n+1):
            cluster = ' '.join(words[i:i+n])
            # skip clusters with all single-letter words
            if all(len(w) == 1 for w in cluster.split()):
                continue
            clusters.append(cluster)
    return clusters

# === 3. Extract clusters from Commentaire ===
print("Extracting clusters from Commentaire...")
all_clusters = []
for com in tqdm(df[comment_col], desc="Extracting"):
    words = clean_words(com)
    clusters = generate_clusters(words, min_len=2, max_len=5)
    all_clusters.extend(clusters)

# Count exact occurrences
cluster_counter = Counter(all_clusters)
cluster_candidates = {k:v for k,v in cluster_counter.items() if v >= 7}
print(f"→ Found {len(cluster_candidates)} unique clusters with ≥7 occurrences.")

# === 4. Fuzzy clustering parameters ===
threshold_similarity = 90  # %
max_typo_chars = 4         # edit distance threshold
checkpoint_every = 50
checkpoint_file = "checkpoint_commentaire_clusters.csv"

# === 5. Resume from checkpoint if exists ===
results = []
processed = set()
if os.path.exists(checkpoint_file):
    checkpoint_df = pd.read_csv(checkpoint_file, sep=';')
    processed = set(checkpoint_df["Cluster_Representative"].tolist())
    results = checkpoint_df.to_dict('records')
    print(f"Resuming from checkpoint → {len(processed)} clusters already processed.")

# === 6. Fuzzy clustering ===
cluster_groups = {}
candidates_list = list(cluster_candidates.keys())

print("Performing fuzzy clustering with RapidFuzz...")
for i, cluster in enumerate(tqdm(candidates_list, desc="Fuzzy clustering")):
    if cluster in processed:
        continue

    found = False
    for key in cluster_groups.keys():
        similarity = fuzz.ratio(cluster, key)
        edit_dist = distance.Levenshtein.distance(cluster, key)
        if similarity >= threshold_similarity or edit_dist <= max_typo_chars:
            cluster_groups[key].append(cluster)
            found = True
            break
    if not found:
        cluster_groups[cluster] = [cluster]

    # === Checkpoint saving ===
    if (i + 1) % checkpoint_every == 0:
        partial_results = []
        total_counts = sum(cluster_counter.values())
        for key, variants in cluster_groups.items():
            count = sum([cluster_counter[v] for v in variants])
            per_mille = round(count / total_counts * 1000, 2)
            partial_results.append({
                "Cluster_Representative": key,
                "Count": count,
                "Variations": '|'.join(variants),
                "PerMille": per_mille
            })
        pd.DataFrame(partial_results).to_csv(checkpoint_file, sep=';', index=False)
        print(f"Checkpoint saved at {i+1} clusters → {checkpoint_file}")

# === 7. Final aggregation ===
print("Finalizing results...")
total_counts = sum(cluster_counter.values())
results = []
for key, variants in tqdm(cluster_groups.items(), desc="Final aggregation"):
    count = sum([cluster_counter[v] for v in variants])
    per_mille = round(count / total_counts * 1000, 2)
    results.append({
        "Cluster_Representative": key,
        "Count": count,
        "Variations": '|'.join(variants),
        "PerMille": per_mille
    })

summary_df = pd.DataFrame(results).sort_values(by="Count", ascending=False)
summary_df.to_csv("commentaire_clusters_summary_final.csv", sep=';', index=False)
display(summary_df)
print("✅ Saved final results as 'commentaire_clusters_summary_final.csv'")


Extracting clusters from Commentaire...


Extracting: 100%|█████████████████████████████████████████████████████████████| 27665/27665 [00:00<00:00, 43328.58it/s]


→ Found 4671 unique clusters with ≥7 occurrences.
Performing fuzzy clustering with RapidFuzz...


Fuzzy clustering:   2%|█                                                            | 84/4671 [00:00<00:05, 827.27it/s]

Checkpoint saved at 50 clusters → checkpoint_commentaire_clusters.csv
Checkpoint saved at 100 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:   4%|██▏                                                         | 167/4671 [00:00<00:09, 489.41it/s]

Checkpoint saved at 150 clusters → checkpoint_commentaire_clusters.csv
Checkpoint saved at 200 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:   6%|███▍                                                        | 270/4671 [00:00<00:12, 341.28it/s]

Checkpoint saved at 250 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:   7%|████▎                                                       | 337/4671 [00:01<00:17, 248.84it/s]

Checkpoint saved at 300 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:   8%|████▉                                                       | 387/4671 [00:01<00:20, 214.05it/s]

Checkpoint saved at 350 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:   9%|█████▎                                                      | 409/4671 [00:01<00:21, 196.04it/s]

Checkpoint saved at 400 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  10%|█████▉                                                      | 464/4671 [00:01<00:26, 161.55it/s]

Checkpoint saved at 450 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  11%|██████▌                                                     | 510/4671 [00:02<00:32, 128.28it/s]

Checkpoint saved at 500 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  12%|███████▎                                                    | 574/4671 [00:02<00:28, 143.96it/s]

Checkpoint saved at 550 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  13%|███████▉                                                    | 620/4671 [00:03<00:25, 162.02it/s]

Checkpoint saved at 600 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  14%|████████▌                                                   | 670/4671 [00:03<00:31, 126.82it/s]

Checkpoint saved at 650 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  16%|█████████▎                                                  | 726/4671 [00:03<00:28, 136.76it/s]

Checkpoint saved at 700 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  17%|█████████▉                                                  | 777/4671 [00:04<00:25, 151.90it/s]

Checkpoint saved at 750 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  17%|██████████▍                                                 | 813/4671 [00:04<00:24, 159.48it/s]

Checkpoint saved at 800 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  18%|███████████                                                 | 857/4671 [00:04<00:32, 117.29it/s]

Checkpoint saved at 850 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  20%|███████████▉                                                 | 912/4671 [00:05<00:42, 88.19it/s]

Checkpoint saved at 900 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  21%|████████████▋                                                | 970/4671 [00:06<00:44, 83.94it/s]

Checkpoint saved at 950 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  22%|████████████▉                                               | 1009/4671 [00:06<00:43, 84.98it/s]

Checkpoint saved at 1000 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  23%|█████████████▌                                              | 1055/4671 [00:07<00:37, 95.80it/s]

Checkpoint saved at 1050 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  24%|██████████████▏                                             | 1109/4671 [00:08<01:03, 56.32it/s]

Checkpoint saved at 1100 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  25%|███████████████                                             | 1170/4671 [00:08<00:38, 90.02it/s]

Checkpoint saved at 1150 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  26%|███████████████▌                                            | 1208/4671 [00:09<00:48, 71.47it/s]

Checkpoint saved at 1200 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  27%|████████████████                                            | 1255/4671 [00:10<01:13, 46.55it/s]

Checkpoint saved at 1250 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  28%|████████████████▊                                           | 1307/4671 [00:11<01:02, 54.16it/s]

Checkpoint saved at 1300 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  29%|█████████████████▌                                          | 1367/4671 [00:12<00:50, 65.29it/s]

Checkpoint saved at 1350 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  30%|██████████████████▏                                         | 1413/4671 [00:13<00:41, 78.61it/s]

Checkpoint saved at 1400 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  31%|██████████████████▊                                         | 1467/4671 [00:14<00:47, 67.28it/s]

Checkpoint saved at 1450 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  32%|███████████████████▎                                        | 1506/4671 [00:15<01:11, 44.30it/s]

Checkpoint saved at 1500 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  33%|███████████████████▉                                        | 1556/4671 [00:15<00:56, 54.98it/s]

Checkpoint saved at 1550 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  34%|████████████████████▌                                       | 1605/4671 [00:16<00:55, 55.48it/s]

Checkpoint saved at 1600 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  35%|█████████████████████▎                                      | 1657/4671 [00:17<00:53, 56.62it/s]

Checkpoint saved at 1650 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  37%|█████████████████████▉                                      | 1705/4671 [00:18<01:07, 44.18it/s]

Checkpoint saved at 1700 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  38%|██████████████████████▋                                     | 1762/4671 [00:20<00:47, 61.75it/s]

Checkpoint saved at 1750 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  39%|███████████████████████▏                                    | 1805/4671 [00:20<01:00, 47.28it/s]

Checkpoint saved at 1800 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  40%|███████████████████████▊                                    | 1853/4671 [00:21<00:57, 49.03it/s]

Checkpoint saved at 1850 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  41%|████████████████████████▌                                   | 1908/4671 [00:23<00:54, 50.59it/s]

Checkpoint saved at 1900 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  42%|█████████████████████████▎                                  | 1966/4671 [00:24<00:38, 70.24it/s]

Checkpoint saved at 1950 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  43%|█████████████████████████▋                                  | 2002/4671 [00:24<00:47, 56.40it/s]

Checkpoint saved at 2000 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  44%|██████████████████████████▎                                 | 2053/4671 [00:25<01:02, 41.69it/s]

Checkpoint saved at 2050 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  45%|███████████████████████████                                 | 2103/4671 [00:27<01:11, 36.06it/s]

Checkpoint saved at 2100 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  46%|███████████████████████████▊                                | 2162/4671 [00:29<01:09, 36.30it/s]

Checkpoint saved at 2150 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  47%|████████████████████████████▎                               | 2201/4671 [00:29<01:01, 39.93it/s]

Checkpoint saved at 2200 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  48%|█████████████████████████████                               | 2258/4671 [00:31<00:55, 43.24it/s]

Checkpoint saved at 2250 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  49%|█████████████████████████████▌                              | 2305/4671 [00:32<00:49, 47.64it/s]

Checkpoint saved at 2300 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  50%|██████████████████████████████▏                             | 2353/4671 [00:33<01:14, 31.01it/s]

Checkpoint saved at 2350 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  51%|██████████████████████████████▊                             | 2398/4671 [00:34<00:50, 45.10it/s]

Checkpoint saved at 2400 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  53%|███████████████████████████████▌                            | 2454/4671 [00:36<01:04, 34.36it/s]

Checkpoint saved at 2450 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  54%|████████████████████████████████▏                           | 2504/4671 [00:37<01:00, 35.69it/s]

Checkpoint saved at 2500 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  55%|████████████████████████████████▊                           | 2556/4671 [00:39<00:49, 42.75it/s]

Checkpoint saved at 2550 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  56%|█████████████████████████████████▍                          | 2603/4671 [00:41<01:10, 29.36it/s]

Checkpoint saved at 2600 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  57%|██████████████████████████████████▏                         | 2659/4671 [00:42<00:36, 55.53it/s]

Checkpoint saved at 2650 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  58%|██████████████████████████████████▊                         | 2707/4671 [00:44<00:54, 35.74it/s]

Checkpoint saved at 2700 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  59%|███████████████████████████████████▍                        | 2754/4671 [00:46<01:00, 31.71it/s]

Checkpoint saved at 2750 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  60%|████████████████████████████████████                        | 2804/4671 [00:47<00:57, 32.57it/s]

Checkpoint saved at 2800 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  61%|████████████████████████████████████▋                       | 2854/4671 [00:49<01:03, 28.40it/s]

Checkpoint saved at 2850 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  62%|█████████████████████████████████████▎                      | 2904/4671 [00:51<01:20, 21.93it/s]

Checkpoint saved at 2900 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  63%|█████████████████████████████████████▉                      | 2957/4671 [00:53<00:46, 36.63it/s]

Checkpoint saved at 2950 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  64%|██████████████████████████████████████▌                     | 3005/4671 [00:55<01:04, 25.84it/s]

Checkpoint saved at 3000 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  65%|███████████████████████████████████████▏                    | 3050/4671 [00:56<00:46, 34.82it/s]

Checkpoint saved at 3050 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  66%|███████████████████████████████████████▉                    | 3105/4671 [00:59<01:02, 24.91it/s]

Checkpoint saved at 3100 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  67%|████████████████████████████████████████▍                   | 3151/4671 [01:00<00:50, 30.23it/s]

Checkpoint saved at 3150 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  69%|█████████████████████████████████████████▏                  | 3203/4671 [01:04<01:21, 17.92it/s]

Checkpoint saved at 3200 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  70%|█████████████████████████████████████████▊                  | 3254/4671 [01:05<00:51, 27.26it/s]

Checkpoint saved at 3250 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  71%|██████████████████████████████████████████▍                 | 3302/4671 [01:08<01:18, 17.46it/s]

Checkpoint saved at 3300 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  72%|███████████████████████████████████████████                 | 3352/4671 [01:11<01:12, 18.24it/s]

Checkpoint saved at 3350 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  73%|███████████████████████████████████████████▋                | 3402/4671 [01:13<01:16, 16.53it/s]

Checkpoint saved at 3400 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  74%|████████████████████████████████████████████▎               | 3454/4671 [01:15<00:50, 24.09it/s]

Checkpoint saved at 3450 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  75%|█████████████████████████████████████████████               | 3505/4671 [01:17<00:38, 30.19it/s]

Checkpoint saved at 3500 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  76%|█████████████████████████████████████████████▋              | 3552/4671 [01:19<00:47, 23.63it/s]

Checkpoint saved at 3550 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  77%|██████████████████████████████████████████████▎             | 3608/4671 [01:20<00:22, 46.62it/s]

Checkpoint saved at 3600 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  78%|██████████████████████████████████████████████▉             | 3655/4671 [01:22<00:33, 30.09it/s]

Checkpoint saved at 3650 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  79%|███████████████████████████████████████████████▌            | 3704/4671 [01:24<00:34, 28.36it/s]

Checkpoint saved at 3700 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  80%|████████████████████████████████████████████████▏           | 3749/4671 [01:25<00:25, 35.61it/s]

Checkpoint saved at 3750 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  81%|████████████████████████████████████████████████▉           | 3806/4671 [01:28<00:31, 27.21it/s]

Checkpoint saved at 3800 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  82%|█████████████████████████████████████████████████▍          | 3853/4671 [01:30<00:32, 24.97it/s]

Checkpoint saved at 3850 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  84%|██████████████████████████████████████████████████          | 3902/4671 [01:32<00:33, 23.29it/s]

Checkpoint saved at 3900 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  85%|██████████████████████████████████████████████████▋         | 3950/4671 [01:34<00:36, 19.76it/s]

Checkpoint saved at 3950 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  86%|███████████████████████████████████████████████████▍        | 4000/4671 [01:36<00:21, 31.95it/s]

Checkpoint saved at 4000 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  87%|████████████████████████████████████████████████████        | 4052/4671 [01:39<00:34, 18.15it/s]

Checkpoint saved at 4050 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  88%|████████████████████████████████████████████████████▋       | 4105/4671 [01:41<00:16, 34.85it/s]

Checkpoint saved at 4100 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  89%|█████████████████████████████████████████████████████▎      | 4150/4671 [01:43<00:27, 18.95it/s]

Checkpoint saved at 4150 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  90%|█████████████████████████████████████████████████████▉      | 4201/4671 [01:46<00:19, 23.95it/s]

Checkpoint saved at 4200 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  91%|██████████████████████████████████████████████████████▌     | 4252/4671 [01:49<00:26, 15.94it/s]

Checkpoint saved at 4250 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  92%|███████████████████████████████████████████████████████▎    | 4302/4671 [01:53<00:28, 13.03it/s]

Checkpoint saved at 4300 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  93%|███████████████████████████████████████████████████████▉    | 4352/4671 [01:57<00:22, 14.22it/s]

Checkpoint saved at 4350 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  94%|████████████████████████████████████████████████████████▌   | 4401/4671 [02:02<00:36,  7.46it/s]

Checkpoint saved at 4400 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  95%|█████████████████████████████████████████████████████████▏  | 4450/4671 [02:06<00:23,  9.33it/s]

Checkpoint saved at 4450 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  96%|█████████████████████████████████████████████████████████▊  | 4500/4671 [02:10<00:16, 10.45it/s]

Checkpoint saved at 4500 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  97%|██████████████████████████████████████████████████████████▍ | 4551/4671 [02:15<00:09, 12.10it/s]

Checkpoint saved at 4550 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering:  99%|███████████████████████████████████████████████████████████ | 4601/4671 [02:20<00:05, 12.17it/s]

Checkpoint saved at 4600 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering: 100%|███████████████████████████████████████████████████████████▋| 4651/4671 [02:25<00:02,  7.11it/s]

Checkpoint saved at 4650 clusters → checkpoint_commentaire_clusters.csv


Fuzzy clustering: 100%|████████████████████████████████████████████████████████████| 4671/4671 [02:28<00:00, 31.38it/s]


Finalizing results...


Final aggregation: 100%|███████████████████████████████████████████████████████| 1898/1898 [00:00<00:00, 120826.72it/s]


Unnamed: 0,Cluster_Representative,Count,Variations,PerMille
0,stk bis,6517,stk bis|stk bis 300|stk klein|stk bis 700|stk ...,19.42
1,bis 300,5626,bis 300|bis 300 cm2|bis cm|bis 700|bis 100|bis...,16.76
5,korrodierter bewehrung,5028,korrodierter bewehrung|mit korrodierter bewehr...,14.98
3,cm2 mit,4809,cm2 mit|che mit|700 cm2 mit|100 cm2 mit|500 cm...,14.33
4,mit korrodierter,4469,mit korrodierter|cm2 mit korrodierter|stk korr...,13.32
...,...,...,...,...
1893,querschnittsverlust die stabanker und flanschm...,7,querschnittsverlust die stabanker und flanschm...,0.02
1894,flanschmuttern weisen keinen korrosionsschutz ...,7,flanschmuttern weisen keinen korrosionsschutz ...,0.02
1895,weisen keinen korrosionsschutz verzinkung auf,7,weisen keinen korrosionsschutz verzinkung auf,0.02
1896,korrosionsschutz verzinkung auf derzeitige kor...,7,korrosionsschutz verzinkung auf derzeitige kor...,0.02


✅ Saved final results as 'commentaire_clusters_summary_final.csv'
