# **Packages**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
# PCA
from sklearn.decomposition import PCA
# Normalisation
from sklearn.preprocessing import StandardScaler
# Hierarchical clustering
import scipy.cluster.hierarchy as sch
from sklearn.cluster import AgglomerativeClustering
# On hot encoding
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
# date
from datetime import datetime

import seaborn as sns
import scipy.cluster.hierarchy as sch




from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"



# **Data Preparation**

Importer le jeu de données


In [2]:
data_set = pd.read_csv("/content/Dataset - Ads _ Levallois-Perret - 2019-08 - export-ads-levallois-perret-2019-08-27.csv")


Afficher le jeu de données

In [3]:
data_set.head(1)

Unnamed: 0,ID,URL,CRAWL_SOURCE,PROPERTY_TYPE,NEW_BUILD,DESCRIPTION,IMAGES,SURFACE,LAND_SURFACE,BALCONY_SURFACE,...,DEALER_NAME,DEALER_TYPE,CITY_ID,CITY,ZIP_CODE,DEPT_CODE,PUBLICATION_START_DATE,PUBLICATION_END_DATE,LAST_CRAWL_DATE,LAST_PRICE_DECREASE_DATE
0,22c05930-0eb5-11e7-b53d-bbead8ba43fe,http://www.avendrealouer.fr/location/levallois...,A_VENDRE_A_LOUER,APARTMENT,False,"Au rez de chaussée d'un bel immeuble récent,ap...","[""https://cf-medias.avendrealouer.fr/image/_87...",72.0,,,...,Lamirand Et Associes,AGENCY,54178039,Levallois-Perret,92300.0,92,2017-03-22T04:07:56.095,,2017-04-21T18:52:35.733,


Afficher la taille du jeu de donées


In [4]:
data_set.shape

(2164, 57)

Afficher les colonnes du jeu de données

In [5]:
data_set.columns

Index(['ID', 'URL', 'CRAWL_SOURCE', 'PROPERTY_TYPE', 'NEW_BUILD',
       'DESCRIPTION', 'IMAGES', 'SURFACE', 'LAND_SURFACE', 'BALCONY_SURFACE',
       'TERRACE_SURFACE', 'ROOM_COUNT', 'BEDROOM_COUNT', 'BATHROOM_COUNT',
       'LUNCHROOM_COUNT', 'TOILET_COUNT', 'FURNISHED', 'FIREPLACE',
       'AIR_CONDITIONING', 'GARDEN', 'SWIMMING_POOL', 'BALCONY', 'TERRACE',
       'CELLAR', 'PARKING', 'PARKING_COUNT', 'HEATING_TYPES', 'HEATING_MODE',
       'FLOOR', 'FLOOR_COUNT', 'CONSTRUCTION_YEAR', 'ELEVATOR', 'CARETAKER',
       'ENERGY_CONSUMPTION', 'GREENHOUSE_GAS_CONSUMPTION', 'MARKETING_TYPE',
       'PRICE', 'PRICE_M2', 'PRICE_EVENTS', 'RENTAL_EXPENSES',
       'RENTAL_EXPENSES_INCLUDED', 'DEPOSIT', 'FEES', 'FEES_INCLUDED',
       'EXCLUSIVE_MANDATE', 'AGENCIES_UNWANTED', 'OCCUPIED', 'DEALER_NAME',
       'DEALER_TYPE', 'CITY_ID', 'CITY', 'ZIP_CODE', 'DEPT_CODE',
       'PUBLICATION_START_DATE', 'PUBLICATION_END_DATE', 'LAST_CRAWL_DATE',
       'LAST_PRICE_DECREASE_DATE'],
      dtype='obje

Afficher les informations sur chaque colonne

In [6]:
data_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2164 entries, 0 to 2163
Data columns (total 57 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   ID                          2164 non-null   object 
 1   URL                         2164 non-null   object 
 2   CRAWL_SOURCE                2164 non-null   object 
 3   PROPERTY_TYPE               2164 non-null   object 
 4   NEW_BUILD                   1973 non-null   object 
 5   DESCRIPTION                 2160 non-null   object 
 6   IMAGES                      2164 non-null   object 
 7   SURFACE                     2050 non-null   float64
 8   LAND_SURFACE                3 non-null      float64
 9   BALCONY_SURFACE             0 non-null      float64
 10  TERRACE_SURFACE             25 non-null     float64
 11  ROOM_COUNT                  1835 non-null   float64
 12  BEDROOM_COUNT               696 non-null    float64
 13  BATHROOM_COUNT              0 non

Voir les colonnes qui contient le plus de NaN, pour le supprimer ensuite

In [7]:
data_set.isnull().sum()

ID                               0
URL                              0
CRAWL_SOURCE                     0
PROPERTY_TYPE                    0
NEW_BUILD                      191
DESCRIPTION                      4
IMAGES                           0
SURFACE                        114
LAND_SURFACE                  2161
BALCONY_SURFACE               2164
TERRACE_SURFACE               2139
ROOM_COUNT                     329
BEDROOM_COUNT                 1468
BATHROOM_COUNT                2164
LUNCHROOM_COUNT               2164
TOILET_COUNT                  2164
FURNISHED                     1697
FIREPLACE                     2164
AIR_CONDITIONING              2164
GARDEN                        2150
SWIMMING_POOL                 2158
BALCONY                       2164
TERRACE                       2078
CELLAR                        2164
PARKING                          0
PARKING_COUNT                 2007
HEATING_TYPES                    0
HEATING_MODE                  1511
FLOOR               

# Méthodologie de regroupement

Après étude de la base de données, on constate qu'il y'a une colonne "PROPERTY_TYPE" a cinq possibilités :

In [49]:
data_set["PROPERTY_TYPE"].unique()

array(['APARTMENT', 'PREMISES', 'HOUSE', 'PARKING', 'BUILDING'],
      dtype=object)

On constate aussi qu'il y'a une colonne "EXCLUSIVE_MANDATE", qui signifique que le proprietaire du bien immobilier a donner droit a un seul et unique agent immobilier de publie son bien immobilier, ainsi on peut par cette classification resortir les outliers.

In [50]:
data_set["EXCLUSIVE_MANDATE"].unique()

array([False,  True])

Comme on veut regrouper les annonces, avoir un data set petit nous facilite de regrouper les annonces suivant leurs similarites, donc notre méthodologie sera la suivant :

1. Diviser le data set en 5 parties suivant le type de la propriete noté par exemple dataset_P

2. Diviser chaque dataset_P en deux, suivant le type EXCLUSIVE_MANDATE, avec si s'est vrai nous permet de resortir directement l'annocne(pas toujours mais mieux) unique du bien immobilier.

In [51]:
### Apartment
data_apar = data_set[data_set["PROPERTY_TYPE"] == 'APARTMENT']
data_apar_ex = data_apar[data_apar["EXCLUSIVE_MANDATE"] == True]       # Si EXCLUSIVE_MANDATE est bien signé
data_apar_no_ex = data_apar[data_apar["EXCLUSIVE_MANDATE"] == False]   # Si EXCLUSIVE_MANDATE non signé

# Taille
data_apar.shape
data_apar_ex.shape
data_apar_no_ex.shape

(1436, 57)

(135, 57)

(1301, 57)

In [52]:
### Premises
data_pre = data_set[data_set["PROPERTY_TYPE"] == 'PREMISES']
data_pre_ex = data_pre[data_pre["EXCLUSIVE_MANDATE"] == True]       # Si EXCLUSIVE_MANDATE est bien signé
data_pre_no_ex = data_pre[data_pre["EXCLUSIVE_MANDATE"] == False]   # Si EXCLUSIVE_MANDATE non signé

# Taille
data_pre.shape
data_pre_ex.shape
data_pre_no_ex.shape

(479, 57)

(22, 57)

(457, 57)

In [53]:
###  House
data_hou = data_set[data_set["PROPERTY_TYPE"] == 'HOUSE']
data_hou_ex = data_hou[data_hou["EXCLUSIVE_MANDATE"] == True]       # Si EXCLUSIVE_MANDATE est bien signé
data_hou_no_ex = data_hou[data_hou["EXCLUSIVE_MANDATE"] == False]   # Si EXCLUSIVE_MANDATE non signé

# Taille
data_hou.shape
data_hou_ex.shape
data_hou_no_ex.shape

(52, 57)

(2, 57)

(50, 57)

In [54]:
### Parking
data_par = data_set[data_set["PROPERTY_TYPE"] == 'PARKING']
data_par_ex = data_par[data_par["EXCLUSIVE_MANDATE"] == True]       # Si EXCLUSIVE_MANDATE est bien signé
data_par_no_ex = data_par[data_par["EXCLUSIVE_MANDATE"] == False]   # Si EXCLUSIVE_MANDATE non signé

# Taille
data_par.shape
data_par_ex.shape
data_par_no_ex.shape

(188, 57)

(8, 57)

(180, 57)

In [55]:
### Building
data_bui = data_set[data_set["PROPERTY_TYPE"] == 'BUILDING']
data_bui_ex = data_bui[data_bui["EXCLUSIVE_MANDATE"] == True]       # Si EXCLUSIVE_MANDATE est bien signé
data_bui_no_ex = data_bui[data_bui["EXCLUSIVE_MANDATE"] == False]   # Si EXCLUSIVE_MANDATE non signé

# Taille
data_bui.shape
data_bui_ex.shape
data_bui_no_ex.shape

(9, 57)

(0, 57)

(9, 57)

On constate bien qu'il y'a une énorme difference de taille, se qui nous aidera pour regrouper efficacement

# **Exploratory Data Analysis and Building Model**

### On commence par data_bui :

In [15]:
data_bui_ex  # Vide

Unnamed: 0,ID,URL,CRAWL_SOURCE,PROPERTY_TYPE,NEW_BUILD,DESCRIPTION,IMAGES,SURFACE,LAND_SURFACE,BALCONY_SURFACE,...,DEALER_NAME,DEALER_TYPE,CITY_ID,CITY,ZIP_CODE,DEPT_CODE,PUBLICATION_START_DATE,PUBLICATION_END_DATE,LAST_CRAWL_DATE,LAST_PRICE_DECREASE_DATE


On connait que les biens immobilier sont dans la même ville, on supprime alors les colonnes CITY_ID, CITY, ZIP_CODE, DEPT_CODE.

Comme on veut regroupe par rapport aux caracteristiques d'un bien et non par rapport au nom de l'agence et nom du site d'annonces et les caracteritiques de ses dernieres et d'après la commande precedent, on a des colonnes vide.

On conclue qu'on va garder que les colonnes suivents :

In [None]:
data_bui_no_ex_grp = data_bui_no_ex[['PRICE','PRICE_M2','SURFACE']]
data_bui_no_ex_grp


On a pas besoin de Machine learning pour regroupe ces annonces par rapport à un bien immobilier puisque qu'ils sont uniques, ainsi on a :



In [None]:
data_bui_no_ex_grp

 ### On passe a data_par :

In [19]:
data_par_ex

Unnamed: 0,ID,URL,CRAWL_SOURCE,PROPERTY_TYPE,NEW_BUILD,DESCRIPTION,IMAGES,SURFACE,LAND_SURFACE,BALCONY_SURFACE,...,DEALER_NAME,DEALER_TYPE,CITY_ID,CITY,ZIP_CODE,DEPT_CODE,PUBLICATION_START_DATE,PUBLICATION_END_DATE,LAST_CRAWL_DATE,LAST_PRICE_DECREASE_DATE
358,d0ad6680-4ae2-11e9-ba7a-0ff7ef4fe44b,http://www.logic-immo.com/detail-vente-aa9864c...,LOGIC_IMMO,PARKING,,LEVALLOIS Parc de la Planchette - Dans une rés...,"[""https://mmf.logic-immo.com/mmf/ads/photo-pro...",15.0,,,...,Ouest Avenue,AGENCY,54178039,Levallois-Perret,92300.0,92,2019-03-20T07:36:02.5,,2019-05-19T07:11:08.006,2019-05-19
1213,760ec980-b7f2-11e9-9fab-c3006e339e11,https://www.paruvendu.fr/immobilier/location/p...,PARU_VENDU,PARKING,,"85 rue edouard vaillant, levallois perret, MAN...","[""https://media.paruvendu.fr/media_ext/photo3....",,,,...,Groupe Babylone,AGENCY,54178039,Levallois-Perret,92300.0,92,2019-08-06T02:32:38.337,,2019-08-13T04:46:38.928,
1554,e7309d90-1ece-11e9-8321-1350c9447565,https://www.logic-immo.com/detail-location-f2d...,LOGIC_IMMO,PARKING,,Place de parking à louer au 2 rue Maurice Rave...,"[""https://mmf.logic-immo.com/mmf/ads/photo-pro...",17.0,,,...,yespark,AGENCY,54178039,Levallois-Perret,92300.0,92,2019-01-23T05:22:36.476,,2019-08-07T21:30:11.93,
1613,4b83fb00-ae78-11e9-81e9-5f24299f2ef3,https://www.logic-immo.com/detail-location-96d...,LOGIC_IMMO,PARKING,,Rue Jules Guesde emplacement Parking au1 er so...,"[""https://mmf.logic-immo.com/mmf/ads/photo-pro...",,,,...,GROUPE SOGESTIM,AGENCY,54178039,Levallois-Perret,92300.0,92,2019-07-25T01:05:25.204,,2019-08-16T18:52:36.285,
1731,a7589690-5afc-11e9-adc1-59e67c527828,https://www.seloger.com/annonces/achat/parking...,SE_LOGER,PARKING,,Levallois • So Ouest • Parking • Exclusivité. ...,"[""https://v.seloger.com/s/width/800/visuels/0/...",,,,...,Bosetta Immobilier,AGENCY,54178039,Levallois-Perret,92300.0,92,2019-03-20T16:20:23.286,,2019-08-08T18:00:13.446,
1773,c1ba9cb0-5730-11e9-bcf5-3b398c242d19,https://www.seloger.com/annonces/achat/parking...,SE_LOGER,PARKING,,En exclusivité dans votre agence ERA Levallois...,"[""https://pix.yanport.com/ads/c1ba9cb0-5730-11...",,,,...,Era Immobilier Levallois,AGENCY,54178039,Levallois-Perret,92300.0,92,2019-04-04T23:24:11.983,,2019-08-25T19:11:30.302,2019-06-25
1930,eb8cee30-4b41-11e9-ba7a-0ff7ef4fe44b,https://immobilier.lefigaro.fr/annonces/annonc...,EXPLORIMMO,PARKING,,Levallois - So Ouest - Parking - Exclusivité. ...,"[""https://thbr.figarocms.net/images/8eX8P9teA8...",,,,...,Bosetta Immobilier,AGENCY,54178039,Levallois-Perret,92300.0,92,2019-03-20T18:56:48.789,,2019-08-13T03:58:21.878,
2162,8cba88c0-a07a-11e9-a8e6-0de7b497e456,https://www.logic-immo.com/detail-location-bae...,LOGIC_IMMO,PARKING,,"Levallois-Perret, rue Paul Vaillant Couturier,...","[""https://mmf.logic-immo.com/mmf/ads/photo-pro...",,,,...,Nicolas Lanteri Immobilier,AGENCY,54178039,Levallois-Perret,92300.0,92,2019-07-06T10:07:45.114,,2019-08-27T11:53:04.317,


De la même maniere on a ci-dessus les biens immobilers uniques

In [62]:
#
data_par_no_ex_grp = data_par_no_ex[['PRICE_M2','SURFACE','PRICE']]
data_par_no_ex_grp = data_par_no_ex_grp.dropna()

# Perform DBSCAN clustering
from sklearn.cluster import DBSCAN

dbscan = DBSCAN(eps=1, min_samples=2)
data_par_no_ex_grp['Cluster'] = dbscan.fit_predict(data_par_no_ex_grp)

# Display the clusters
unique_clusters = data_par_no_ex_grp['Cluster'].unique()
for cluster_num in unique_clusters:
    if cluster_num == -1:  # Cluster -1 represents noisy points
        continue
    cluster_data = data_par_no_ex_grp[data_par_no_ex_grp['Cluster'] == cluster_num]
    print(f'Cluster {cluster_num}:')
    print(cluster_data)

Cluster 0:
      PRICE_M2  SURFACE  PRICE  Cluster
8         9.09     11.0  100.0        0
1487      9.09     11.0  100.0        0
Cluster 1:
      PRICE_M2  SURFACE  PRICE  Cluster
27        10.0     10.0  100.0        1
710       10.0     10.0  100.0        1
1100      10.0     10.0  100.0        1
1359      10.0     10.0  100.0        1
1463      10.0     10.0  100.0        1
1941      10.0     10.0  100.0        1
Cluster 2:
      PRICE_M2  SURFACE  PRICE  Cluster
33        8.33     12.0  100.0        2
440       8.33     12.0  100.0        2
500       8.33     12.0  100.0        2
1099      8.33     12.0  100.0        2
Cluster 3:
      PRICE_M2  SURFACE  PRICE  Cluster
43        14.4     10.0  144.0        3
788       14.4     10.0  144.0        3
1634      14.4     10.0  144.0        3
Cluster 4:
     PRICE_M2  SURFACE    PRICE  Cluster
84     1600.0     15.0  24000.0        4
169    1600.0     15.0  24000.0        4
Cluster 5:
     PRICE_M2  SURFACE  PRICE  Cluster
110      6.8

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_par_no_ex_grp['Cluster'] = dbscan.fit_predict(data_par_no_ex_grp)


On voit bien qu'on a les annonces bien grouper !

### On passe a data_hou :


In [None]:
data_hou_ex

Pas besoin de regrouper

In [61]:
#
data_hou_no_ex_grp = data_hou_no_ex[['SURFACE','PRICE','PRICE_M2']]
data_hou_no_ex_grp = data_hou_no_ex_grp.dropna()

# Perform DBSCAN clustering
from sklearn.cluster import DBSCAN

dbscan = DBSCAN(eps=1, min_samples=2)
data_hou_no_ex_grp['Cluster'] = dbscan.fit_predict(data_hou_no_ex_grp)

# Display the clusters
unique_clusters = data_hou_no_ex_grp['Cluster'].unique()
for cluster_num in unique_clusters:
    if cluster_num == -1:  # Cluster -1 represents noisy points
        continue
    cluster_data = data_hou_no_ex_grp[data_hou_no_ex_grp['Cluster'] == cluster_num]
    print(f'Cluster {cluster_num}:')
    print(cluster_data)

Cluster 0:
      SURFACE      PRICE  PRICE_M2  Cluster
4       330.0  2790000.0   8454.55        0
793     330.0  2790000.0   8454.55        0
1201    330.0  2790000.0   8454.55        0
1541    330.0  2790000.0   8454.55        0
1606    330.0  2790000.0   8454.55        0
1621    330.0  2790000.0   8454.55        0
1719    330.0  2790000.0   8454.55        0
1922    330.0  2790000.0   8454.55        0
1923    330.0  2790000.0   8454.55        0
1936    330.0  2790000.0   8454.55        0
Cluster 1:
     SURFACE   PRICE  PRICE_M2  Cluster
60      92.0  1890.0     20.54        1
603     92.0  1890.0     20.54        1
Cluster 2:
      SURFACE      PRICE  PRICE_M2  Cluster
74      127.0  1290000.0  10157.48        2
620     127.0  1290000.0  10157.48        2
944     127.0  1290000.0  10157.48        2
1031    127.0  1290000.0  10157.48        2
1230    127.0  1290000.0  10157.48        2
2134    127.0  1290000.0  10157.48        2
Cluster 3:
      SURFACE      PRICE  PRICE_M2  Cluster


On a encore bien regrouper les annonces

### On passe a data_pre :

In [59]:
#
data_pre_ex_grp = data_pre_ex[['SURFACE','PRICE_M2','PRICE']]
data_pre_ex_grp = data_pre_ex_grp.dropna()

# Perform DBSCAN clustering
from sklearn.cluster import DBSCAN

dbscan = DBSCAN(eps=1, min_samples=2)
data_pre_ex_grp['Cluster'] = dbscan.fit_predict(data_pre_ex_grp)

# Display the clusters
unique_clusters = data_pre_ex_grp['Cluster'].unique()
for cluster_num in unique_clusters:
    if cluster_num == -1:  # Cluster -1 represents noisy points
        continue
    cluster_data = data_pre_ex_grp[data_pre_ex_grp['Cluster'] == cluster_num]
    print(f'Cluster {cluster_num}:')
    print(cluster_data)

Cluster 0:
     SURFACE  PRICE_M2    PRICE  Cluster
313     55.0   1603.64  88200.0        0
545     55.0   1603.64  88200.0        0
Cluster 1:
      SURFACE  PRICE_M2  PRICE  Cluster
839      53.0     14.15  750.0        1
1140     53.0     14.15  750.0        1


On a regroupe pas mal les annonces

In [None]:
#
data_pre_no_ex_grp = data_pre_no_ex[['SURFACE','PRICE','PRICE_M2']]
data_pre_no_ex_grp = data_pre_no_ex_grp.dropna()

# Perform DBSCAN clustering
from sklearn.cluster import DBSCAN

dbscan = DBSCAN(eps=1, min_samples=2)
data_pre_no_ex_grp['Cluster'] = dbscan.fit_predict(data_pre_no_ex_grp)

# Display the clusters
unique_clusters = data_pre_no_ex_grp['Cluster'].unique()
for cluster_num in unique_clusters:
    if cluster_num == -1:  # Cluster -1 represents noisy points
        continue
    cluster_data = data_pre_no_ex_grp[data_pre_no_ex_grp['Cluster'] == cluster_num]
    print(f'Cluster {cluster_num}:')
    print(cluster_data)

On voit qu'on a bien classer les annonces

### On passe a data_apar :

In [72]:
#
data_apar_ex_grp = data_apar_ex[['SURFACE','PRICE_M2','PRICE']]
data_apar_ex_grp = data_apar_ex_grp.dropna()

# Perform DBSCAN clustering
from sklearn.cluster import DBSCAN

dbscan = DBSCAN(eps=1, min_samples=2)
data_apar_ex_grp['Cluster'] = dbscan.fit_predict(data_apar_ex_grp)

# Display the clusters
unique_clusters = data_apar_ex_grp['Cluster'].unique()
for cluster_num in unique_clusters:
    if cluster_num == -1:  # Cluster -1 represents noisy points
        continue
    cluster_data = data_apar_ex_grp[data_apar_ex_grp['Cluster'] == cluster_num]
    print(f'Cluster {cluster_num}:')
    print(cluster_data)

Cluster 0:
      SURFACE  PRICE_M2      PRICE  Cluster
71       45.0  10111.12  455000.44        0
1869     45.0  10111.11  455000.00        0
Cluster 1:
     SURFACE  PRICE_M2     PRICE  Cluster
137     73.0  10602.74  774000.0        1
968     73.0  10602.74  774000.0        1
Cluster 2:
     SURFACE  PRICE_M2     PRICE  Cluster
139     55.0  10727.27  590000.0        2
615     55.0  10727.27  590000.0        2
Cluster 3:
      SURFACE  PRICE_M2     PRICE  Cluster
142     85.43  11260.68  962000.0        3
2137    85.43  11260.68  962000.0        3
Cluster 4:
      SURFACE  PRICE_M2      PRICE  Cluster
200      43.0    8581.4  369000.27        4
1374     43.0    8581.4  369000.00        4
Cluster 5:
     SURFACE  PRICE_M2      PRICE  Cluster
202    110.0  10090.91  1110000.0        5
612    110.0  10090.91  1110000.0        5
942    110.0  10090.91  1110000.0        5
Cluster 6:
      SURFACE  PRICE_M2      PRICE  Cluster
207     120.0    8575.0  1029000.0        6
1287    120.0    8

In [73]:
#
data_apar_no_ex_grp = data_apar_no_ex[['SURFACE','PRICE_M2','PRICE']]
data_apar_no_ex_grp = data_apar_no_ex_grp.dropna()

# Perform DBSCAN clustering
from sklearn.cluster import DBSCAN

dbscan = DBSCAN(eps=1, min_samples=2)
data_apar_no_ex_grp['Cluster'] = dbscan.fit_predict(data_apar_no_ex_grp)

# Display the clusters
unique_clusters = data_apar_no_ex_grp['Cluster'].unique()
for cluster_num in unique_clusters:
    if cluster_num == -1:  # Cluster -1 represents noisy points
        continue
    cluster_data = data_apar_no_ex_grp[data_apar_no_ex_grp['Cluster'] == cluster_num]
    print(f'Cluster {cluster_num}:')
    print(cluster_data)

Cluster 0:
      SURFACE  PRICE_M2      PRICE  Cluster
2       267.0  18614.23  4970000.0        0
1097    267.0  18614.23  4970000.0        0
1172    267.0  18614.23  4970000.0        0
1529    267.0  18614.23  4970000.0        0
Cluster 1:
      SURFACE  PRICE_M2      PRICE  Cluster
5      142.76   9946.76  1420000.0        1
1089   142.76   9946.76  1420000.0        1
1851   142.76   9946.76  1420000.0        1
Cluster 2:
      SURFACE  PRICE_M2     PRICE  Cluster
9        57.0   10000.0  570000.0        2
92       57.0   10000.0  570000.0        2
1910     57.0   10000.0  570000.0        2
1952     57.0   10000.0  570000.0        2
Cluster 3:
      SURFACE  PRICE_M2   PRICE  Cluster
11       46.0     27.59  1269.0        3
134      46.0     27.59  1269.0        3
538      46.0     27.59  1269.0        3
1668     46.0     27.59  1269.0        3
Cluster 4:
      SURFACE  PRICE_M2     PRICE  Cluster
12       69.0   9695.65  669000.0        4
49       69.0   9695.65  669000.0        4


Enfin on a regrouper les annonces parfaitement !