In [1]:
# Clone into a directory named `PHkmeans`: the notebook imports `PHkmeans.src...`,
# and the default checkout directory `PH-kmeans` contains a hyphen, so it cannot
# be used as a Python package name.
!git clone https://github.com/Mojtabamehrabi/PH-kmeans.git PHkmeans

Cloning into 'PH-kmeans'...
remote: Enumerating objects: 72, done.[K
remote: Counting objects: 100% (72/72), done.[K
remote: Compressing objects: 100% (60/60), done.[K
remote: Total 72 (delta 32), reused 23 (delta 9), pack-reused 0[K
Unpacking objects: 100% (72/72), done.


In [9]:
# Imports used throughout the notebook.
# NOTE(review): `gtda` presumably comes from the giotto-tda package that is
# installed in a later cell, and `PHkmeans` must be importable from the working
# directory (see the git clone cell). On a fresh kernel, run the clone and
# install cells before this one.
import numpy as np
import pandas as pd
from statistics import mean
from PHkmeans.src.data_utils.generate_synthetic_data import make_point_clouds
from gtda.homology import VietorisRipsPersistence
from PHkmeans.src.data_utils.vectorisation_methods import get_persistence_landscapes, get_betti_curves, get_persistence_images
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

import warnings
# Suppresses every warning for the rest of the session, including any raised by
# the libraries imported above.
warnings.filterwarnings("ignore")

In [6]:
# Use the %pip magic so the packages are installed into the environment of the
# running kernel rather than whichever `pip` the shell resolves first.
%pip install pdpm pot gudhi

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [7]:
# Use the %pip magic so giotto-tda is installed (or upgraded) in the environment
# of the running kernel rather than whichever `python` the shell resolves first.
%pip install -U giotto-tda

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [10]:
# --- Experiment configuration ---
noise = [0, 1, 2, 3, 4, 5, 10]     # noise levels swept by the experiment
n_samples_per_class = 10           # passed to make_point_clouds as its first argument
homology_dimensions = [0, 1, 2]    # homology dimensions for Vietoris-Rips persistence
n_clusters = 3                     # number of k-means clusters

# Adjusted Rand index (ARI) per noise level, in the same order as `noise`.
landscape_rand = []   # persistence landscapes
betti_rand = []       # Betti curves
image_rand = []       # persistence images

# Build both estimators once; each fit_* call below re-fits them from scratch.
# The cluster count comes from `n_clusters` rather than a repeated literal.
km = KMeans(n_clusters=n_clusters, init='k-means++')
VR = VietorisRipsPersistence(homology_dimensions=homology_dimensions)

for n in noise:
    # Generate a labelled synthetic data set at noise level n.
    # NOTE(review): an earlier comment here described 4 classes (circles, spheres,
    # tori and random point clouds) while n_clusters is 3 -- confirm how many
    # classes make_point_clouds actually produces.
    point_clouds, labels = make_point_clouds(n_samples_per_class, n_points=10, noise=n)

    # Compute persistence diagrams.
    diagrams = VR.fit_transform(point_clouds)

    # Vectorise the diagrams three ways: landscapes, Betti curves and images.
    p_landscapes = get_persistence_landscapes(point_clouds, diagrams, n_layers=2, n_bins=50)
    betti_curves = get_betti_curves(point_clouds, diagrams, n_bins=100)
    p_images = get_persistence_images(point_clouds, diagrams, n_bins=10)

    # Cluster each vectorisation with the same k-means estimator.
    landscape_preds = km.fit_predict(p_landscapes)
    betti_preds = km.fit_predict(betti_curves)
    image_preds = km.fit_predict(p_images)

    # Score every clustering against the generated labels.
    landscape_rand.append(adjusted_rand_score(labels, landscape_preds))
    betti_rand.append(adjusted_rand_score(labels, betti_preds))
    image_rand.append(adjusted_rand_score(labels, image_preds))

# Collect the ARI scores in a table indexed by noise level and print it.
vector_scores = pd.DataFrame({'noise': noise,
                              'PL score': landscape_rand,
                              'PI score': image_rand,
                              'BC_score': betti_rand}).set_index('noise')
print(vector_scores)

       PL score  PI score  BC_score
noise                              
0      1.000000  1.000000  1.000000
1      1.000000  0.808229  0.209995
2      1.000000  1.000000  0.094252
3      0.717099  0.468058  0.160084
4      0.808229  1.000000  0.226667
5      0.806667  0.731042  0.408804
10     0.552535  0.440262  0.236702


In [11]:
def persistence_comparison(homology_dimensions: list, noise: float, iters: int, n_clusters: int = 3):
    """Compare k-means clustering on persistence landscapes vs. persistence images.

    For each of ``iters`` repetitions a new synthetic data set is generated with
    ``make_point_clouds`` at the given noise level, Vietoris-Rips persistence
    diagrams are computed, and the diagrams are vectorised both as persistence
    landscapes and as persistence images. Each vectorisation is clustered with
    k-means and scored against the generated labels using the adjusted Rand
    index (ARI). A summary is printed; nothing is returned.

    Parameters
    ----------
    homology_dimensions : list
        Homology dimensions passed to ``VietorisRipsPersistence``.
    noise : float
        Noise level passed to ``make_point_clouds``.
    iters : int
        Number of repetitions; must be at least 1.
    n_clusters : int, default 3
        Number of clusters used by k-means.

    Raises
    ------
    ValueError
        If ``iters`` is smaller than 1 (the averages would be undefined).

    Notes
    -----
    Reads the notebook-level ``n_samples_per_class``. The k-means estimator is
    created locally, so the function does not depend on whatever object the
    notebook-level name ``km`` currently refers to.
    """
    # Fail early with a clear message instead of an opaque error from mean([]).
    if iters < 1:
        raise ValueError(f"iters must be at least 1, got {iters}")

    # Both estimators are re-fitted by every fit_* call, so one instance of each
    # is enough for all repetitions.
    vr_persistence = VietorisRipsPersistence(homology_dimensions=homology_dimensions)
    kmeans = KMeans(n_clusters=n_clusters, init='k-means++')

    landscape_scores = []
    image_scores = []
    for _ in range(iters):
        # Generate data with the requested noise level.
        point_clouds, labels = make_point_clouds(n_samples_per_class, n_points=10, noise=noise)
        # Create persistence diagrams.
        diagrams = vr_persistence.fit_transform(point_clouds)
        # Create persistence landscape and persistence image vectors.
        p_landscapes = get_persistence_landscapes(point_clouds=point_clouds,
                                                  persistence_diagrams=diagrams,
                                                  n_layers=2,
                                                  n_bins=50)
        p_images = get_persistence_images(point_clouds=point_clouds,
                                          persistence_diagrams=diagrams,
                                          n_bins=10)
        # Cluster each vectorisation (landscapes first, then images).
        landscape_preds = kmeans.fit_predict(p_landscapes)
        image_preds = kmeans.fit_predict(p_images)
        # Score each clustering against the generated labels.
        landscape_scores.append(adjusted_rand_score(labels, landscape_preds))
        image_scores.append(adjusted_rand_score(labels, image_preds))

    # 1 where landscapes strictly outperform images in a repetition, else 0.
    comparison = [
        1 if image_score < landscape_score else 0
        for landscape_score, image_score in zip(landscape_scores, image_scores)
    ]

    print(f"For noise = {noise}, persistence landscapes outperform persistence images "
          f"{round(mean(comparison) * 100, 2)}% of the time.")
    print(f" Average Adjusted Rand Score for Persistence Landscapes: {round(mean(landscape_scores), 3)}")
    print(f" Std. Adjusted Rand Score for Persistence Landscapes: {round(np.std(landscape_scores), 3)}")
    print(f" Average Adjusted Rand Score for Persistence Images: {round(mean(image_scores), 3)}")
    print(f" Std. Adjusted Rand Score for Persistence Images: {round(np.std(image_scores), 3)}")

In [12]:
# Run the landscape-vs-image comparison at noise level 1.0 over 100 repetitions.
persistence_comparison(
    homology_dimensions=[0, 1, 2],
    noise=1.0,
    iters=100,
)

For noise = 1.0, persistence landscapes outperform persistence images 63.0% of the time.
 Average Adjusted Rand Score for Persistence Landscapes: 0.998
 Std. Adjusted Rand Score for Persistence Landscapes: 0.014
 Average Adjusted Rand Score for Persistence Images: 0.898
 Std. Adjusted Rand Score for Persistence Images: 0.096


In [13]:
from PHkmeans.src.pd_pm_kmeans import PD_KMeans, PM_KMeans
# Explicit names instead of a wildcard import, so it is clear where each helper
# comes from. get_grid_width and diag_to_mesr are used by the following cell.
from PHkmeans.src.data_utils.pd_pm_methods import (
    diag_to_mesr,
    get_grid_width,
    get_pd,
    normalise_pc,
)


# Create simulated data.
point_clouds, labels = make_point_clouds(n_samples_per_class, n_points=10, noise=1.0)

# Create one persistence diagram (PD) per point cloud, normalising each cloud first.
diagrams = [get_pd(normalise_pc(pc)) for pc in point_clouds]

# Cluster in persistence-diagram space. The estimator gets its own name so the
# scikit-learn KMeans instance bound to `km` in an earlier cell is not replaced
# by an object of a different type.
pd_km = PD_KMeans(n_clusters=3, init='kmeans++', random_state=123)
pd_preds = pd_km.fit(diagrams)
print(f'PD ARI score: {adjusted_rand_score(labels, pd_preds)}')

PD ARI score: 1.0


In [14]:
# Derive a grid width from the full list of persistence diagrams (PDs).
grid_width = get_grid_width(diagrams)

# Turn every PD into a persistence measure (PM): the parts of each diagram are
# concatenated into one array and handed to diag_to_mesr, whose first return
# value is kept.
mesr_pairs = (
    diag_to_mesr(np.concatenate(dgm), unit_mass=1, grid_width=grid_width)
    for dgm in diagrams
)
mesrs = [measure for measure, _unused in mesr_pairs]

# Cluster in persistence-measure space and score against the generated labels.
pm_km = PM_KMeans(n_clusters=3, init='kmeans++', grid_width=grid_width)
pm_preds = pm_km.fit(mesrs)

pm_ari = adjusted_rand_score(labels, pm_preds)
print(f'PM ARI Score: {pm_ari}')

PM ARI Score: 0.8981703936425799
