In [None]:
import re
from os import listdir, makedirs
from os.path import isfile, join

import cupy
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy
import pandas as pd
import tslearn
from cuml.cluster import KMeans as KMeans
from tslearn.preprocessing import TimeSeriesScalerMeanVariance

### 1. Extract labels from Input Data

Get the files list and the following labels:
1. SFH binning resolution (levels)
2. Redshift binning (z_start-end)

In [2]:
# Directory holding the binned SFH tables. File names are expected to look like
# "binned_SFHs-<resolution>-JWST_<redshift bin>.txt".
myDir = "/gpfswork/rech/owt/commun/galaxy_classification/2023-sfh-galaxy-classification/data/binned_SFHs/"
myFiles = sorted(listdir(myDir))

# Raw string, escaped ".txt" and an end anchor, so that only real binned-SFH
# tables are accepted (the unescaped "." used to match any character).
file_pattern = re.compile(r'binned_SFHs-(.*)-JWST_(.*)\.txt$')

# `files` keeps only regular files whose name matches the pattern; the two
# label lists collect the resolution and redshift-bin parts of those names.
files = []
z_bins_list = []
level_bins_list = []
for data in myFiles:
    if not isfile(join(myDir, data)):
        continue
    result = file_pattern.search(data)
    if result is None:
        # A stray file would otherwise raise AttributeError on result.group().
        print('Skipping unrecognised file:', data)
        continue
    files.append(data)
    level_bins_list.append(result.group(1))
    z_bins_list.append(result.group(2))

# Unique labels, sorted as strings (so '10levels' comes before '7levels').
z_bins_list = sorted(set(z_bins_list))
level_bins_list = sorted(set(level_bins_list))
print(level_bins_list, z_bins_list)

['10levels', '11levels', '7levels', '8levels', '9levels'] ['z_0.5-1.0', 'z_1.0-1.5', 'z_1.5-2.0', 'z_2.0-2.5', 'z_2.5-3.0', 'z_3.0-3.5', 'z_3.5-4.0']


### 2. Plot cluster centers for each file

Pipeline:
1. Preprocessing using TimeSeriesScalerMeanVariance from tslearn
2. Clustering using KMeans from cuML
3. Plot cluster centers

In [12]:
# One figure per input file: each of the `num_clusters` k-means centres is
# drawn in its own panel and the figure is saved as "<file name>.png".
outDir = "cluster_centers"
# Create the output directory up front so savefig works on a fresh checkout.
makedirs(outDir, exist_ok=True)

num_clusters = 6
seed = 0
numpy.random.seed(seed)

for data in files:
    result = re.search(r'binned_SFHs-(.*)-JWST_(.*)\.txt$', data)
    if result is None:
        # Without a match there are no labels for the title; skip the file
        # instead of failing on result.group().
        print('Skipping unrecognised file:', data)
        continue
    level_bins = result.group(1)
    z_bins = result.group(2)
    print(data)

    df = pd.read_csv(join(myDir, data), sep='\t')
    # NOTE(review): columns 2..7 are used whatever the resolution of the file
    # (7 to 11 levels) -- confirm that this is the intended selection.
    levels = df.columns[2:8]
    # NOTE(review): the saved output of this cell shows numpy nanmean/nanvar
    # warnings coming from the scaler for one file, which suggests all-NaN
    # rows -- confirm, and consider dropping such rows before clustering.
    X_train = TimeSeriesScalerMeanVariance().fit_transform(df[levels].values)
    sz = X_train.shape[1]
    # tslearn returns an (n_series, sz, d) array; k-means expects a 2-D
    # (n_samples, n_features) matrix, so flatten the trailing axes explicitly.
    X_train = cupy.asarray(X_train.reshape(X_train.shape[0], -1))

    # numpy.random.seed does not reach cuML, so the seed is passed explicitly.
    kmeans = KMeans(n_clusters=num_clusters, max_iter=300,
                    init='scalable-k-means++', random_state=seed)
    # A single fit is enough: only the centres are plotted in this cell.
    kmeans.fit(X_train)
    cluster_centers = cupy.asnumpy(kmeans.cluster_centers_)

    fig = plt.figure(figsize=(12, 18), dpi=150)
    for yi in range(num_clusters):
        plt.subplot(6, 3, yi + 1)
        plt.plot(cluster_centers[yi].ravel(), "r-")
        plt.xlim(0, sz)
        plt.ylim(-4, 4)
        plt.text(0.55, 0.85, 'Cluster %d' % (yi + 1), transform=plt.gca().transAxes)
        if yi == 1:
            # The title sits on the middle panel of the top row.
            plt.title(level_bins+" "+z_bins)

    plt.tight_layout()
    fig.savefig(join(outDir, data+'.png'))
    # Close this specific figure so figures do not pile up across the loop.
    plt.close(fig)

binned_SFHs-10levels-JWST_z_0.5-1.0.txt
binned_SFHs-10levels-JWST_z_1.0-1.5.txt
binned_SFHs-10levels-JWST_z_1.5-2.0.txt
binned_SFHs-10levels-JWST_z_2.0-2.5.txt
binned_SFHs-10levels-JWST_z_2.5-3.0.txt
binned_SFHs-10levels-JWST_z_3.0-3.5.txt
binned_SFHs-10levels-JWST_z_3.5-4.0.txt
binned_SFHs-11levels-JWST_z_0.5-1.0.txt
binned_SFHs-11levels-JWST_z_1.0-1.5.txt
binned_SFHs-11levels-JWST_z_1.5-2.0.txt
binned_SFHs-11levels-JWST_z_2.0-2.5.txt
binned_SFHs-11levels-JWST_z_2.5-3.0.txt
binned_SFHs-11levels-JWST_z_3.0-3.5.txt
binned_SFHs-11levels-JWST_z_3.5-4.0.txt
binned_SFHs-7levels-JWST_z_0.5-1.0.txt
binned_SFHs-7levels-JWST_z_1-1.5.txt
binned_SFHs-7levels-JWST_z_1.5-2.0.txt
binned_SFHs-7levels-JWST_z_1.5-2.txt


  mean_t = numpy.nanmean(X_, axis=1, keepdims=True)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


binned_SFHs-7levels-JWST_z_2-2.5.txt
binned_SFHs-7levels-JWST_z_2.5-3.0.txt
binned_SFHs-7levels-JWST_z_3-3.5.txt
binned_SFHs-7levels-JWST_z_3.5-4.0.txt
binned_SFHs-8levels-JWST_z_0.5-1.0.txt
binned_SFHs-8levels-JWST_z_1.0-1.5.txt
binned_SFHs-8levels-JWST_z_1.5-2.0.txt
binned_SFHs-8levels-JWST_z_2.0-2.5.txt
binned_SFHs-8levels-JWST_z_2.5-3.0.txt
binned_SFHs-8levels-JWST_z_3.0-3.5.txt
binned_SFHs-8levels-JWST_z_3.5-4.0.txt
binned_SFHs-9levels-JWST_z_0.5-1.0.txt
binned_SFHs-9levels-JWST_z_1.0-1.5.txt
binned_SFHs-9levels-JWST_z_1.5-2.0.txt
binned_SFHs-9levels-JWST_z_2.0-2.5.txt
binned_SFHs-9levels-JWST_z_2.5-3.0.txt
binned_SFHs-9levels-JWST_z_3.0-3.5.txt
binned_SFHs-9levels-JWST_z_3.5-4.0.txt


### 3. Plot cluster centers for each SFH binning resolution

Pipeline:
1. Preprocessing using TimeSeriesScalerMeanVariance from tslearn
2. Clustering using KMeans from cuML
3. Plot cluster centers

In [4]:
# One figure per SFH binning resolution: every file of that resolution gets a
# panel showing all of its k-means centres, labelled with the redshift bin.
# The figure is saved as "<resolution>.png".
outDir = "cluster_centers_2"
# Create the output directory up front so savefig works on a fresh checkout.
makedirs(outDir, exist_ok=True)

num_clusters = 6
seed = 0
numpy.random.seed(seed)
# No resolution has been seen yet. Starting from None (instead of a hard-coded
# "10levels") avoids saving an empty, mislabelled figure when the first file
# has another resolution or when `files` is empty.
pre_level_bins = None
plt_id = 0
fig = None

for data in files:
    result = re.search(r'binned_SFHs-(.*)-JWST_(.*)\.txt$', data)
    if result is None:
        # Skip instead of failing on result.group().
        print('Skipping unrecognised file:', data)
        continue
    level_bins = result.group(1)
    z_bins = result.group(2)
    print(data)

    df = pd.read_csv(join(myDir, data), sep='\t')
    # NOTE(review): columns 2..7 are used whatever the resolution of the file
    # (7 to 11 levels) -- confirm that this is the intended selection.
    levels = df.columns[2:8]
    # NOTE(review): the saved output of this cell shows numpy nanmean/nanvar
    # warnings coming from the scaler for one file, which suggests all-NaN
    # rows -- confirm, and consider dropping such rows before clustering.
    X_train = TimeSeriesScalerMeanVariance().fit_transform(df[levels].values)
    sz = X_train.shape[1]
    # tslearn returns an (n_series, sz, d) array; k-means expects a 2-D
    # (n_samples, n_features) matrix, so flatten the trailing axes explicitly.
    X_train = cupy.asarray(X_train.reshape(X_train.shape[0], -1))

    # numpy.random.seed does not reach cuML, so the seed is passed explicitly.
    kmeans = KMeans(n_clusters=num_clusters, max_iter=300,
                    init='scalable-k-means++', random_state=seed)
    # A single fit is enough: only the centres are plotted in this cell.
    kmeans.fit(X_train)
    cluster_centers = cupy.asnumpy(kmeans.cluster_centers_)

    if pre_level_bins != level_bins:
        # The resolution changed: flush the finished figure (if any) and
        # start a new one for the new resolution.
        if fig is not None:
            plt.tight_layout()
            fig.savefig(join(outDir, pre_level_bins+'.png'))
            plt.close(fig)
        plt_id = 0
        fig = plt.figure(figsize=(12, 18), dpi=150)

    plt_id = plt_id + 1
    plt.subplot(8, 4, plt_id)
    for yi in range(num_clusters):
        plt.plot(cluster_centers[yi].ravel(), label='Cluster centers %d' % (yi + 1))
    plt.text(0.55, 0.85, z_bins, transform=plt.gca().transAxes)
    plt.xlim(0, sz)
    plt.ylim(-4, 4)
    if plt_id == 1:
        plt.title('Cluster centers '+level_bins)

    pre_level_bins = level_bins

# Flush the figure of the last resolution.
if fig is not None:
    plt.tight_layout()
    fig.savefig(join(outDir, pre_level_bins+'.png'))
    plt.close(fig)

binned_SFHs-10levels-JWST_z_0.5-1.0.txt
binned_SFHs-10levels-JWST_z_1.0-1.5.txt
binned_SFHs-10levels-JWST_z_1.5-2.0.txt
binned_SFHs-10levels-JWST_z_2.0-2.5.txt
binned_SFHs-10levels-JWST_z_2.5-3.0.txt
binned_SFHs-10levels-JWST_z_3.0-3.5.txt
binned_SFHs-10levels-JWST_z_3.5-4.0.txt
binned_SFHs-11levels-JWST_z_0.5-1.0.txt
binned_SFHs-11levels-JWST_z_1.0-1.5.txt
binned_SFHs-11levels-JWST_z_1.5-2.0.txt
binned_SFHs-11levels-JWST_z_2.0-2.5.txt
binned_SFHs-11levels-JWST_z_2.5-3.0.txt
binned_SFHs-11levels-JWST_z_3.0-3.5.txt
binned_SFHs-11levels-JWST_z_3.5-4.0.txt
binned_SFHs-7levels-JWST_z_0.5-1.0.txt
binned_SFHs-7levels-JWST_z_1-1.5.txt
binned_SFHs-7levels-JWST_z_1.5-2.0.txt
binned_SFHs-7levels-JWST_z_1.5-2.txt


  mean_t = numpy.nanmean(X_, axis=1, keepdims=True)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


binned_SFHs-7levels-JWST_z_2-2.5.txt
binned_SFHs-7levels-JWST_z_2.5-3.0.txt
binned_SFHs-7levels-JWST_z_3-3.5.txt
binned_SFHs-7levels-JWST_z_3.5-4.0.txt
binned_SFHs-8levels-JWST_z_0.5-1.0.txt
binned_SFHs-8levels-JWST_z_1.0-1.5.txt
binned_SFHs-8levels-JWST_z_1.5-2.0.txt
binned_SFHs-8levels-JWST_z_2.0-2.5.txt
binned_SFHs-8levels-JWST_z_2.5-3.0.txt
binned_SFHs-8levels-JWST_z_3.0-3.5.txt
binned_SFHs-8levels-JWST_z_3.5-4.0.txt
binned_SFHs-9levels-JWST_z_0.5-1.0.txt
binned_SFHs-9levels-JWST_z_1.0-1.5.txt
binned_SFHs-9levels-JWST_z_1.5-2.0.txt
binned_SFHs-9levels-JWST_z_2.0-2.5.txt
binned_SFHs-9levels-JWST_z_2.5-3.0.txt
binned_SFHs-9levels-JWST_z_3.0-3.5.txt
binned_SFHs-9levels-JWST_z_3.5-4.0.txt


### 4. Plot cluster centers for each redshift bin

Pipeline:
1. Preprocessing using TimeSeriesScalerMeanVariance from tslearn
2. Clustering using KMeans from cuML
3. Plot cluster centers

In [15]:
# One figure per redshift bin: every available resolution (7 to 11 levels)
# gets a panel showing all of its k-means centres. The figure is saved as
# "<redshift bin>.png".
outDir = "cluster_centers_3"
# Create the output directory up front so savefig works on a fresh checkout.
makedirs(outDir, exist_ok=True)

num_clusters = 6
seed = 0
numpy.random.seed(seed)

for z_bins in z_bins_list:
    # The figure is created lazily, on the first file found for this redshift
    # bin. This replaces the hard-coded "z_0.5-1.0" start value, which saved an
    # empty, mislabelled figure whenever the first bin was a different one.
    fig = None
    plt_id = 0
    for level in range(7, 12):
        data = 'binned_SFHs-'+str(level)+'levels-JWST_'+z_bins+'.txt'
        if not isfile(join(myDir, data)):
            continue
        print(data)

        df = pd.read_csv(join(myDir, data), sep='\t')
        # NOTE(review): columns 2..7 are used whatever the resolution of the
        # file -- confirm that this is the intended selection.
        levels = df.columns[2:8]
        X_train = TimeSeriesScalerMeanVariance().fit_transform(df[levels].values)
        sz = X_train.shape[1]
        # tslearn returns an (n_series, sz, d) array; k-means expects a 2-D
        # (n_samples, n_features) matrix, so flatten the trailing axes.
        X_train = cupy.asarray(X_train.reshape(X_train.shape[0], -1))

        # numpy.random.seed does not reach cuML, so pass the seed explicitly.
        kmeans = KMeans(n_clusters=num_clusters, max_iter=300,
                        init='scalable-k-means++', random_state=seed)
        # A single fit is enough: only the centres are plotted in this cell.
        kmeans.fit(X_train)
        cluster_centers = cupy.asnumpy(kmeans.cluster_centers_)

        if fig is None:
            fig = plt.figure(figsize=(12, 18), dpi=150)

        plt_id = plt_id + 1
        plt.subplot(6, 3, plt_id)
        for yi in range(num_clusters):
            plt.plot(cluster_centers[yi].ravel(), label='Cluster centers %d' % (yi + 1))
        plt.text(0.55, 0.85, str(level)+'levels', transform=plt.gca().transAxes)
        plt.xlim(0, sz)
        plt.ylim(-4, 4)
        if plt_id == 1:
            plt.title('Cluster centers '+z_bins)

    # Save only if at least one file was plotted for this redshift bin.
    if fig is not None:
        plt.tight_layout()
        fig.savefig(join(outDir, z_bins+'.png'))
        plt.close(fig)

binned_SFHs-7levels-JWST_z_0.5-1.0.txt
binned_SFHs-8levels-JWST_z_0.5-1.0.txt
binned_SFHs-9levels-JWST_z_0.5-1.0.txt
binned_SFHs-10levels-JWST_z_0.5-1.0.txt
binned_SFHs-11levels-JWST_z_0.5-1.0.txt
binned_SFHs-7levels-JWST_z_1.0-1.5.txt
binned_SFHs-8levels-JWST_z_1.0-1.5.txt
binned_SFHs-9levels-JWST_z_1.0-1.5.txt
binned_SFHs-10levels-JWST_z_1.0-1.5.txt
binned_SFHs-11levels-JWST_z_1.0-1.5.txt
binned_SFHs-7levels-JWST_z_1.5-2.0.txt
binned_SFHs-8levels-JWST_z_1.5-2.0.txt
binned_SFHs-9levels-JWST_z_1.5-2.0.txt
binned_SFHs-10levels-JWST_z_1.5-2.0.txt
binned_SFHs-11levels-JWST_z_1.5-2.0.txt
binned_SFHs-7levels-JWST_z_2.0-2.5.txt
binned_SFHs-8levels-JWST_z_2.0-2.5.txt
binned_SFHs-9levels-JWST_z_2.0-2.5.txt
binned_SFHs-10levels-JWST_z_2.0-2.5.txt
binned_SFHs-11levels-JWST_z_2.0-2.5.txt
binned_SFHs-7levels-JWST_z_2.5-3.0.txt
binned_SFHs-8levels-JWST_z_2.5-3.0.txt
binned_SFHs-9levels-JWST_z_2.5-3.0.txt
binned_SFHs-10levels-JWST_z_2.5-3.0.txt
binned_SFHs-11levels-JWST_z_2.5-3.0.txt
binned_SFHs-7le

### 5. Plot cluster histogram for each SFH binning resolution

Pipeline:
1. Preprocessing using TimeSeriesScalerMeanVariance from tslearn
2. Clustering using KMeans from cuML
3. Plot cluster histogram

In [17]:
# One figure per SFH binning resolution: every file of that resolution gets a
# panel with the histogram of its k-means cluster labels, annotated with the
# redshift bin. The figure is saved as "<resolution>.png".
outDir = "hist_2"
# Create the output directory up front so savefig works on a fresh checkout.
makedirs(outDir, exist_ok=True)

num_clusters = 6
seed = 0
numpy.random.seed(seed)
# One histogram bin per integer label 0..num_clusters-1, centred on the label.
# The default 10 float bins do not line up with integer labels and shift with
# the labels actually present, which makes panels hard to compare.
label_bins = numpy.arange(num_clusters + 1) - 0.5
# No resolution has been seen yet. Starting from None (instead of a hard-coded
# "10levels") avoids saving an empty, mislabelled figure when the first file
# has another resolution or when `files` is empty.
pre_level_bins = None
plt_id = 0
fig = None

for data in files:
    result = re.search(r'binned_SFHs-(.*)-JWST_(.*)\.txt$', data)
    if result is None:
        # Skip instead of failing on result.group().
        print('Skipping unrecognised file:', data)
        continue
    level_bins = result.group(1)
    z_bins = result.group(2)
    print(data)

    df = pd.read_csv(join(myDir, data), sep='\t')
    # NOTE(review): columns 2..7 are used whatever the resolution of the file
    # (7 to 11 levels) -- confirm that this is the intended selection.
    levels = df.columns[2:8]
    X_train = TimeSeriesScalerMeanVariance().fit_transform(df[levels].values)
    # tslearn returns an (n_series, sz, d) array; k-means expects a 2-D
    # (n_samples, n_features) matrix, so flatten the trailing axes explicitly.
    X_train = cupy.asarray(X_train.reshape(X_train.shape[0], -1))

    # numpy.random.seed does not reach cuML, so the seed is passed explicitly.
    kmeans = KMeans(n_clusters=num_clusters, max_iter=300,
                    init='scalable-k-means++', random_state=seed)
    # fit_predict already fits the model; a separate fit() call beforehand
    # only doubled the work.
    y_pred = cupy.asnumpy(kmeans.fit_predict(X_train))

    if pre_level_bins != level_bins:
        # The resolution changed: flush the finished figure (if any) and
        # start a new one for the new resolution.
        if fig is not None:
            plt.tight_layout()
            fig.savefig(join(outDir, pre_level_bins+'.png'))
            plt.close(fig)
        plt_id = 0
        fig = plt.figure(figsize=(12, 18), dpi=150)

    plt_id = plt_id + 1
    plt.subplot(8, 4, plt_id)
    plt.hist(y_pred, bins=label_bins)
    plt.text(0.55, 0.85, z_bins, transform=plt.gca().transAxes)
    if plt_id == 1:
        plt.title('Cluster histogram '+level_bins)

    pre_level_bins = level_bins

# Flush the figure of the last resolution.
if fig is not None:
    plt.tight_layout()
    fig.savefig(join(outDir, pre_level_bins+'.png'))
    plt.close(fig)

binned_SFHs-10levels-JWST_z_0.5-1.0.txt
binned_SFHs-10levels-JWST_z_1.0-1.5.txt
binned_SFHs-10levels-JWST_z_1.5-2.0.txt
binned_SFHs-10levels-JWST_z_2.0-2.5.txt
binned_SFHs-10levels-JWST_z_2.5-3.0.txt
binned_SFHs-10levels-JWST_z_3.0-3.5.txt
binned_SFHs-10levels-JWST_z_3.5-4.0.txt
binned_SFHs-11levels-JWST_z_0.5-1.0.txt
binned_SFHs-11levels-JWST_z_1.0-1.5.txt
binned_SFHs-11levels-JWST_z_1.5-2.0.txt
binned_SFHs-11levels-JWST_z_2.0-2.5.txt
binned_SFHs-11levels-JWST_z_2.5-3.0.txt
binned_SFHs-11levels-JWST_z_3.0-3.5.txt
binned_SFHs-11levels-JWST_z_3.5-4.0.txt
binned_SFHs-7levels-JWST_z_0.5-1.0.txt
binned_SFHs-7levels-JWST_z_1.0-1.5.txt
binned_SFHs-7levels-JWST_z_1.5-2.0.txt
binned_SFHs-7levels-JWST_z_2.0-2.5.txt
binned_SFHs-7levels-JWST_z_2.5-3.0.txt
binned_SFHs-7levels-JWST_z_3.0-3.5.txt
binned_SFHs-7levels-JWST_z_3.5-4.0.txt
binned_SFHs-8levels-JWST_z_0.5-1.0.txt
binned_SFHs-8levels-JWST_z_1.0-1.5.txt
binned_SFHs-8levels-JWST_z_1.5-2.0.txt
binned_SFHs-8levels-JWST_z_2.0-2.5.txt
binned_SFHs

### 6. Plot cluster histogram for each redshift bin

Pipeline:
1. Preprocessing using TimeSeriesScalerMeanVariance from tslearn
2. Clustering using KMeans from cuML
3. Plot cluster histogram

In [16]:
# One figure per redshift bin: every available resolution (7 to 11 levels)
# gets a panel with the histogram of its k-means cluster labels. The figure is
# saved as "<redshift bin>.png".
outDir = "hist_3"
# Create the output directory up front so savefig works on a fresh checkout.
makedirs(outDir, exist_ok=True)

num_clusters = 6
seed = 0
numpy.random.seed(seed)
# One histogram bin per integer label 0..num_clusters-1, centred on the label.
# The default 10 float bins do not line up with integer labels and shift with
# the labels actually present, which makes panels hard to compare.
label_bins = numpy.arange(num_clusters + 1) - 0.5

for z_bins in z_bins_list:
    # The figure is created lazily, on the first file found for this redshift
    # bin. This replaces the hard-coded "z_0.5-1.0" start value, which saved an
    # empty, mislabelled figure whenever the first bin was a different one.
    fig = None
    plt_id = 0
    for level in range(7, 12):
        data = 'binned_SFHs-'+str(level)+'levels-JWST_'+z_bins+'.txt'
        if not isfile(join(myDir, data)):
            continue
        print(data)

        df = pd.read_csv(join(myDir, data), sep='\t')
        # NOTE(review): columns 2..7 are used whatever the resolution of the
        # file -- confirm that this is the intended selection.
        levels = df.columns[2:8]
        X_train = TimeSeriesScalerMeanVariance().fit_transform(df[levels].values)
        # tslearn returns an (n_series, sz, d) array; k-means expects a 2-D
        # (n_samples, n_features) matrix, so flatten the trailing axes.
        X_train = cupy.asarray(X_train.reshape(X_train.shape[0], -1))

        # numpy.random.seed does not reach cuML, so pass the seed explicitly.
        kmeans = KMeans(n_clusters=num_clusters, max_iter=300,
                        init='scalable-k-means++', random_state=seed)
        # fit_predict already fits the model; a separate fit() call
        # beforehand only doubled the work.
        y_pred = cupy.asnumpy(kmeans.fit_predict(X_train))

        if fig is None:
            fig = plt.figure(figsize=(12, 18), dpi=150)

        plt_id = plt_id + 1
        plt.subplot(6, 3, plt_id)
        plt.hist(y_pred, bins=label_bins)
        plt.text(0.55, 0.85, str(level)+'levels', transform=plt.gca().transAxes)
        if plt_id == 1:
            plt.title('Cluster histogram '+z_bins)

    # Save only if at least one file was plotted for this redshift bin.
    if fig is not None:
        plt.tight_layout()
        fig.savefig(join(outDir, z_bins+'.png'))
        plt.close(fig)

binned_SFHs-7levels-JWST_z_0.5-1.0.txt
binned_SFHs-8levels-JWST_z_0.5-1.0.txt
binned_SFHs-9levels-JWST_z_0.5-1.0.txt
binned_SFHs-10levels-JWST_z_0.5-1.0.txt
binned_SFHs-11levels-JWST_z_0.5-1.0.txt
binned_SFHs-7levels-JWST_z_1.0-1.5.txt
binned_SFHs-8levels-JWST_z_1.0-1.5.txt
binned_SFHs-9levels-JWST_z_1.0-1.5.txt
binned_SFHs-10levels-JWST_z_1.0-1.5.txt
binned_SFHs-11levels-JWST_z_1.0-1.5.txt
binned_SFHs-7levels-JWST_z_1.5-2.0.txt
binned_SFHs-8levels-JWST_z_1.5-2.0.txt
binned_SFHs-9levels-JWST_z_1.5-2.0.txt
binned_SFHs-10levels-JWST_z_1.5-2.0.txt
binned_SFHs-11levels-JWST_z_1.5-2.0.txt
binned_SFHs-7levels-JWST_z_2.0-2.5.txt
binned_SFHs-8levels-JWST_z_2.0-2.5.txt
binned_SFHs-9levels-JWST_z_2.0-2.5.txt
binned_SFHs-10levels-JWST_z_2.0-2.5.txt
binned_SFHs-11levels-JWST_z_2.0-2.5.txt
binned_SFHs-7levels-JWST_z_2.5-3.0.txt
binned_SFHs-8levels-JWST_z_2.5-3.0.txt
binned_SFHs-9levels-JWST_z_2.5-3.0.txt
binned_SFHs-10levels-JWST_z_2.5-3.0.txt
binned_SFHs-11levels-JWST_z_2.5-3.0.txt
binned_SFHs-7le

### 7. Plot cluster centers and histogram for each redshift bin at a given SFH resolution

Pipeline:
1. Preprocessing using TimeSeriesScalerMeanVariance from tslearn
2. Clustering using KMeans from cuML
3. Plot cluster centers and histogram

In [5]:
# Single figure for the 10-level resolution: the top row shows the k-means
# centres of each redshift bin, the row below the histogram of the cluster
# labels of the same bin. The figure is saved as "10levels.png".
outDir = "cluster_centers_hist"
# Create the output directory up front so savefig works on a fresh checkout.
makedirs(outDir, exist_ok=True)

num_clusters = 6
seed = 0
numpy.random.seed(seed)
# One histogram bin per integer label 0..num_clusters-1, centred on the label.
# The default 10 float bins do not line up with integer labels and shift with
# the labels actually present, which makes panels hard to compare.
label_bins = numpy.arange(num_clusters + 1) - 0.5

# Resolve the input files first so the grid width is known before plotting.
inputs = []
for z_bins in z_bins_list:
    data = 'binned_SFHs-10levels-JWST_'+z_bins+'.txt'
    if isfile(join(myDir, data)):
        inputs.append((z_bins, data))

# Keep the original 14x7 grid for up to 7 files; widen it beyond that so the
# histogram row (offset by one grid row) never lands on a centres panel.
n_cols = max(7, len(inputs))

plt_id = 0
fig = plt.figure(figsize=(12, 18), dpi=150)

for z_bins, data in inputs:
    print(data)
    df = pd.read_csv(join(myDir, data), sep='\t')
    # NOTE(review): columns 2..7 are used although the file has 10 levels --
    # confirm that this is the intended selection.
    levels = df.columns[2:8]
    X_train = TimeSeriesScalerMeanVariance().fit_transform(df[levels].values)
    sz = X_train.shape[1]
    # tslearn returns an (n_series, sz, d) array; k-means expects a 2-D
    # (n_samples, n_features) matrix, so flatten the trailing axes explicitly.
    X_train = cupy.asarray(X_train.reshape(X_train.shape[0], -1))

    # numpy.random.seed does not reach cuML, so the seed is passed explicitly.
    kmeans = KMeans(n_clusters=num_clusters, max_iter=300,
                    init='scalable-k-means++', random_state=seed)
    # fit_predict already fits the model; a separate fit() call beforehand
    # only doubled the work.
    y_pred = cupy.asnumpy(kmeans.fit_predict(X_train))
    cluster_centers = cupy.asnumpy(kmeans.cluster_centers_)

    plt_id = plt_id + 1
    plt.subplot(14, n_cols, plt_id)
    for yi in range(num_clusters):
        plt.plot(cluster_centers[yi].ravel(), label='Cluster centers %d' % (yi + 1))
    plt.text(0.55, 0.85, z_bins, transform=plt.gca().transAxes)
    plt.xlim(0, sz)
    plt.ylim(-4, 4)

    # Same column, next grid row: histogram of the labels for this bin.
    plt.subplot(14, n_cols, plt_id + n_cols)
    plt.hist(y_pred, bins=label_bins)
    plt.text(0.55, 0.85, z_bins, transform=plt.gca().transAxes)

plt.tight_layout()
fig.savefig(join(outDir, '10levels.png'))
plt.close(fig)

binned_SFHs-10levels-JWST_z_0.5-1.0.txt
binned_SFHs-10levels-JWST_z_1.0-1.5.txt
binned_SFHs-10levels-JWST_z_1.5-2.0.txt
binned_SFHs-10levels-JWST_z_2.0-2.5.txt
binned_SFHs-10levels-JWST_z_2.5-3.0.txt
binned_SFHs-10levels-JWST_z_3.0-3.5.txt
binned_SFHs-10levels-JWST_z_3.5-4.0.txt
