In [15]:
# Split the sample grids we save during training into individual images

def split_grid_image(grid_image, grid_size, image_size=(128, 128), spacing=2):
    """Cut a tiled grid image into its individual tiles.

    The tile at ``(row, col)`` is taken to start ``spacing`` pixels after the
    end of the previous tile (and ``spacing`` pixels from the top/left edge),
    i.e. at ``row * (img_h + spacing) + spacing`` vertically and
    ``col * (img_w + spacing) + spacing`` horizontally.
    NOTE(review): this looks like the layout torchvision's ``make_grid``
    produces with ``padding=spacing`` -- confirm against the code that saves
    the grids.

    Args:
        grid_image: Array indexed as ``[y, x]`` or ``[y, x, channel]``
            (for example the result of ``cv2.imread``).
        grid_size: ``(rows, cols)`` number of tiles in the grid.
        image_size: ``(height, width)`` of a single tile, in pixels.
        spacing: Number of padding pixels preceding each tile.

    Returns:
        ``np.ndarray`` holding the tiles stacked along a new first axis, in
        row-major order (``rows * cols`` entries).

    Raises:
        ValueError: If ``grid_image`` is ``None`` (``cv2.imread`` returns
            ``None`` when a file cannot be read) or is too small to contain
            the requested grid.
    """
    if grid_image is None:
        raise ValueError(
            "grid_image is None; cv2.imread returns None when the file cannot be read"
        )

    grid_h, grid_w = grid_size
    img_h, img_w = image_size

    # The last tile of each axis ends exactly at count * (tile + spacing).
    # Without this check numpy slicing would silently return truncated tiles.
    required_h = grid_h * (img_h + spacing)
    required_w = grid_w * (img_w + spacing)
    actual_h, actual_w = grid_image.shape[:2]
    if grid_h and grid_w and (actual_h < required_h or actual_w < required_w):
        raise ValueError(
            f"grid image of size {actual_h}x{actual_w} is too small for a "
            f"{grid_h}x{grid_w} grid of {img_h}x{img_w} tiles with spacing {spacing} "
            f"(need at least {required_h}x{required_w})"
        )

    row_starts = [row * (img_h + spacing) + spacing for row in range(grid_h)]
    col_starts = [col * (img_w + spacing) + spacing for col in range(grid_w)]

    images = [
        grid_image[start_y:start_y + img_h, start_x:start_x + img_w]
        for start_y in row_starts
        for start_x in col_starts
    ]

    return np.array(images)

# NOTE: this cell uses `os`, `cv2` and `np` (via split_grid_image), which are
# imported in the imports cell (In [1]); run that cell first.
GRID_IMAGE_PATH = '/gpfswork/rech/tkc/uwa51yi/generative-models/logs/2024-05-25T15-11-04_example_training-toy-TNG/images/train/sampleslog-152000_e-002048_b-000011.png'
SINGLE_IMG_DIR = '/gpfswork/rech/tkc/uwa51yi/generative-models/logs/2024-05-25T15-11-04_example_training-toy-TNG/images/test/single_img'

grid_image = cv2.imread(GRID_IMAGE_PATH)
if grid_image is None:
    # cv2.imread does not raise on a missing/unreadable file, it returns None.
    raise FileNotFoundError(f"Could not read grid image: {GRID_IMAGE_PATH}")
# No BGR -> RGB conversion here: cv2.imread loads BGR and cv2.imwrite expects
# BGR, so the tiles are written back with their original colours.

images = split_grid_image(grid_image, (4, 4))

# cv2.imwrite does not create missing directories; it just returns False.
os.makedirs(SINGLE_IMG_DIR, exist_ok=True)

# Iterate over the tiles actually extracted instead of a hard-coded count.
for i, image in enumerate(images):
    out_path = os.path.join(SINGLE_IMG_DIR, f'image_{i}.png')
    if not cv2.imwrite(out_path, image):
        raise OSError(f"Failed to write {out_path}")

In [1]:
import os
import cv2
import numpy as np
import umap
import plotly.graph_objects as go
import pandas as pd
import joblib
from sklearn.decomposition import PCA
import plotly.io as pio

def load_images_from_folder(folder):
    """Load every readable image in ``folder`` as a flattened grayscale vector.

    Directory entries are visited in sorted name order so that the row order
    of the returned array (and therefore anything fitted on it) is the same
    on every run; ``os.listdir`` by itself returns entries in arbitrary order.
    Entries for which ``cv2.imread`` returns ``None`` are skipped.

    Args:
        folder: Path of the directory to scan (only its direct entries).

    Returns:
        Tuple ``(images, filenames)`` where ``images`` is an ``np.ndarray``
        with one flattened image per row and ``filenames`` is the list of the
        corresponding entry names, in the same order. If nothing could be
        read, ``images`` is an empty 1-D array and ``filenames`` is empty.

    Raises:
        ValueError: If the readable images do not all have the same number of
            pixels (they could not be stacked into a 2-D array).
    """
    images = []
    filenames = []
    expected_size = None

    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename), cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue

        flat = img.flatten()
        if expected_size is None:
            expected_size = flat.size
        elif flat.size != expected_size:
            raise ValueError(
                f"Image {filename!r} in {folder!r} has {flat.size} pixels, "
                f"expected {expected_size}; all images must share one size"
            )

        images.append(flat)
        filenames.append(filename)

    return np.array(images), filenames

def learn_umap_from_folder(folder, n_pca_components=100, random_state=42):
    """Fit an optional PCA followed by a 3-D UMAP on the images of ``folder``.

    Images are loaded with ``load_images_from_folder``. When they have more
    than ``n_pca_components`` features, a PCA is fitted first to speed UMAP
    up; otherwise PCA is skipped and ``pca`` is ``None``. Both fitted objects
    are written with ``joblib.dump`` to ``pca_model.pkl`` and
    ``umap_model.pkl`` (relative paths, i.e. the current working directory);
    when PCA is skipped, ``None`` is what gets dumped to ``pca_model.pkl``.

    Args:
        folder: Directory containing the training images.
        n_pca_components: Target number of PCA components. It is capped at
            the number of images, because PCA cannot extract more components
            than there are samples.
        random_state: Seed passed to both PCA and UMAP so repeated runs give
            the same projection.

    Returns:
        Tuple ``(pca, umap_3d, umap_result, filenames)``: the fitted PCA (or
        ``None``), the fitted UMAP model, the 3-D embedding of the training
        images, and the file names in the same row order.

    Raises:
        ValueError: If no readable image was found in ``folder``.
    """
    images, filenames = load_images_from_folder(folder)
    if len(filenames) == 0:
        raise ValueError(f"No readable images found in {folder!r}")

    # Optionally, reduce dimensionality with PCA before UMAP to speed up
    if images.shape[1] > n_pca_components:
        # n_components must not exceed the number of samples, otherwise PCA
        # raises for small folders.
        n_components = min(n_pca_components, images.shape[0])
        pca = PCA(n_components=n_components, random_state=random_state)
        pca_result = pca.fit_transform(images)
    else:
        pca = None
        pca_result = images

    # Learn UMAP projection to 3D
    umap_3d = umap.UMAP(n_components=3, random_state=random_state)
    umap_result = umap_3d.fit_transform(pca_result)

    # Save the learned PCA and UMAP models
    joblib.dump(pca, 'pca_model.pkl')
    joblib.dump(umap_3d, 'umap_model.pkl')

    return pca, umap_3d, umap_result, filenames

def apply_umap_to_new_folder(pca, umap_3d, folder):
    """Project the images of ``folder`` with already-fitted PCA/UMAP models.

    Args:
        pca: Fitted PCA object exposing ``transform``, or ``None`` when no
            PCA step was used while fitting.
        umap_3d: Fitted UMAP model exposing ``transform``.
        folder: Directory whose images are loaded with
            ``load_images_from_folder`` and projected.

    Returns:
        Tuple ``(umap_result, filenames)``: the output of
        ``umap_3d.transform`` and the file names in the same row order.
    """
    images, filenames = load_images_from_folder(folder)

    # Mirror the fitting pipeline: reduce with PCA only if one was fitted.
    features = images if pca is None else pca.transform(images)

    return umap_3d.transform(features), filenames

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Fit the (optional) PCA and the 3-D UMAP on the training images.
# learn_umap_from_folder also writes the fitted models to pca_model.pkl and
# umap_model.pkl; the names assigned below are reused by the later cells.
folder_path_train = "/gpfswork/rech/tkc/uwa51yi/DDPMv2/improved-diffusion/Inception/Ideal_train/TNG_train_idealized"  # Absolute, machine-specific path: point this at your training image folder
pca, umap_3d, umap_result_train, filenames_train = learn_umap_from_folder(folder_path_train)

  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


In [5]:
def plot_combined_umap(umap_results, filenames, labels, save_path=None):
    """Draw several 3-D embeddings as one interactive plotly scatter plot.

    The three sequences are consumed in parallel (``zip``): one trace is
    added per ``(embedding, file names, label)`` triple, and the file name of
    each point is shown on hover.

    Args:
        umap_results: Sequence of embeddings, each with three columns
            (x, y, z).
        filenames: Sequence of file-name lists, one per embedding, with one
            name per point.
        labels: Sequence of legend labels, one per embedding.
        save_path: If truthy, the figure is also written to this path as
            HTML via ``pio.write_html``.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    palette = ['blue', 'red', 'green', 'sienna']  # cycled if there are more groups than colours
    fig = go.Figure()

    groups = zip(umap_results, filenames, labels)
    for idx, (coords, names, label) in enumerate(groups):
        points = pd.DataFrame(coords, columns=['x', 'y', 'z'])
        points['filename'] = names

        # The first group is drawn with smaller markers than the others.
        marker_size = 2 if idx != 0 else 0.5
        trace = go.Scatter3d(
            x=points['x'],
            y=points['y'],
            z=points['z'],
            mode='markers',
            marker={'size': marker_size, 'color': palette[idx % len(palette)]},
            name=label,
            text=points['filename'],
            hoverinfo='text',
        )
        fig.add_trace(trace)

    fig.update_layout(title='3D UMAP Projection of Images')

    if save_path:
        pio.write_html(fig, save_path)

    fig.show()


# Project other image sets with the PCA/UMAP models fitted on the training
# images (`pca`, `umap_3d`, `umap_result_train`, `filenames_train` come from
# the fitting cell above, which must have been run first).
folder_path_DDPM = "/gpfswork/rech/tkc/uwa51yi/DDPMv2/improved-diffusion/samples/samples_fits_png_300k"  # Absolute, machine-specific path: point this at the folder of images to project
umap_result_DDPM, filenames_DDPM = apply_umap_to_new_folder(pca, umap_3d, folder_path_DDPM)

# Optional extra comparison sets (currently disabled). Uncomment a pair below
# AND the four-set plot call further down to include them.
#folder_path_SD = "/gpfswork/rech/tkc/uwa51yi/generative-models/logs/2024-05-25T15-11-04_example_training-toy-TNG/images/test/single_img"  # Change this to your new folder path
#umap_result_SD, filenames_SD = apply_umap_to_new_folder(pca, umap_3d, folder_path_SD)

#folder_path_Latent = "/gpfswork/rech/tkc/uwa51yi/generative-models/logs/2024-05-20T17-36-34_example_training-TNG-KL/images/test/single_img"  # Change this to your new folder path
#umap_result_Latent, filenames_Latent = apply_umap_to_new_folder(pca, umap_3d, folder_path_Latent)

# Plot the combined UMAP results and save the plot as a standalone HTML file
plot_save_path = "UMAP_projection_Ideal.html"  # Relative path: written to the current working directory
# Four-set variant (needs the SD and Latent projections above to be enabled):
#plot_combined_umap(
#    [umap_result_train, umap_result_DDPM, umap_result_SD, umap_result_Latent],
#    [filenames_train, filenames_DDPM, filenames_SD, filenames_Latent],
#    ['Training Images', 'DDPM++ Images', 'SD Images', 'Latent Images'],
#    save_path=plot_save_path
#)

# Active variant: training images vs. DDPM++ samples only.
plot_combined_umap(
    [umap_result_train, umap_result_DDPM],
    [filenames_train, filenames_DDPM],
    ['Training Images', 'DDPM++ Images'],
    save_path=plot_save_path
)