In [None]:
import numpy as np
from PIL import Image
import os
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image as keras_image
from google.colab import drive
from sklearn.preprocessing import MinMaxScaler
# Mount Google Drive

drive.mount('/content/drive')
def load_images_from_folder(folder):
    images = []
    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        try:
            img = keras_image.load_img(img_path, target_size=(200,200))  # VGG16 expects images of size 224x224
            img_data = keras_image.img_to_array(img)
            img_data = np.expand_dims(img_data, axis=0)
            img_data = preprocess_input(img_data)
            images.append(img_data)
        except Exception as e:
            print(f"Skipping file {img_path} due to error: {e}")
    return np.vstack(images)


# Specify the paths to your folders in Google Drive
folder_path_original = '/content/drive/MyDrive/Healthy/Healthy'
folder_path_generated = '/content/drive/MyDrive/hi/Healthy_510'

# Load the pre-trained VGG16 model
model = VGG16(weights='imagenet', include_top=False)

# Load original and generated images
original_images = load_images_from_folder(folder_path_original)
generated_images = load_images_from_folder(folder_path_generated)

# Ensure that the number of original and generated images is the same
min_length = min(len(original_images), len(generated_images))
original_images = original_images[:min_length]
generated_images = generated_images[:min_length]


# Ensure that the number of original and generated images is the same
assert len(original_images) == len(generated_images), "The number of original and generated images must be the same."

# Get the embeddings for all images
original_embeddings = model.predict(original_images)
generated_embeddings = model.predict(generated_images)

# Flatten the embeddings to 1D
original_embeddings_1D = original_embeddings.flatten()
generated_embeddings_1D = generated_embeddings.flatten()

# Normalize the embeddings to [0, 1] range
scaler = MinMaxScaler()
original_embeddings_1D = scaler.fit_transform(original_embeddings_1D.reshape(-1, 1)).flatten()
generated_embeddings_1D = scaler.transform(generated_embeddings_1D.reshape(-1, 1)).flatten()

from scipy.stats import wasserstein_distance, entropy
from numpy.linalg import norm

# Flatten the embeddings to 1D
original_embeddings_1D = original_embeddings.flatten()
generated_embeddings_1D = generated_embeddings.flatten()

# Calculate the Wasserstein distance
wasserstein_dist = wasserstein_distance(original_embeddings_1D, generated_embeddings_1D)

# Calculate the Jensen-Shannon divergence
def jensen_shannon_divergence(p, q):
    m = 0.5 * (p + q)
    return 0.5 * (entropy(p, m) + entropy(q, m))

jensen_shannon_div = jensen_shannon_divergence(original_embeddings_1D, generated_embeddings_1D)

print(f'Wasserstein Distance: {wasserstein_dist}')
print(f'Jensen-Shannon Divergence: {jensen_shannon_div}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Wasserstein Distance: 0.3960235728754258
Jensen-Shannon Divergence: 0.45850980281829834


In [None]:
from scipy.spatial.distance import jensenshannon
from scipy.linalg import sqrtm
import numpy as np

# Calculate the Jensen-Shannon divergence
def jensen_shannon_divergence(p, q):
    m = 0.5 * (p + q)
    return 0.5 * (entropy(p, m) + entropy(q, m))

# Normalize the embeddings to make them into probability distributions
generated_embeddings_normalized = generated_embeddings / (np.sum(generated_embeddings, axis=1, keepdims=True) + 1e-10)  # Add a small constant to avoid division by zero

# Calculate the pairwise Jensen-Shannon divergences
js_distances = np.zeros((len(generated_embeddings_normalized), len(generated_embeddings_normalized)))

for i in range(len(generated_embeddings_normalized)):
    for j in range(len(generated_embeddings_normalized)):
        js_distances[i, j] = jensen_shannon_divergence(generated_embeddings_normalized[i].flatten(), generated_embeddings_normalized[j].flatten())

# Calculate the minimum and maximum observed diversity scores
min_val = np.min(js_distances)
max_val = np.max(js_distances)

diversity_score = np.mean(js_distances)
# Normalize the diversity score
normalized_diversity_score = (diversity_score - min_val) / (max_val - min_val)

print(normalized_diversity_score )

0.8174721904650105
