In [6]:
!pip install efficientnet_pytorch

Collecting efficientnet_pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: efficientnet_pytorch
  Building wheel for efficientnet_pytorch (setup.py): started
  Building wheel for efficientnet_pytorch (setup.py): finished with status 'done'
  Created wheel for efficientnet_pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16464 sha256=fdb91429b1c7a46461ed44f07705c5fcd64bd9d198856302ec8c72bcd127d1f7
  Stored in directory: c:\users\cbe\appdata\local\pip\cache\wheels\03\3f\e9\911b1bc46869644912bda90a56bcf7b960f20b5187feea3baf
Successfully built efficientnet_pytorch
Installing collected packages: efficientnet_pytorch
Successfully installed efficientnet_pytorch-0.7.1


In [7]:
from efficientnet_pytorch import EfficientNet
import numpy as np
import cv2
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import torch
import tqdm
# model = EfficientNet.from_name('efficientnet-b7') importing the Efficientnet model

model = EfficientNet.from_pretrained('efficientnet-b7')

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth" to C:\Users\CBE/.cache\torch\hub\checkpoints\efficientnet-b7-dcc49843.pth
100%|████████████████████████████████████████████████████████████████████████████████| 254M/254M [04:35<00:00, 968kB/s]


Loaded pretrained weights for efficientnet-b7


In [33]:
import os
import cv2
import torch
import numpy as np
from sklearn.preprocessing import StandardScaler
from efficientnet_pytorch import EfficientNet

image_directory = './Before'


# Function to load and preprocess images
def load_and_preprocess_images(directory):
    images = []
    for filename in os.listdir(directory):
        if filename.endswith('.png') or filename.endswith('.jpg'):
            img = cv2.imread(os.path.join(directory, filename))
            if img is not None:
                # Preprocess the image (resize)
                img = cv2.resize(img, (128, 128))  # Resize to a standard size
                # Convert to RGB if grayscale
                if len(img.shape) == 2 or img.shape[2] == 1:
                    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
                images.append(img)
    return images

# Function to extract features from images
def extract_features(images):
    features = []
    scaler = StandardScaler()
    for img in images:
        # Convert image to float32
        img = img.astype(np.float32)
        # Normalize the image (optional, depending on model requirements)
        img = img / 255.0
        # Add batch dimension
        img = np.expand_dims(img, axis=0)
        img_tensor = torch.from_numpy(img).permute(0, 3, 1, 2)  # Change to [batch, channels, height, width]
        # Extract features
        with torch.no_grad():
            extracted_features = model.extract_features(img_tensor)
        # Flatten the features into 2D array
        extracted_features = extracted_features.squeeze().numpy()
        num_channels, height, width = extracted_features.shape
        flattened_features = extracted_features.reshape(num_channels, height * width).T  # Shape: [pixels, num_channels]
        # Apply StandardScaler
        normalized_feature = scaler.fit_transform(flattened_features)
        features.append(normalized_feature[0])
    return features

# Load and preprocess images
images = load_and_preprocess_images(image_directory)

# Extract features from images
Liste_of_features = extract_features(images)

print(Liste_of_features[0].shape)


(2560,)


In [34]:
print(len(Liste_of_features))
print(len(images))

318
318


In [35]:
all_features = Liste_of_features

num_strata = 5

# Apply KMeans clustering
kmeans = KMeans(n_clusters=num_strata, random_state=0)
strata_labels = kmeans.fit_predict(all_features)

print(strata_labels)

[0 2 0 1 3 4 4 3 4 1 3 1 3 1 3 0 1 0 3 3 1 0 3 0 1 0 0 3 3 4 3 3 0 2 3 3 3
 0 3 3 3 4 3 4 3 3 3 3 0 0 4 4 0 3 4 3 3 0 3 3 3 3 1 1 3 0 4 3 3 4 3 4 3 3
 0 3 4 0 2 4 3 3 2 3 4 4 0 1 3 0 2 3 3 2 0 4 3 1 3 3 2 4 1 0 1 3 3 3 1 0 3
 1 4 4 4 1 3 4 3 3 0 1 3 4 4 3 2 1 2 2 3 3 1 1 4 1 0 2 3 3 1 3 1 3 3 3 1 4
 1 3 3 3 4 4 0 1 3 3 1 4 3 4 0 3 2 1 2 1 3 0 1 2 3 1 0 3 3 3 1 3 3 2 3 4 3
 3 2 4 3 1 3 4 1 1 4 0 3 3 3 3 3 3 0 4 0 1 3 1 3 3 1 2 3 0 3 3 3 1 3 4 1 0
 2 3 4 3 3 1 1 4 1 4 3 3 0 1 1 3 1 4 4 4 3 3 3 3 4 4 3 3 1 1 1 4 0 1 3 1 1
 2 3 3 3 3 0 1 1 3 2 3 3 1 3 1 1 2 4 1 3 1 4 4 3 4 3 3 4 4 0 2 3 1 3 4 2 2
 3 1 3 3 3 0 4 1 1 3 0 2 0 3 3 4 3 1 4 4 2 0]


In [36]:
# Create a dictionary to store cells by strata
strata_dict = {i: [] for i in range(num_strata)}

for i, label in enumerate(strata_labels):
    strata_dict[label].append(i)

print(strata_dict)

{0: [0, 2, 15, 17, 21, 23, 25, 26, 32, 37, 48, 49, 52, 57, 65, 74, 77, 86, 89, 94, 103, 109, 120, 136, 154, 162, 169, 174, 195, 202, 204, 213, 221, 234, 254, 264, 288, 301, 306, 308, 317], 1: [3, 9, 11, 13, 16, 20, 24, 62, 63, 87, 97, 102, 104, 108, 111, 115, 121, 127, 132, 133, 135, 140, 142, 146, 148, 155, 158, 165, 167, 170, 173, 178, 189, 192, 193, 205, 207, 210, 217, 220, 227, 228, 230, 235, 236, 238, 250, 251, 252, 255, 257, 258, 265, 266, 271, 273, 274, 277, 279, 291, 297, 303, 304, 313], 2: [1, 33, 78, 82, 90, 93, 100, 126, 128, 129, 137, 164, 166, 171, 181, 186, 211, 222, 259, 268, 275, 289, 294, 295, 307, 316], 3: [4, 7, 10, 12, 14, 18, 19, 22, 27, 28, 30, 31, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 47, 53, 55, 56, 58, 59, 60, 61, 64, 67, 68, 70, 72, 73, 75, 80, 81, 83, 88, 91, 92, 96, 98, 99, 105, 106, 107, 110, 116, 118, 119, 122, 125, 130, 131, 138, 139, 141, 143, 144, 145, 149, 150, 151, 156, 157, 160, 163, 168, 172, 175, 176, 177, 179, 180, 182, 184, 185, 188, 190, 196, 

In [37]:
# Define the sample size
sample_size = 25  # Total number of images to sample
sampled_images = []

# Sample proportionally from each stratum
for stratum, image_indices in strata_dict.items():
    stratum_size = len(image_indices)
    stratum_sample_size = int(sample_size * (stratum_size / len(images)))
    
    # Randomly sample images from the stratum
    sampled_indices = np.random.choice(image_indices, stratum_sample_size, replace=False)
    sampled_images.extend([images[i] for i in sampled_indices])

In [38]:
output_directory = 'samples_output'
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

for i, img in enumerate(sampled_images):
    output_path = os.path.join(output_directory, f'sampled_image_{i}.png')
    cv2.imwrite(output_path, img)