In [8]:
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.decomposition import PCA
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

In [3]:
# Root folder of the logo images. It can be overridden with the LOGO_DATA_DIR
# environment variable so the notebook does not depend on one user's home
# directory; the default is the location the notebook was originally run from.
DATA_DIR = os.environ.get("LOGO_DATA_DIR", "/Users/bastianchuttarsing/Downloads/logo_data")

# Preprocessing / loading parameters, named so they can be tuned in one place.
IMAGE_SIZE = (224, 224)
# Per-channel normalisation statistics (the ImageNet values commonly used with
# torchvision's pretrained models).
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]
BATCH_SIZE = 32

# Resize every image to a fixed size, convert it to a tensor, then normalise.
transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])

dataset = ImageFolder(root=DATA_DIR, transform=transform)
# shuffle=False keeps the batches in the dataset's own order, so the rows of
# anything computed batch-by-batch stay aligned with the dataset.
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)


In [4]:
# Build a feature extractor from a pretrained ResNet-50: take all of its
# top-level child modules except the last one (the classification head) and
# chain them into a single Sequential model.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before changing it.
feature_layers = list(models.resnet50(pretrained=True).children())[:-1]
resnet50 = torch.nn.Sequential(*feature_layers)
# Switch to evaluation mode; as the last expression this also displays the model.
resnet50.eval()




Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [10]:
# Cache of the per-batch feature tensors. Running the network over the whole
# dataset is the expensive step of this notebook, so it is done only when the
# cache is missing. Delete the file to force a fresh extraction (e.g. after the
# dataset or the model changes).
FEATURES_CACHE_PATH = "features_list.pkl"

if os.path.exists(FEATURES_CACHE_PATH):
    # Only load caches written by this notebook: unpickling an untrusted file
    # can execute arbitrary code.
    with open(FEATURES_CACHE_PATH, "rb") as f:
        features_list = pickle.load(f)
else:
    features_list = []

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        for images, _ in dataloader:  # the labels are not needed here
            outputs = resnet50(images)
            # Flatten everything after the batch dimension into one vector per image.
            features = outputs.view(outputs.size(0), -1)
            features_list.append(features)

    with open(FEATURES_CACHE_PATH, "wb") as f:
        pickle.dump(features_list, f)

# Stack the per-batch tensors row-wise into a single matrix (one row per image).
features_matrix = torch.vstack(features_list)


KeyboardInterrupt: 

In [None]:
# Fraction of the total variance that the retained components must explain.
VARIANCE_THRESHOLD = 0.95

# Convert the feature tensor to NumPy only if it is still a tensor. The guard
# makes this cell safe to re-run: once `features_matrix` is an ndarray it no
# longer has a `.numpy()` method, so an unconditional conversion would fail.
if isinstance(features_matrix, torch.Tensor):
    features_matrix = features_matrix.numpy()

# Fit a PCA with all components to obtain the full explained-variance spectrum.
pca = PCA().fit(features_matrix)
explained_variance = np.cumsum(pca.explained_variance_ratio_)
# argmax over a boolean array gives the index of the first True, i.e. the first
# component at which the cumulative variance reaches the threshold; +1 turns
# that zero-based index into a component count.
optimal_n_components = int(np.argmax(explained_variance >= VARIANCE_THRESHOLD)) + 1
print(optimal_n_components)



NameError: name 'features_matrix' is not defined

In [None]:
# Reduce the feature matrix to `optimal_n_components` principal components.
# `features_matrix` and `optimal_n_components` are defined in earlier cells
# (the matrix is converted to NumPy there, so no conversion is repeated here).
# Note that this rebinds `pca` to a new model with fewer components.

pca = PCA(n_components = optimal_n_components)  # keep only the selected number of components
reduced_features = pca.fit_transform(features_matrix)


In [None]:
# Persist the PCA-reduced features to disk so later steps can reuse them.
with open("reduced_features_list.pkl", "wb") as reduced_file:
    pickle.dump(reduced_features, reduced_file)

In [None]:
# Elbow method: fit MiniBatchKMeans for a range of cluster counts and plot the
# resulting inertia, to pick a number of clusters for `reduced_features`.

# Fixed seed so the curve is reproducible from run to run; MiniBatchKMeans
# uses random initialisation and random mini-batches.
RANDOM_STATE = 42

K = range(1, 10)  # candidate cluster counts: 1..9
distortions = []
for k in K:
    kmeanModel = MiniBatchKMeans(n_clusters=k, random_state=RANDOM_STATE)
    kmeanModel.fit(reduced_features)
    # inertia_ is the fitted model's distortion measure for this k.
    distortions.append(kmeanModel.inertia_)

# Plotting
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(K, distortions, marker='o')
ax.set_xlabel('Number of Clusters (K)')
ax.set_ylabel('Distortion')
ax.set_title('Elbow Method For Optimal K')
ax.set_xticks(K)
ax.grid(True)
plt.show()