In [1]:
import os
import cv2
from PIL import Image
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from torch.autograd import Variable
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, SpectralClustering,BisectingKMeans
from sklearn.metrics import fowlkes_mallows_score, silhouette_score
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import DBSCAN

import warnings
warnings.filterwarnings("ignore")

In [5]:
image_dir='DMresizedimages'
feature_model=models.resnet18(pretrained=True)
feature_model=torch.nn.Sequential(*(list(feature_model.children())[:-1]))

def process_image():
    transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])
    return transform

X = []  
Y = []  
for image_filename in os.listdir(image_dir):
    if image_filename.endswith(".jpg"):
        image_path = os.path.join(image_dir, image_filename)
        class_name = image_filename.split('_')[0]
        img = Image.open(image_path).convert("RGB")
        t2 = process_image()
        img=t2(img)
        img=Variable(img.unsqueeze(0))
        features = feature_model(img).squeeze().detach().numpy()
        Y.append(class_name)
        X.append(features)
X = np.array(X)
Y = np.array(Y)


References:1.https://stackoverflow.com/questions/55083642/extract-features-from-last-hidden-layer-pytorch-resnet18
2.https://docs.python.org/3/library/os.html

In [7]:
pca=PCA(2)
pca_x=pca.fit_transform(X)

References:1.https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html

In [16]:
fowlkes_mallow=[]
silhouette=[]
kmeansr = KMeans(n_clusters=4, init='random', random_state=42)
kmeansr_pred =kmeansr.fit_predict(pca_x)
fowlkes_mallow.append(fowlkes_mallows_score(Y, kmeansr_pred))
silhouette.append(silhouette_score(pca_x,kmeansr_pred))

Reference:https://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html#sklearn.cluster.KMeans

In [17]:
# (b) KMeans with init='k-means++'
kmeansplus= KMeans(n_clusters=4, init='k-means++', random_state=42)
kmeansplus_pred = kmeansplus.fit_predict(pca_x)
fowlkes_mallow.append(fowlkes_mallows_score(Y, kmeansplus_pred))
silhouette.append(silhouette_score(pca_x,kmeansplus_pred))


Reference:https://scikit-learn.org/stable/modules/clustering.html#k-means

In [19]:
cluster = SpectralClustering(n_clusters=4, random_state=42)
cluster_pred = cluster.fit_predict(pca_x)
fowlkes_mallow.append(fowlkes_mallows_score(Y, cluster_pred))
silhouette.append(silhouette_score(pca_x,cluster_pred))

Reference:https://scikit-learn.org/stable/modules/generated/sklearn.cluster.SpectralClustering.html#sklearn.cluster.SpectralClustering

In [20]:
bisecting = BisectingKMeans(n_clusters=4, init='random', random_state=42)
bisecting_pred = bisecting.fit_predict(pca_x)
fowlkes_mallow.append(fowlkes_mallows_score(Y, bisecting_pred))
silhouette.append(silhouette_score(pca_x,bisecting_pred))

Reference:https://scikit-learn.org/stable/modules/generated/sklearn.cluster.BisectingKMeans.html#sklearn.cluster.BisectingKMeans

In [21]:
for link in ['single','complete','average','ward']:
    model=AgglomerativeClustering(n_clusters=4, linkage= link)
    model_pred=model.fit_predict(pca_x)
    fowlkes_mallow.append(fowlkes_mallows_score(Y, model_pred))
    silhouette.append(silhouette_score(pca_x,model_pred))

Reference:https://scikit-learn.org/stable/modules/generated/sklearn.cluster.AgglomerativeClustering.html#sklearn.cluster.AgglomerativeClustering

In [23]:
dbscan = DBSCAN(eps=0.03, min_samples=15)
dbscan_pred = dbscan.fit_predict(pca_x)
clus = len(set(dbscan_pred)) - (1 if -1 in dbscan_pred else 0)
print(f"Number of clusters : {clus}")
fowlkes_mallow.append(fowlkes_mallows_score(Y, dbscan_pred))
silhouette.append(silhouette_score(pca_x,dbscan_pred))

Number of clusters : 4


Reference:https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html#sklearn.cluster.DBSCAN

In [24]:
methods=['Kmean-init','kmeans++','spectral-clustering','bisectingkmeans','single','complete','average','ward','dbscan']

Reference:https://www.geeksforgeeks.org/create-a-pandas-dataframe-from-lists/

In [25]:
scores_df={'model':methods,'fowlkes_score':fowlkes_mallow ,'silhouette_score':silhouette}

In [26]:
scored_df=pd.DataFrame(scores_df)

In [28]:
scored_df

Unnamed: 0,model,fowlkes_score,silhouette_score
0,Kmean-init,0.291024,0.341904
1,kmeans++,0.290342,0.341737
2,spectral-clustering,0.316235,0.303841
3,bisectingkmeans,0.281432,0.283964
4,single,0.499392,0.135647
5,complete,0.279024,0.248104
6,average,0.388379,0.236351
7,ward,0.290948,0.286439
8,dbscan,0.321849,-0.164484


(c) Rank the methods from the best to the worst for our dataset based on Fowlkes-Mallows index.
(0.5 point)


In [29]:
scored_df.sort_values(by="fowlkes_score",ascending=False)

Unnamed: 0,model,fowlkes_score,silhouette_score
4,single,0.499392,0.135647
6,average,0.388379,0.236351
8,dbscan,0.321849,-0.164484
2,spectral-clustering,0.316235,0.303841
0,Kmean-init,0.291024,0.341904
7,ward,0.290948,0.286439
1,kmeans++,0.290342,0.341737
3,bisectingkmeans,0.281432,0.283964
5,complete,0.279024,0.248104


(d) Rank the methods from the best to the worst for our dataset based on Silhouette Coefficient.
(0.5 point

In [30]:
scored_df.sort_values(by="silhouette_score",ascending=False)

Unnamed: 0,model,fowlkes_score,silhouette_score
0,Kmean-init,0.291024,0.341904
1,kmeans++,0.290342,0.341737
2,spectral-clustering,0.316235,0.303841
7,ward,0.290948,0.286439
3,bisectingkmeans,0.281432,0.283964
5,complete,0.279024,0.248104
6,average,0.388379,0.236351
4,single,0.499392,0.135647
8,dbscan,0.321849,-0.164484


Reference: https://www.geeksforgeeks.org/create-a-pandas-dataframe-from-lists/

    