### Import

In [1]:
import torch
import torch.nn
import os
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from transformers import AutoImageProcessor, Dinov2Model
from PIL import Image
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

print(torch.cuda.is_available())
print(torch.__version__)

False
2.6.0+cu124


CPU or CUDA

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [3]:
cwd = os.getcwd()
print("Current Working Directory:", cwd)
os.chdir(os.path.expanduser("~/scratch/"))
print("Changed Working Directory to:", os.getcwd())

Current Working Directory: /storage/home/hcoda1/2/cmarutha3/clef/animalclef-2025/user/chand2020
Changed Working Directory to: /storage/scratch1/2/cmarutha3


In [4]:
file_list = []
for root, dirs, files in os.walk("happy-whale-and-dolphin/train_images/"):
    for file in files:
        file_list.append(file)
len(file_list)

51033

### Extract Data

In [5]:
# class ExtractDataset(Dataset):
#     def __init__(self, data, preprocessor, transform=None):
#         self.data = data
#         self.preprocessor = preprocessor
#         self.transform = transform

#     def __len__(self):
#         return len(self.data)

#     def __getitem__(self, idx):
#         item_image = self.data.loc[idx, "image"]
#         item_label = self.data.loc[idx, "individual_id"]
#         item_image_path = os.path.join("happy-whale-and-dolphin/train_images", item_image)
#         item_image = Image.open(item_image_path)
#         item_tensors = self.preprocessor(images=item_image, return_tensors="pt")
#         item = {
#             key: value.squeeze() for key, value in item_tensors.items()
#         }  # removing batch dimension
#         item["labels"] = torch.tensor(item_label)
#         return item

In [6]:
train_df = pd.read_csv("happy-whale-and-dolphin/train.csv")

In [7]:
train_df.head()

Unnamed: 0,image,species,individual_id
0,00021adfb725ed.jpg,melon_headed_whale,cadddb1636b9
1,000562241d384d.jpg,humpback_whale,1a71fbb72250
2,0007c33415ce37.jpg,false_killer_whale,60008f293a2b
3,0007d9bca26a99.jpg,bottlenose_dolphin,4b00fe572063
4,00087baf5cef7a.jpg,humpback_whale,8e5253662392


In [8]:
# Find unique values in the 'id' column
unique_ids = train_df["individual_id"].unique()
print("Unique IDs:", unique_ids)

# Count unique values in the 'id' column
count_unique_ids = train_df["individual_id"].value_counts()
print("Count of Unique IDs:\n", count_unique_ids)

Unique IDs: ['cadddb1636b9' '1a71fbb72250' '60008f293a2b' ... '3509cb6a8504'
 'e880e47c06a4' 'bc6fcab946c4']
Count of Unique IDs:
 individual_id
37c7aba965a5    400
114207cab555    168
a6e325d8e924    155
19fbb960f07d    154
c995c043c353    153
               ... 
df7668d8f754      1
03be3cdbff9c      1
8235d066b9fe      1
d1b416bae01d      1
f727bd52d5dc      1
Name: count, Length: 15587, dtype: int64


In [9]:
# Find unique values in the 'id' column
unique_ids = train_df["species"].unique()
print("Unique IDs:", unique_ids)

# Count unique values in the 'id' column
count_unique_ids = train_df["species"].value_counts()
print("Count of Unique IDs:\n", count_unique_ids)

Unique IDs: ['melon_headed_whale' 'humpback_whale' 'false_killer_whale'
 'bottlenose_dolphin' 'beluga' 'minke_whale' 'fin_whale' 'blue_whale'
 'gray_whale' 'southern_right_whale' 'common_dolphin' 'kiler_whale'
 'pilot_whale' 'dusky_dolphin' 'killer_whale' 'long_finned_pilot_whale'
 'sei_whale' 'spinner_dolphin' 'bottlenose_dolpin' 'cuviers_beaked_whale'
 'spotted_dolphin' 'globis' 'brydes_whale' 'commersons_dolphin'
 'white_sided_dolphin' 'short_finned_pilot_whale' 'rough_toothed_dolphin'
 'pantropic_spotted_dolphin' 'pygmy_killer_whale' 'frasiers_dolphin']
Count of Unique IDs:
 species
bottlenose_dolphin           9664
beluga                       7443
humpback_whale               7392
blue_whale                   4830
false_killer_whale           3326
dusky_dolphin                3139
spinner_dolphin              1700
melon_headed_whale           1689
minke_whale                  1608
killer_whale                 1493
fin_whale                    1324
gray_whale                   112

In [10]:
ids = ["a6e325d8e924", "19fbb960f07d", "37c7aba965a5", "114207cab555"]
train_df = train_df[train_df["individual_id"].isin(ids)].reset_index(drop=True)
le = LabelEncoder()
train_df["individual_id"] = le.fit_transform(train_df["individual_id"])
n_labels = train_df["individual_id"].nunique()
n_labels

4

In [11]:
train_df

Unnamed: 0,image,species,individual_id
0,001001f099519f.jpg,minke_whale,1
1,0024057bbc89a4.jpg,minke_whale,0
2,0046ceef89b3fc.jpg,minke_whale,2
3,005e53b1b6aada.jpg,minke_whale,2
4,0106d276033b78.jpg,minke_whale,2
...,...,...,...
872,fd284a3e06aaf9.jpg,minke_whale,0
873,feb3392c8ee510.jpg,minke_whale,2
874,ffc5eb215d5539.jpg,bottlenose_dolphin,3
875,ffc71880c3066b.jpg,minke_whale,1


In [12]:
# Find unique values in the 'id' column
unique_ids = train_df["individual_id"].unique()
print("Unique IDs:", unique_ids)

# Count unique values in the 'id' column
count_unique_ids = train_df["individual_id"].value_counts()
print("Count of Unique IDs:\n", count_unique_ids)

Unique IDs: [1 0 2 3]
Count of Unique IDs:
 individual_id
2    400
0    168
3    155
1    154
Name: count, dtype: int64


### Preprocessing and Model

In [13]:
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-base")
model = Dinov2Model.from_pretrained("facebook/dinov2-base")
model.to(device)

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Dinov2Model(
  (embeddings): Dinov2Embeddings(
    (patch_embeddings): Dinov2PatchEmbeddings(
      (projection): Conv2d(3, 768, kernel_size=(14, 14), stride=(14, 14))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): Dinov2Encoder(
    (layer): ModuleList(
      (0-11): 12 x Dinov2Layer(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attention): Dinov2SdpaAttention(
          (attention): Dinov2SdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (output): Dinov2SelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (layer_scale1): Dinov2LayerScale()
        (drop_pa

### Dataset

In [14]:
# train_dataset = ExtractDataset(train_df, processor)
# train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
# validation_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)

In [15]:
# train_dataset

In [16]:
# train_loader

### Model Training

In [17]:
each = 0
item_image = train_df.iloc[each]["image"]
print(item_image)
item_label = train_df.iloc[each]["individual_id"]
print(item_label)

001001f099519f.jpg
1


In [18]:
df_last_hidden_states = pd.DataFrame([])
for each in range(train_df.shape[0]):
    try:
        item_image = train_df.iloc[each]["image"]
        item_label = train_df.iloc[each]["individual_id"]
        item_image_path = os.path.join(
            "happy-whale-and-dolphin/train_images", item_image
        )
        item_images = Image.open(item_image_path)
        inputs = processor(item_images, return_tensors="pt")

        with torch.no_grad():
            outputs = model(**inputs)
        print(outputs.shape)
        if df_last_hidden_states.shape[0] == 0:
            df_last_hidden_states = pd.DataFrame(
                outputs.last_hidden_state.reshape(0, 768)
            )
            df_last_hidden_states["label"] = item_label
        else:
            last_hidden_states = pd.DataFrame(outputs.last_hidden_state.reshape(0, 768))
            last_hidden_states["label"] = item_label
            df_last_hidden_states = pd.concat(
                [df_last_hidden_states, last_hidden_states], axis=0
            )
        print(df_last_hidden_states.shape)
    except Exception as e:
        print(f"{item_image} not found - {e}")
        continue
print(df_last_hidden_states.shape)

001001f099519f.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
0024057bbc89a4.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
0046ceef89b3fc.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
005e53b1b6aada.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
0106d276033b78.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
014ba64e8ce8ec.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
01637f0b588ed8.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
0164d8fd7a7cfe.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
019e943e277f9d.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
01e775c518a824.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
02278d7a7d207e.jpg not found - 'BaseModelOutputWithPooling' object has no attrib

176e98d361ac40.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
17ba5c5e1d5ec8.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
17ba83f812f91b.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
188aa774e2deff.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
18ed53b1d002e2.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
190c825271069a.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
19d457df19df44.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
19d74476736b24.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
19e97e679d1a88.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
1a0107195ec02c.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
1a166b231d5175.jpg not found - 'BaseModelOutputWithPooling' object has no attrib

311581012b8171.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
3127094e0fcdf1.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
313325fb266949.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
31747777993e23.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
31fe85c3f22024.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
3217d4d6340db3.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
326c5ea726ecce.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
328c21f8487178.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
332b70d5c0f4c4.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
338384de8271c1.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
338f7f730a26ce.jpg not found - 'BaseModelOutputWithPooling' object has no attrib

4a0d9884027f41.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
4a9703ebb48427.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
4c091db6af636f.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
4c4b2972499942.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
4c54da0b9261e8.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
4c73b616f74d4d.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
4c7f02043878d8.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
4cc188517bf61a.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
4cd0d37d9425ae.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
4cd37c15769539.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
4d8e00bb02376e.jpg not found - 'BaseModelOutputWithPooling' object has no attrib

6833f4f568dc04.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
68ac0cc0be4498.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
695773ca3c23d1.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
69644ad506fded.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
6a454d199defe0.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
6a63ab6cb8fe2a.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
6a9e5f8fbe31a8.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
6ad787d000fa54.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
6b3922e1fba7e8.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
6c0c897bebaea1.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
6c35982cb19643.jpg not found - 'BaseModelOutputWithPooling' object has no attrib

828f5cde5ece16.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
82c2b65e8f928f.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
833ed3fe715612.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
834d2a19a6a990.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
8350505ca9a571.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
846f977fb9db44.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
8517a6cbce80b0.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
858363568d22d0.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
861aa103361eb0.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
861ad7ef176211.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
866df5384afc83.jpg not found - 'BaseModelOutputWithPooling' object has no attrib

9c9235f12d680a.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
9ce37dff58d0f0.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
9d2f50ea4c51c8.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
9d4e93e224f81d.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
9d639e1a14664f.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
9dfcae10ec6160.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
9e5c55c4b607b3.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
9e842b28c6eefd.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
9eae156a43f028.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
9f989c2e41e21a.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
9fa3b25723d7e1.jpg not found - 'BaseModelOutputWithPooling' object has no attrib

bac68978362b5e.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
bbe4cbfc7aea9e.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
bbf5a4e6ddc4a4.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
bc1db8218906ce.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
bc47eca277f9c5.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
bc540e197e3f86.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
bc68bafdc087c5.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
bc6c01a7baf94b.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
bc7174311a8461.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
bcd997ad45434e.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
bd3219e99d5a48.jpg not found - 'BaseModelOutputWithPooling' object has no attrib

d2524063d3f943.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
d26f6650d448f6.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
d2ac84abb232c1.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
d2c8342355bc7a.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
d38319273a6f07.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
d3ee4ebaa5030a.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
d43c0c52d1e7b6.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
d458dbe863ce80.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
d46f21273c1721.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
d4db0834440778.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
d54b039f381d47.jpg not found - 'BaseModelOutputWithPooling' object has no attrib

ec383224cf7683.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
ec4e926ad6569c.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
ec82dd90515234.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
ecdf9cb51ae459.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
ed01cd7429a6ef.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
ed3ef42d9f0d3b.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
ed588dd1c2537d.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
ed604026dbd827.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
ed701bcdd9ff06.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
ed9753b8c03266.jpg not found - 'BaseModelOutputWithPooling' object has no attribute 'shape'
ee103d2413b265.jpg not found - 'BaseModelOutputWithPooling' object has no attrib

In [19]:
df_last_hidden_states.to_csv("data.csv", index=False)

In [20]:
df_last_hidden_states.head()

In [22]:
df_last_hidden_states = pd.read_csv("data.csv")

EmptyDataError: No columns to parse from file

In [None]:
flattened_states = df_last_hidden_states

# Apply PCA
n_components = 2  # Number of principal components to keep for visualization
pca = PCA(n_components=n_components)
principal_components = pca.fit_transform(flattened_states)

# Create a DataFrame for visualization
df_pca = pd.DataFrame(data=principal_components, columns=["PC1", "PC2"])
df_pca["label"] = df_last_hidden_states["label"]  # Repeat labels for each sequence

# Plot the PCA result
plt.figure(figsize=(10, 6))
for label in [0, 1, 2, 3]:
    subset = df_pca[df_pca["label"] == label]
    plt.scatter(subset["PC1"], subset["PC2"], label=f"Class {label}", alpha=0.7)

plt.title("PCA of DinoV2 Embeddings")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.legend()
plt.show()

In [None]:
flattened_states = df_last_hidden_states

# Apply PCA
n_components = 3
pca = PCA(n_components=n_components)
principal_components = pca.fit_transform(flattened_states)

# Create a DataFrame for visualization
df_pca = pd.DataFrame(data=principal_components, columns=["PC1", "PC2", "PC3"])
df_pca["label"] = df_last_hidden_states["label"]

# 3D scatter plot
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")

# Scatter plot for each class
for label in [0, 1, 2, 3]:
    label_data = df_pca[df_pca["label"] == label]
    ax.scatter(label_data["PC1"], label_data["PC2"], label_data["PC3"], label=label)

ax.set_xlabel("Principal Component 1")
ax.set_ylabel("Principal Component 2")
ax.set_zlabel("Principal Component 3")
ax.legend()

plt.title("3D PCA Scatter Plot with Class Labels")
plt.show()

In [None]:
data = flattened_states
labels = df_last_hidden_states["label"]

# Perform t-SNE
tsne = TSNE(n_components=2, perplexity=5)
embeddings_2d = tsne.fit_transform(data)

# Plot the embeddings with labels 0 and 1
plt.figure(figsize=(10, 8))
scatter = plt.scatter(
    embeddings_2d[:, 0], embeddings_2d[:, 1], c=labels, cmap="tab20", alpha=0.7, s=10
)
plt.colorbar()
plt.title("t-SNE of Embeddings with Labels 0 and 1")
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.show()