In [1]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import os
import numpy as np
from google.colab import drive

In [2]:
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# 1. Load pre-trained model Resnet50
class FeatureExtractor(nn.Module):
    def __init__(self):
        super(FeatureExtractor, self).__init__()
        resnet = models.resnet50(pretrained=True)
        self.features = nn.Sequential(*list(resnet.children())[:-1])  # Remove the final classification layer

    def forward(self, x):
        x = self.features(x)
        return x.view(x.size(0), -1)  # Flatten

# 2. Function to extract features for a single image
def extract_features(image_path, model, transform):
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0)  # Add batch dimension
    with torch.no_grad():
        features = model(image)
    return features.numpy()

# 3. Transform for input images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 4. Function to extract and save features for the entire dataset
def save_features(data_dir, feature_save_path, model, transform):
    all_features = []
    labels = []
    dem=0
    for subfolder in os.listdir(data_dir):
        subfolder_path = os.path.join(data_dir, subfolder)
        if os.path.isdir(subfolder_path):
            image_files = [f for f in os.listdir(subfolder_path) if f.endswith(('jpg', 'jpeg', 'png'))]
            for image_file in image_files:
                image_path = os.path.join(subfolder_path, image_file)
                features = extract_features(image_path, model, transform)
                all_features.append(features)
                dem+=1
                if dem%10==0:
                  print(dem)
                labels.append(image_file)  # Save the folder name as the label for the image

    all_features = np.vstack(all_features)  # Combine all features
    np.save(feature_save_path, all_features)  # Save features to a .npy file
    np.save('labels.npy', np.array(labels))  # Save labels to a .npy file

# 5. Load features from the saved file
def load_features(feature_save_path):
    return np.load(feature_save_path)

# Main execution
if __name__ == "__main__":
    data_dir = "/content/drive/MyDrive/ImagesData"
    feature_save_path = "/content/saved_features.npy"

    # Load the feature extractor model
    model = FeatureExtractor()
    model.eval()

    # Extract and save features from the database
    save_features(data_dir, feature_save_path, model, transform)

    # features = load_features(feature_save_path)
    print("Feature extraction completed and saved.")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 155MB/s]


10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
510
520
530
540
550
560
570
580
590
600
610
620
630
640
650
660
670
680
690
700
710
720
730
740
750
760
770
780
790
800
810
820
830
840
850
860
870
880
890
900
910
920
930
940
950
960
970
980
990
1000
1010
1020
1030
1040
1050
1060
1070
1080
1090
1100
1110
1120
1130
1140
1150
1160
1170
1180
1190
1200
Feature extraction completed and saved.


In [4]:
import numpy as np

# Load the .npy file
data = np.load('/content/saved_features.npy')

# In case you have saved labels as well
labels = np.load('/content/labels.npy')

# Print the shape of the loaded data (optional)
print("Feature shape:", data.shape)
print("Labels shape:", labels.shape)

# Accessing the features and labels
print("First feature vector:", data[0])
print("First label:", labels[0])

Feature shape: (1200, 2048)
Labels shape: (1200,)
First feature vector: [0.18217397 0.18432961 0.0616415  ... 0.84824365 2.4770572  0.51128876]
First label: Headwear_26.2.jpg
