In [None]:
import pandas as pd
import numpy as np
import torch
import torchvision
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
from PIL import Image
import torch.nn as nn
import os
from tqdm import tqdm

In [None]:
# Per-channel normalisation constants (the commonly published ImageNet
# statistics), one value per RGB channel.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# Preprocessing applied to every PIL image before it reaches the network:
# resize to 224x224 with Lanczos resampling, convert to a tensor, then
# normalise each channel with the constants above.
data_transform = transforms.Compose(
    [
        transforms.Resize(
            size=(224, 224),
            interpolation=transforms.InterpolationMode.LANCZOS,
        ),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

In [None]:
# Metadata table describing the images; later cells read its "Fish Name",
# "Path", "Label" and "Type" columns.
metadata_csv = r"/kaggle/input/data-split/meta_data_64_16_20.csv"
df = pd.read_csv(metadata_csv)
df

In [None]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device

In [None]:
class FishClassifier(nn.Module):
    """ResNet-50 whose final fully connected layer is replaced by a new linear head.

    Args:
        num_classes: Number of output logits produced by the replacement head.
        weights: Value forwarded to ``models.resnet50(weights=...)``. The
            default keeps the original behaviour (ImageNet V1 weights); pass
            ``None`` to build the architecture without fetching any
            pretrained weights.
    """

    def __init__(self, num_classes=3, weights=models.ResNet50_Weights.IMAGENET1K_V1):
        super().__init__()
        self.resnet = models.resnet50(weights=weights)
        # Swap the stock classification layer for one with `num_classes` outputs.
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)

    def forward(self, x):
        """Return the logits of the wrapped ResNet for the input batch ``x``."""
        return self.resnet(x)


# The checkpoint is loaded into this model right after with a strict
# load_state_dict, which overwrites every parameter and buffer. Fetching the
# ImageNet weights first would only add a network dependency, so skip it.
model = FishClassifier(weights=None)

In [None]:
checkpoint_path = "/kaggle/input/resnet50/fish_classifier_ResNet50.pth"

# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
state_dict = torch.load(checkpoint_path, map_location=device)

# The weights are loaded through an nn.DataParallel wrapper, presumably
# because the checkpoint was saved from a wrapped model (keys prefixed with
# "module.") - confirm against the training notebook.
# The wrapper is kept under its own name so that a failed load never leaves
# `model` wrapped, which would make re-running this cell wrap it twice.
parallel_model = nn.DataParallel(model)
parallel_model.load_state_dict(state_dict)

# Keep only the underlying classifier; the wrapper is not needed afterwards.
model = parallel_model.module

model.eval()

In [None]:
class ResNetFeatureExtractor(nn.Module):
    """Truncated ResNet that returns flattened, pooled ``layer3`` activations.

    The stem (``conv1``, ``bn1``, ``relu``, ``maxpool``), ``layer1`` to
    ``layer3`` and ``avgpool`` of ``base_model`` are chained in that order;
    ``layer4`` and ``fc`` are left out. The submodules are referenced, not
    copied, so they stay shared with ``base_model``.
    """

    def __init__(self, base_model):
        super().__init__()
        # Attribute names on `base_model`, listed in execution order.
        stage_names = (
            "conv1",
            "bn1",
            "relu",
            "maxpool",
            "layer1",
            "layer2",
            "layer3",
            "avgpool",
        )
        stages = [getattr(base_model, stage) for stage in stage_names]
        self.features = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the truncated network and flatten all non-batch dims."""
        pooled = self.features(x)
        return torch.flatten(pooled, start_dim=1)



# Build the extractor from the loaded classifier's backbone, switch it to
# evaluation mode and place it on the selected device.
extractor = ResNetFeatureExtractor(model.resnet).eval().to(device)
extractor

In [None]:
base_folder = r"/kaggle/input/manhmeo/GG/xzyx7pbr3w-1"

# Parallel lists: entry k of each list describes the same successfully
# processed image.
features = []
labels = []
paths = []
types = []

# iterrows() is a generator without a length, so pass `total` explicitly to
# give tqdm a real percentage / ETA.
for i, row in tqdm(df.iterrows(), total=len(df), desc="Feature Extraction"):
    relative_path = os.path.join(row["Fish Name"], row["Path"])
    path = os.path.join(base_folder, relative_path)
    try:
        if not os.path.exists(path):
            # NOTE(review): this replaces every "_5" in the whole path,
            # directory components included - confirm that only the file
            # name suffix is meant.
            path = path.replace('_5', '_0')

        # The context manager releases the file handle deterministically;
        # convert() returns an independent, fully loaded RGB copy.
        with Image.open(path) as source_image:
            image = source_image.convert("RGB")

        # Add a leading batch dimension of size 1 before the forward pass.
        tensor_image = data_transform(image).unsqueeze(0).to(device)

        with torch.no_grad():
            features_batch = extractor(tensor_image)

        feature_vector = features_batch.squeeze(0).cpu().numpy()
        label = row["Label"]
        image_type = row["Type"]

    except Exception as e:
        # Best effort: report the failing image and move on to the next row.
        print(f"Error at {path}: {e}")
        continue

    # Append only after everything for this row succeeded, so the four lists
    # can never get out of step with one another.
    features.append(feature_vector)
    labels.append(label)
    types.append(image_type)
    paths.append(relative_path)

results = pd.DataFrame({"Path": paths, "Label": labels, "Type": types})
results

In [None]:
# Width of each extracted feature vector. The extractor stops after
# ResNet-50's layer3 followed by average pooling, which is expected to yield
# 1024 values per image.
N_FEATURES = 1024
cols = [f"Feature_{i}" for i in range(N_FEATURES)]

# One row per processed image, one column per feature component.
data = pd.DataFrame(data=features, columns=cols)
data

In [None]:
# Place the per-image metadata (`results`) and the feature columns (`data`)
# side by side; rows are matched on their index.
frames_side_by_side = [results, data]
result = pd.concat(frames_side_by_side, axis="columns")
result

In [None]:
# Write the combined metadata + feature table (`result`) to the Kaggle
# working directory, without the index column.
result.to_csv(r"/kaggle/working/feature_extraction_ResNet50_Layer3.csv", index=False)