In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
       # print(os.path.join(dirname, filename))
        pass

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
!pip install guided-filter-pytorch

Collecting guided-filter-pytorch
  Downloading guided_filter_pytorch-3.7.5-py3-none-any.whl.metadata (1.6 kB)
Downloading guided_filter_pytorch-3.7.5-py3-none-any.whl (3.8 kB)
Installing collected packages: guided-filter-pytorch
Successfully installed guided-filter-pytorch-3.7.5


In [4]:
import torch.optim as optim
import torch.nn.functional as F
import torch.cuda as cuda
from torchvision import transforms
from torchvision.datasets import ImageFolder
import torchvision

In [5]:
import torch
import numpy as np
import cv2
from guided_filter_pytorch.guided_filter import GuidedFilter

def createLowFrequencyComponent(img, guided_filter_Radius = 10):

    image = cv2.imread(img)
    grayscale_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    img_tensor = torch.from_numpy(image).float().permute(2, 0, 1).unsqueeze(0) / 255.0
    gray_tensor = torch.from_numpy(grayscale_image).float().unsqueeze(0).unsqueeze(0) / 255.0

    # Use the already defined hr_x (GuidedFilter instance)
    GF = GuidedFilter(r=guided_filter_Radius, eps=0.01)

    low_freq_image = GF(gray_tensor, img_tensor)
    low_freq_image = low_freq_image.squeeze(0).permute(1, 2, 0)    ## convert tensor to proper image dimensions
    low_freq_image = low_freq_image.numpy()     ## convert tensor to numpy array

    return low_freq_image

def createHighFrequencyComponent(img, epsilon=0.01):

    image = cv2.imread(img)
    eps = np.full((1200, 1600, 3), epsilon)     ## for numerical stability
    eps_tensor = torch.from_numpy(eps).float().permute(0, 1, 2)     ## convert eps to tensor

    # create the low frequency image
    low_freq_image = createLowFrequencyComponent(img)
    low_freq_image = torch.from_numpy(low_freq_image)

    # create the high frequency image
    high_frequency_image = image/(low_freq_image + eps_tensor)
    Ih_yuv = cv2.cvtColor(high_frequency_image.detach().numpy(), cv2.COLOR_RGB2YUV)
    Y = Ih_yuv[:, :, 0]
    high_frequency_image = (Y - Y.min()) / (Y.max() - Y.min())

    return high_frequency_image

In [6]:
print(torch.__version__)
print(torchvision.__version__)

2.6.0+cu124
0.21.0+cu124


In [7]:
import os
import zipfile

# Path where Kaggle stores user-uploaded datasets
dataset_path = "/kaggle/input/11k-hands-training-dataset"

'''# Unzip (if needed)
with zipfile.ZipFile(f"{dataset_path}", 'r') as zip_ref:
    zip_ref.extractall("/kaggle/working/")'''

# Final path
data_root = "/kaggle/input/11k-hands-training-dataset/content/drive/MyDrive/train_images/train"

In [8]:
IMG_SIZE = 224
BATCH_SIZE = 32

# override the ImageFolder to include the custom fucntion
class CustomImageFolder(ImageFolder):
    def __init__(self, root, transform=None):
        super().__init__(root=root, transform=None)  # disable transform for now
        self.base_transform = transform  # keep your transform pipeline without the custom fn

    def __getitem__(self, index):
        path, target = self.samples[index]

        # custom arg is the image itself in your case
        img = createLowFrequencyComponent(path)
        img = (img - img.min())/(img.max() - img.min())
        img = torch.from_numpy(img).permute(2, 0, 1).float()

        if self.base_transform is not None:
            img = self.base_transform(img)

        return img, target

base_transform = transforms.Compose([
    transforms.Resize((224, 224))
])

dataset = CustomImageFolder(root=data_root, transform=base_transform)
dataloader_stream1 = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
classes = dataset.classes

In [9]:
from torch import nn
from torchvision.models.alexnet import AlexNet_Weights

class ModifiedFirstStream(nn.Module):
    def __init__(self):
        super(ModifiedFirstStream, self).__init__()

        # Load pretrained AlexNet
        alexnet = torchvision.models.alexnet(pretrained=AlexNet_Weights.DEFAULT)

        # Use AlexNet features (conv1 to conv5)
        self.features = alexnet.features  # Conv layers

        # Use AlexNet fc6 and fc7
        self.fc6 = alexnet.classifier[0]  # Linear(9216, 4096)
        self.relu6 = alexnet.classifier[1]
        self.dropout6 = alexnet.classifier[2]

        self.fc7 = alexnet.classifier[3]  # Linear(4096, 4096)
        self.relu7 = alexnet.classifier[4]
        self.dropout7 = alexnet.classifier[5]

        # Custom fc8 and fc9 layers
        self.fc8 = nn.Linear(4096, 2048)
        self.relu8 = nn.ReLU()
        self.dropout8 = nn.Dropout(p=0.5)

        self.fc9 = nn.Linear(2048, 531)

        self.modfc = nn.Linear(531, 2)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.features(x)              # conv1–conv5
        x = torch.flatten(x, 1)           # Flatten to (B, 9216)

        x = self.fc6(x)
        x = self.relu6(x)
        x = self.dropout6(x)

        x = self.fc7(x)
        x = self.relu7(x)
        x = self.dropout7(x)

        x = self.fc8(x)
        x = self.relu8(x)
        x = self.dropout8(x)

        x = self.fc9(x)
        x = self.modfc(x)
        x = self.softmax(x)

        return x

In [10]:
device = torch.device("cuda" if cuda.is_available() else "cpu")

model = ModifiedFirstStream()
model.load_state_dict(torch.load('/kaggle/input/stream1-model-state-dict/stream1_model.pth'))

modified_first_stream = model.to(device)

pretrained_params = (
    list(modified_first_stream.features.parameters())
    + list(modified_first_stream.fc6.parameters()) + list(modified_first_stream.relu6.parameters()) + list(modified_first_stream.dropout6.parameters())
    + list(modified_first_stream.fc7.parameters()) + list(modified_first_stream.relu7.parameters()) + list(modified_first_stream.dropout7.parameters())
    + list(modified_first_stream.fc8.parameters()) + list(modified_first_stream.relu8.parameters()) + list(modified_first_stream.dropout8.parameters())
    + list(modified_first_stream.fc9.parameters())
)

new_fc_params = list(modified_first_stream.modfc.parameters()) + list(modified_first_stream.softmax.parameters())

optimizer = optim.SGD([
    {'params': pretrained_params, 'lr': 1e-4},
    {'params': new_fc_params, 'lr': 0.002}
], momentum=0.9)

criterion = nn.CrossEntropyLoss()

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:01<00:00, 204MB/s] 


In [11]:
import time
from tqdm import tqdm

num_epochs = 5
for epoch in range(num_epochs):
    modified_first_stream.train()
    total_loss = 0.0
    correct = 0
    total = 0

    # Start timing this epoch
    start_time = time.time()

    # tqdm progress bar for this epoch
    loop = tqdm(dataloader_stream1, total=len(dataloader_stream1), desc=f"Epoch {epoch+1}/{num_epochs}")

    for images, labels in loop:
        images, labels = images.to(device), labels.to(device)

        outputs = modified_first_stream(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

        # Update progress bar with latest metrics
        loop.set_postfix(loss=loss.item(), acc=correct / total)

    # End timing
    epoch_time = time.time() - start_time
    print(f"Epoch {epoch+1} completed in {epoch_time:.2f} sec — Accuracy: {correct/total:.4f}, Loss: {total_loss/total:.4f}")
    torch.save(modified_first_stream.state_dict(), "/kaggle/working/stream1_model.pth")

Epoch 1/5: 100%|██████████| 243/243 [51:30<00:00, 12.72s/it, acc=0.918, loss=0.432] 


Epoch 1 completed in 3090.65 sec — Accuracy: 0.9176, Loss: 0.3984


Epoch 2/5: 100%|██████████| 243/243 [49:10<00:00, 12.14s/it, acc=0.924, loss=0.531]


Epoch 2 completed in 2950.88 sec — Accuracy: 0.9235, Loss: 0.3917


Epoch 3/5: 100%|██████████| 243/243 [47:15<00:00, 11.67s/it, acc=0.928, loss=0.345]


Epoch 3 completed in 2835.22 sec — Accuracy: 0.9282, Loss: 0.3864


Epoch 4/5: 100%|██████████| 243/243 [48:31<00:00, 11.98s/it, acc=0.935, loss=0.411]


Epoch 4 completed in 2911.63 sec — Accuracy: 0.9347, Loss: 0.3788


Epoch 5/5: 100%|██████████| 243/243 [50:45<00:00, 12.53s/it, acc=0.937, loss=0.317]


Epoch 5 completed in 3045.36 sec — Accuracy: 0.9369, Loss: 0.3759


In [12]:
torch.save(modified_first_stream.state_dict(), "/kaggle/working/stream1_model.pth")