In [None]:
!pip install torch torchvision torchaudio
!pip install git+https://github.com/openai/CLIP.git
!pip install matplotlib



In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import clip  # OpenAI's CLIP
from tqdm import tqdm

# Check device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load CLIP model and preprocessing
clip_model, _ = clip.load("ViT-B/32", device=device)

# Custom transform to 128x128 + CLIP normalization
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.48145466, 0.4578275, 0.40821073),
                         (0.26862954, 0.26130258, 0.27577711))
])

# Replace this with your dataset path
data_dir = "C:/Users/shazi/OneDrive/Desktop/VS Code/fyp/skintypepatches 128x128"  # example: "dataset/"

# Load dataset
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# Create DataLoaders
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Print class names
print("Classes:", dataset.classes)

# Freeze CLIP vision encoder layers
for param in clip_model.visual.parameters():
    param.requires_grad = False

# Build the model using CLIP ViT encoder
class CLIPSkinClassifier(nn.Module):
    def __init__(self, clip_model, num_classes=3):
        super(CLIPSkinClassifier, self).__init__()
        self.encoder = clip_model.visual
        self.classifier = nn.Sequential(
            nn.Linear(self.encoder.output_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        features = self.encoder(x)
        logits = self.classifier(features)
        return logits

# Initialize model
model = CLIPSkinClassifier(clip_model, num_classes=3).to(device)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training loop
def train_model(model, dataloader, criterion, optimizer, epochs=5):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        loop = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
        for images, labels in loop:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Metrics
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            loop.set_postfix(loss=loss.item(), acc=100. * correct / total)

        print(f"Epoch {epoch+1}: Loss={running_loss/len(dataloader):.4f}, Accuracy={100. * correct/total:.2f}%")

# Train
train_model(model, train_loader, criterion, optimizer, epochs=10)


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import clip  # OpenAI's CLIP
from tqdm import tqdm

# Check device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load CLIP model and preprocessing
clip_model, _ = clip.load("ViT-B/32", device=device)

# Custom transform to 128x128 + CLIP normalization
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.48145466, 0.4578275, 0.40821073),
                         (0.26862954, 0.26130258, 0.27577711))
])

# Your dataset path
data_dir = "C:/Users/shazi/OneDrive/Desktop/VS Code/fyp/skintypepatches 128x128"

# Load dataset
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# Create DataLoader
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Print class names
print("Classes:", dataset.classes)

# Freeze CLIP vision encoder layers
for param in clip_model.visual.parameters():
    param.requires_grad = False

# Build model with CLIP ViT encoder
class CLIPSkinClassifier(nn.Module):
    def __init__(self, clip_model, num_classes=3):
        super(CLIPSkinClassifier, self).__init__()
        self.encoder = clip_model.visual
        self.classifier = nn.Sequential(
            nn.Linear(self.encoder.output_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = x.half()  # Ensure input is float16
        features = self.encoder(x)
        logits = self.classifier(features)
        return logits

# Initialize model and convert to half precision
model = CLIPSkinClassifier(clip_model, num_classes=3).to(device).half()

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training loop
def train_model(model, dataloader, criterion, optimizer, epochs=5):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        loop = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
        for images, labels in loop:
            images = images.to(device).half()  # Match input type to model (float16)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Metrics
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            loop.set_postfix(loss=loss.item(), acc=100. * correct / total)

        print(f"Epoch {epoch+1}: Loss={running_loss/len(dataloader):.4f}, Accuracy={100. * correct/total:.2f}%")

# Train the model
train_model(model, train_loader, criterion, optimizer, epochs=10)


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import clip  # OpenAI's CLIP
from tqdm import tqdm

# Check device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load CLIP model and preprocessing
clip_model, _ = clip.load("ViT-B/32", device=device)

# Custom transform to 224x224 + CLIP normalization (corrected size)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to 224x224 for CLIP compatibility
    transforms.ToTensor(),
    transforms.Normalize((0.48145466, 0.4578275, 0.40821073),
                         (0.26862954, 0.26130258, 0.27577711))
])

# Replace this with your dataset path
data_dir = r"C:\Users\shazi\OneDrive\Desktop\VS Code\fyp\skintypepatches 128x128"  # example: "dataset/"

# Load dataset
dataset = datasets.ImageFolder(root=data_dir, transform=transform)

# Create DataLoaders
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Print class names
print("Classes:", dataset.classes)

# Freeze CLIP vision encoder layers
for param in clip_model.visual.parameters():
    param.requires_grad = False

# Build the model using CLIP ViT encoder
class CLIPSkinClassifier(nn.Module):
    def __init__(self, clip_model, num_classes=3):
        super(CLIPSkinClassifier, self).__init__()
        self.encoder = clip_model.visual
        self.classifier = nn.Sequential(
            nn.Linear(self.encoder.output_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        features = self.encoder(x)
        logits = self.classifier(features)
        return logits

# Initialize model
model = CLIPSkinClassifier(clip_model, num_classes=3).to(device).float()  # Ensure model is in float32

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Training loop
def train_model(model, dataloader, criterion, optimizer, epochs=5):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        loop = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
        for images, labels in tqdm(loop):
            images, labels = images.to(device).float(), labels.to(device)  # Ensure input is in float32

            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Metrics
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

            loop.set_postfix(loss=loss.item(), acc=100. * correct / total)

        print(f"Epoch {epoch+1}: Loss={running_loss/len(dataloader):.4f}, Accuracy={100. * correct/total:.2f}%")

# Train
train_model(model, train_loader, criterion, optimizer, epochs=10)


DNN

In [3]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import clip

# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load CLIP model
clip_model, preprocess_clip = clip.load("ViT-B/32", device=device)
clip_model.eval()  # We won't fine-tune CLIP itself

# Dataset path
data_dir = 'stage1patches'

# Add slight augmentation to training set
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    preprocess_clip,
])

# Use only CLIP's transform for validation
val_transform = preprocess_clip

# Load dataset
full_dataset = datasets.ImageFolder(data_dir)
class_names = full_dataset.classes
print(f"Classes: {class_names}")

# Manual split with transforms applied
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_data, val_data = torch.utils.data.random_split(full_dataset, [train_size, val_size])

train_data.dataset.transform = train_transform
val_data.dataset.transform = val_transform

train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False, num_workers=4)

# Feature extractor from CLIP (used inside training loop)
def extract_features(model, images):
    with torch.no_grad():
        return model.encode_image(images).float()

# Improved classifier
class MLPClassifier(nn.Module):
    def __init__(self, input_dim=512, num_classes=3):
        super(MLPClassifier, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        return self.classifier(x)

# Initialize classifier
model = MLPClassifier(input_dim=512, num_classes=len(class_names)).to(device)

# Loss, optimizer, scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=5, verbose=True)

# Training loop
best_acc = 0.0
epochs = 100
for epoch in range(epochs):
    model.train()
    total_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        features = extract_features(clip_model, images)

        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Validation
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            features = extract_features(clip_model, images)
            outputs = model(features)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    acc = correct / total
    scheduler.step(acc)
    print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss:.4f} - Val Acc: {acc:.4f}")

    if acc > best_acc:
        best_acc = acc
        torch.save(model.state_dict(), "best_model.pth")

    if acc >= 0.96:
        print("🎯 Target accuracy of 96% reached! Stopping early.")
        break


Using device: cuda
Classes: ['dryPatches', 'normalPatch', 'oilyPatch']




Epoch 1/100 - Loss: 602.9782 - Val Acc: 0.4838
Epoch 2/100 - Loss: 571.0641 - Val Acc: 0.5034
Epoch 3/100 - Loss: 553.4542 - Val Acc: 0.5153
Epoch 4/100 - Loss: 541.4493 - Val Acc: 0.5281
Epoch 5/100 - Loss: 531.4673 - Val Acc: 0.5333
Epoch 6/100 - Loss: 517.9318 - Val Acc: 0.5533
Epoch 7/100 - Loss: 507.6806 - Val Acc: 0.5596
Epoch 8/100 - Loss: 497.1901 - Val Acc: 0.5764
Epoch 9/100 - Loss: 488.4810 - Val Acc: 0.5755
Epoch 10/100 - Loss: 478.9524 - Val Acc: 0.5856
Epoch 11/100 - Loss: 469.0457 - Val Acc: 0.5919
Epoch 12/100 - Loss: 458.9447 - Val Acc: 0.6022
Epoch 13/100 - Loss: 449.1411 - Val Acc: 0.6090
Epoch 14/100 - Loss: 442.1322 - Val Acc: 0.6151
Epoch 15/100 - Loss: 434.6999 - Val Acc: 0.6265
Epoch 16/100 - Loss: 424.5018 - Val Acc: 0.6249
Epoch 17/100 - Loss: 417.5149 - Val Acc: 0.6324
Epoch 18/100 - Loss: 412.0345 - Val Acc: 0.6384
Epoch 19/100 - Loss: 404.4299 - Val Acc: 0.6445
Epoch 20/100 - Loss: 393.3150 - Val Acc: 0.6508
Epoch 21/100 - Loss: 384.9321 - Val Acc: 0.6643
E

FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "c:\Users\shazi\OneDrive\Desktop\VS Code\.venv\Lib\site-packages\torch\utils\data\_utils\worker.py", line 349, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\shazi\OneDrive\Desktop\VS Code\.venv\Lib\site-packages\torch\utils\data\_utils\fetch.py", line 50, in fetch
    data = self.dataset.__getitems__(possibly_batched_index)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\shazi\OneDrive\Desktop\VS Code\.venv\Lib\site-packages\torch\utils\data\dataset.py", line 420, in __getitems__
    return [self.dataset[self.indices[idx]] for idx in indices]
            ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
  File "c:\Users\shazi\OneDrive\Desktop\VS Code\.venv\Lib\site-packages\torchvision\datasets\folder.py", line 245, in __getitem__
    sample = self.loader(path)
             ^^^^^^^^^^^^^^^^^
  File "c:\Users\shazi\OneDrive\Desktop\VS Code\.venv\Lib\site-packages\torchvision\datasets\folder.py", line 284, in default_loader
    return pil_loader(path)
           ^^^^^^^^^^^^^^^^
  File "c:\Users\shazi\OneDrive\Desktop\VS Code\.venv\Lib\site-packages\torchvision\datasets\folder.py", line 262, in pil_loader
    with open(path, "rb") as f:
         ^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: 'stage1patches\\normalPatch\\normal (147)_patch_4959.jpg'
