# data loading and preprocessing


In [2]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, Dataset
from PIL import Image

class ImageDataset(Dataset):
    """Binary-labelled image dataset driven by a CSV label file.

    The CSV is filtered down to the rows whose ``image_type`` column equals
    ``image_type``. A 0/1 ``label`` column is then derived by comparing the
    ``category`` column with ``binary_category``.

    Args:
        image_dir: Directory that each image file name is joined onto.
        label_file: Path of the CSV file; it must provide ``image_type`` and
            ``category`` columns.
        image_type: Value of ``image_type`` selecting the rows to keep.
        binary_category: Value of ``category`` mapped to label 1; every other
            category becomes label 0.
        transform: Optional callable applied to each loaded RGB image.
    """

    # Positional index of the CSV column that holds the image file name.
    # NOTE(review): assumes the file name is the third CSV column - confirm.
    _FILENAME_COL = 2

    def __init__(self, image_dir, label_file, image_type, binary_category, transform=None):
        self.image_dir = image_dir
        all_rows = pd.read_csv(label_file)
        # .copy() detaches the filtered rows from the full frame, so adding the
        # derived column below never writes through (or warns about) a slice.
        self.labels = all_rows[all_rows['image_type'] == image_type].copy()
        self.labels['label'] = (self.labels['category'] == binary_category).astype(int)
        self.transform = transform

    def __len__(self):
        """Return the number of rows left after the ``image_type`` filter."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        The image is opened from ``image_dir``, converted to RGB and passed
        through ``transform`` when one was given; ``label`` is a plain int.
        """
        img_name = os.path.join(self.image_dir, self.labels.iloc[idx, self._FILENAME_COL])
        # The context manager closes the underlying file once the RGB copy exists.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert("RGB")
        # Look the label up by name: its position depends on how many columns
        # the CSV has, so a fixed positional index can silently pick another column.
        label = int(self.labels['label'].iloc[idx])
        if self.transform:
            image = self.transform(image)
        return image, label

def load_data(config):
    """Build the train / validation / test ``DataLoader`` objects from ``config['data']``.

    Keys read from ``config['data']``: ``image_size``, ``image_dir``,
    ``label_file``, ``image_type``, ``binary_category``, ``test_split``,
    ``validation_split``, ``batch_size`` and the optional ``seed``
    (defaults to 42).

    The split is stratified on the binary label and seeded, so every call
    with the same configuration yields the same partition. This matters
    because training and evaluation each call this function: without a
    fixed seed the "test" images of one call overlap the training images
    of the other.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``. Only the training
        loader shuffles and applies random augmentation.
    """
    data_cfg = config['data']
    seed = data_cfg.get('seed', 42)

    # Shared deterministic tail of both pipelines.
    # NOTE(review): mean/std look like the usual ImageNet channel statistics - confirm.
    def _to_normalized_tensor():
        return [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]

    train_transform = transforms.Compose([
        transforms.Resize(data_cfg['image_size']),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        *_to_normalized_tensor(),
    ])
    # Validation / test images must not be randomly flipped or rotated,
    # otherwise the reported metrics change from run to run.
    eval_transform = transforms.Compose([
        transforms.Resize(data_cfg['image_size']),
        *_to_normalized_tensor(),
    ])

    def _make_dataset(transform):
        return ImageDataset(
            data_cfg['image_dir'],
            data_cfg['label_file'],
            data_cfg['image_type'],
            data_cfg['binary_category'],
            transform=transform,
        )

    # Same rows in the same order; only the transform differs.
    train_dataset = _make_dataset(train_transform)
    eval_dataset = _make_dataset(eval_transform)
    targets = train_dataset.labels['label']

    train_idx, test_idx = train_test_split(
        range(len(train_dataset)),
        test_size=data_cfg['test_split'],
        stratify=targets,
        random_state=seed,
    )

    # validation_split is expressed as a fraction of the whole dataset, so it
    # is rescaled to a fraction of what remains after removing the test set.
    train_idx, val_idx = train_test_split(
        train_idx,
        test_size=data_cfg['validation_split'] / (1 - data_cfg['test_split']),
        stratify=targets.iloc[train_idx],
        random_state=seed,
    )

    batch_size = data_cfg['batch_size']
    train_loader = DataLoader(Subset(train_dataset, train_idx), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(Subset(eval_dataset, val_idx), batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(Subset(eval_dataset, test_idx), batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader

# Model creation

In [3]:
import torch.nn as nn
import torchvision.models as models

def get_model(model_name, num_classes, pretrained=True):
    """Create a classification network with an output layer of ``num_classes`` units.

    Args:
        model_name: Architecture identifier; only ``"VGG16"`` is handled.
        num_classes: Number of outputs of the replaced final linear layer.
        pretrained: Forwarded to ``models.vgg16``.

    Returns:
        The VGG16 model whose ``classifier[6]`` layer has been replaced.

    Raises:
        ValueError: If ``model_name`` is anything other than ``"VGG16"``.
    """
    # Guard clause: reject unknown architectures before building anything.
    if model_name != "VGG16":
        raise ValueError(f"Model {model_name} is not supported.")

    network = models.vgg16(pretrained=pretrained)
    # Swap the last classifier layer for one sized to the requested class count.
    head_in_features = network.classifier[6].in_features
    network.classifier[6] = nn.Linear(head_in_features, num_classes)
    return network


# training 

In [4]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
# from src.model import get_model
# from src.data_loading import load_data
# from src.utils import save_model
# from src.metrics import calculate_metrics

def train_model(config):
    """Train the configured model and save its weights.

    Reads ``config['model']`` (``name``, ``num_classes``, ``pretrained``) and
    ``config['training']`` (``learning_rate``, ``epochs``, ``save_path``).
    After every epoch the mean training loss and the validation metrics are
    printed; once all epochs are done the model is passed to ``save_model``.
    """
    train_loader, val_loader, _ = load_data(config)

    model = get_model(config['model']['name'], config['model']['num_classes'], config['model']['pretrained'])

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config['training']['learning_rate'])

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    num_epochs = config['training']['epochs']
    for epoch in range(num_epochs):
        # Re-enable training mode at the start of EVERY epoch: the validation
        # pass at the end of the previous epoch puts the model in eval mode,
        # which would otherwise leave dropout disabled from epoch 2 onwards.
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")

        # Validation
        val_metrics = calculate_metrics(model, val_loader, device, criterion)
        print(f"Validation Metrics: {val_metrics}")

    save_model(model, config['training']['save_path'])

# evaluation

In [5]:
import torch
# from src.data_loading import load_data
# from src.utils import load_model
# from src.metrics import calculate_metrics

def evaluate_model(config):
    """Score the saved model on the test split and print the metrics.

    The weights are loaded from ``config['training']['save_path']``; the
    model is moved to CUDA when available (CPU otherwise) and switched to
    evaluation mode before ``calculate_metrics`` is run on the test loader.
    """
    # Only the third loader (test) is needed here.
    _train_loader, _val_loader, test_loader = load_data(config)
    checkpoint_path = config['training']['save_path']
    network = load_model(checkpoint_path)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    network.to(device)
    network.eval()

    test_metrics = calculate_metrics(network, test_loader, device)
    print(f"Test Metrics: {test_metrics}")

# metrics

In [6]:
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def calculate_metrics(model, dataloader, device, criterion=None):
    """Compute classification metrics for ``model`` over ``dataloader``.

    The model is evaluated in eval mode with gradients disabled, and its
    previous train/eval mode is restored before returning, so calling this
    in the middle of a training loop does not leave the model in eval mode.

    Args:
        model: Network whose output is arg-maxed along dimension 1 to get
            the predicted class.
        dataloader: Iterable of ``(images, labels)`` batches that also
            supports ``len()``.
        device: Device each batch is moved to before the forward pass.
        criterion: Optional loss callable ``criterion(outputs, labels)``.

    Returns:
        Dict with ``accuracy``, macro-averaged ``precision``, ``recall`` and
        ``f1_score``; plus ``loss`` (mean of the per-batch losses) when a
        criterion is supplied.
    """
    was_training = model.training
    model.eval()
    all_labels = []
    all_preds = []
    total_loss = 0.0

    try:
        with torch.no_grad():
            for images, labels in dataloader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, preds = torch.max(outputs, 1)

                # Explicit None check: a loss object should never be skipped
                # just because it happens to evaluate as falsy.
                if criterion is not None:
                    loss = criterion(outputs, labels)
                    total_loss += loss.item()

                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(preds.cpu().numpy())
    finally:
        # Hand the model back in the mode the caller had it in, even on error.
        model.train(was_training)

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    metrics = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1
    }

    if criterion is not None:
        metrics['loss'] = total_loss / len(dataloader)

    return metrics


# utils


In [7]:
import torch
# from src.model import get_model

def save_model(model, path):
    """Write ``model.state_dict()`` to ``path`` with ``torch.save``.

    When ``path`` is a filesystem path, its parent directory is created
    first if it does not exist, so a finished training run is not lost to a
    missing output folder. Any other kind of ``path`` is handed to
    ``torch.save`` untouched.
    """
    import os

    if isinstance(path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(path))
        # An empty dirname means "current directory": nothing to create.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
    torch.save(model.state_dict(), path)

def load_model(path, model_name="VGG16", num_classes=2):
    """Rebuild a model with ``get_model`` and load the weights stored at ``path``.

    Args:
        path: Checkpoint containing a state dict, as written by ``save_model``.
        model_name: Architecture name passed to ``get_model``.
        num_classes: Output size passed to ``get_model``; it must match the
            checkpoint or ``load_state_dict`` will reject it.

    Returns:
        The model with the checkpoint weights loaded (on CPU; callers move
        it to their device).
    """
    # pretrained=False: every weight is overwritten by the checkpoint just
    # below, so fetching the pretrained weights first would be wasted work.
    model = get_model(model_name, num_classes, False)
    # map_location="cpu" lets a checkpoint saved from a GPU model be loaded
    # on a machine without CUDA.
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    state_dict = torch.load(path, map_location="cpu")
    model.load_state_dict(state_dict)
    return model


# main script


In [8]:
import yaml
# from src.train import train_model
# from src.evaluate import evaluate_model

if __name__ == "__main__":
    # Parse the YAML configuration once, then run training followed by
    # evaluation with that same configuration.
    config_path = '/home/boukhari/projects/dental_image_project/configs/config.yaml'
    with open(config_path, "r") as config_file:
        config = yaml.safe_load(config_file)

    train_model(config)
    evaluate_model(config)




Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /home/boukhari/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100.0%


FileNotFoundError: [Errno 2] No such file or directory: '/gstock/phenodent/all_images/IMG_C2_000432.jpg'

In [None]:
import torch

if torch.cuda.is_available():
    # Select the GPU only after confirming CUDA is usable: calling
    # set_device() unconditionally raises on machines without a GPU, which
    # made the "not available" branch below unreachable there.
    torch.cuda.set_device(0)  # Replace 0 with the index of the GPU you want to use
    print("CUDA is available")
else:
    print("CUDA is not available")

CUDA is available
