### Package import

In [None]:
import torch
import onnxruntime
from typing import List, Dict, Any
import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models


### Ground truth Data

In [None]:
# Load the ground-truth table: column 0 is the lesion id, the remaining
# columns are read as a one-hot class vector.
ground_truth = pd.read_csv("../dataset/milk_true.csv")

# One-hot tuple -> class index, e.g. (0.0, 1.0, 0.0, ..., 0.0) -> 1.
mapping = {
    tuple(1.0 if position == class_index else 0.0 for position in range(11)): class_index
    for class_index in range(11)
}

# Collapse the one-hot columns into a single integer ``Class`` column.
# Rows that are not an exact one-hot vector end up as NaN.
ground_truth = pd.DataFrame({
    'lesion_id': ground_truth["lesion_id"],
    'Class': ground_truth.iloc[:, 1:].values.tolist(),
}).assign(Class=lambda frame: frame['Class'].apply(tuple).map(mapping))
ground_truth

### Train Data

In [None]:
import pandas as pd


# Raw anatomical-site value -> location name used by the rest of the notebook.
mapping = {
    'head_neck_face': 'Head',
    'lower_extremity': 'Leg',
    'upper_extremity': 'Arm',
    'trunk': 'Torso',
    'foot': 'Feet',
    'genital': 'Genitalia',
    'hand': 'Hand'
}

# Build the training table in one pipeline:
#   1. keep dermoscopic images only,
#   2. keep the id + demographic columns,
#   3. drop rows without age or site,
#   4. translate the site names and rename the columns,
#   5. attach the class label of each lesion (left join on lesion_id).
dataset = (
    pd.read_csv("../dataset/milk_meta.csv")
    .loc[lambda frame: frame['image_type'] == 'dermoscopic']
    .loc[:, ["lesion_id", "isic_id", "age_approx", "sex", "site"]]
    .dropna(subset=["age_approx", "site"])
    .assign(site=lambda frame: frame['site'].map(mapping))
    .rename(columns={
        'age_approx': 'Age',
        'sex': "Gender",
        'site': "Location",
    })
    .reset_index(drop=True)
    .merge(ground_truth, on="lesion_id", how="left")
)
dataset

### Preparing Dataset

In [None]:
import numpy as np
from enum import Enum, IntEnum

class LocationId(IntEnum):
    """Integer code (1-7) of the body site where a skin lesion is located."""

    # Members are numbered consecutively in declaration order, starting at 1.
    ARM, FEET, GENITALIA, HAND, HEAD, LEG, TORSO = range(1, 8)

def _get_location_value(location_str: str) -> int:
    """Translate a body-location name into its ``LocationId`` code.

    The lookup is case-insensitive (the name is upper-cased first).

    Returns:
        The integer value of the matching ``LocationId`` member, or ``-1``
        when the input is empty/``None`` or names no member.
    """
    if location_str:
        member = LocationId.__members__.get(location_str.upper())
        if member is not None:
            return member.value
    # Missing or unrecognised location.
    return -1


def _prepare_metadata_array(metadata: list[Dict[str, Any]]):
    """Encode patient metadata as a float32 vector for the model input.

    Args:
        metadata: Dicts with optional ``'age'``, ``'gender'`` and
            ``'location'`` keys. NOTE: when several entries are given only
            the LAST one is encoded (unchanged historical behaviour; the
            callers in this notebook pass single-entry lists).

    Returns:
        ``np.ndarray`` of shape ``(3,)``, dtype float32, laid out as
        ``[age, gender, location]``:

        * age: the given value, or 0 when missing (``None`` or NaN);
        * gender: 1 for ``'male'``/``'m'``, 0 for ``'female'``/``'f'``
          (case-insensitive), -1 otherwise;
        * location: the code returned by ``_get_location_value`` (-1 when
          unknown).

        An empty ``metadata`` list yields the all-unknown vector
        ``[0, -1, -1]``.
    """
    # Defaults double as the result for an empty list.
    encoded = [0, -1, -1]

    for entry in metadata:
        age = entry.get('age')
        # pandas represents a missing value as NaN, which never equals itself.
        if age is None or age != age:
            age = 0

        # Only real strings can be lower-cased; NaN/None/other types count
        # as "unknown" instead of raising AttributeError.
        gender_raw = entry.get('gender')
        gender_str = gender_raw.lower() if isinstance(gender_raw, str) else ''
        if gender_str in ('male', 'm'):
            gender = 1
        elif gender_str in ('female', 'f'):
            gender = 0
        else:
            gender = -1  # Unknown/missing gender

        location_raw = entry.get('location')
        location_str = location_raw.lower() if isinstance(location_raw, str) else ''
        location = _get_location_value(location_str)

        encoded = [age, gender, location]

    return np.array(encoded, dtype=np.float32)

In [None]:
import pandas as pd

# Encode each row's (Age, Gender, Location) as a float32 demographics vector.
# Iterating the three columns in lockstep avoids materialising a full row
# Series with ``.iloc[i]`` three times per sample.
metadata = [
    _prepare_metadata_array([{"age": age, "gender": gender, "location": location}])
    for age, gender, location in zip(dataset["Age"], dataset["Gender"], dataset["Location"])
]

# Final training table: ids, class label and the encoded demographics.
# The demographics frame reuses ``dataset.index`` so the join stays aligned
# row-for-row.
data = dataset[["lesion_id", "isic_id", "Class"]].join(
    pd.DataFrame({"demographics": metadata}, index=dataset.index)
)

data

### Train Model

In [157]:
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
class SkinLesionDataset(Dataset):
    """Dataset yielding ``(image, demographics, label)`` per lesion image.

    The DataFrame is read by column POSITION:

    * column 1 - image id, used as the ``.jpg`` file stem inside ``image_dir``;
    * column 2 - class label;
    * column 3 - demographics vector.

    Args:
        data: Table with the column layout described above.
        transform: Optional callable applied to the preprocessed image
            (a float32 numpy array in channel-first layout).
        image_dir: Directory that contains the ``<image id>.jpg`` files.
        image_size: Side length (pixels) the image is resized to.
    """

    def __init__(self, data:pd.DataFrame, transform=None,
                 image_dir="../dataset/masked_images", image_size=512):
        self.data_frame = data
        self.transform = transform
        self.image_dir = image_dir
        self.image_size = image_size

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        image_id = self.data_frame.iloc[idx, 1]
        # The context manager closes the file handle as soon as the pixels
        # have been decoded by ``convert``.
        with Image.open(f"{self.image_dir}/{image_id}.jpg") as source:
            image = source.convert('RGB').resize((self.image_size, self.image_size))

        # float32 pixels rescaled from [0, 255] to [0, 512]; the inference
        # cell for this model applies the same factor.
        image = np.array(image, dtype=np.float32)
        image = image * (512.0 / 255.0)

        # HWC -> CHW
        image = np.transpose(image, (2, 0, 1))
        demographics = self.data_frame.iloc[idx, 3]  # column 3: demographics vector
        label = self.data_frame.iloc[idx, 2]  # column 2: class label

        if self.transform:
            image = self.transform(image)

        return image, demographics, label

In [158]:
def train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=10, device='cpu'):
    """Train ``model`` for ``num_epochs`` epochs and report the mean loss.

    Args:
        model: Module called as ``model(images, demographics)``.
        train_loader: Iterable of ``(images, demographics, labels)`` batches.
            It is iterated once per epoch; ``len()`` is not required.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: LR scheduler stepped once per epoch.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batch tensors are moved to.

    Returns:
        List with the mean batch loss of every epoch, in order.

    Raises:
        ValueError: If an epoch yields no batches (the mean loss would be
            undefined).
    """
    model.train()
    loss_history = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for images, demographics, labels in train_loader:
            # Move tensors to the specified device
            images = images.to(device)
            demographics = demographics.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images, demographics)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; cannot compute the epoch loss")

        scheduler.step()
        # Counting batches (instead of len(train_loader)) also supports
        # loaders that do not define __len__.
        epoch_loss = running_loss / num_batches
        loss_history.append(epoch_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

    return loss_history

def validate_model(model, val_loader, device='cpu'):
    """Measure top-1 accuracy of ``model`` on ``val_loader``.

    Args:
        model: Module called as ``model(images, demographics)``; it is
            switched to eval mode.
        val_loader: Iterable of ``(images, demographics, labels)`` batches.
        device: Device the batch tensors are moved to.

    Returns:
        Fraction of samples whose arg-max prediction equals the label.

    Raises:
        ValueError: If ``val_loader`` yields no samples (accuracy would be
            undefined).
    """
    model.eval()
    total_correct = 0
    total_samples = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, demographics, labels in val_loader:
            # Move tensors to the specified device
            images = images.to(device)
            demographics = demographics.to(device)
            labels = labels.to(device)

            outputs = model(images, demographics)
            predicted = outputs.argmax(dim=1)
            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()

    if total_samples == 0:
        raise ValueError("val_loader yielded no samples; accuracy is undefined")

    accuracy = total_correct / total_samples
    print(f'Validation Accuracy: {accuracy:.4f}')
    return accuracy


### YOLO model for tricorder-3

In [161]:
from ultralytics import YOLO
import torch
import torch
# Classification checkpoint that supplies the wrapped network.
yolo_model = YOLO("yolo11n-cls.pt")


class YoloSkinLesionModel(nn.Module):
    """``nn.Module`` wrapper around the network held by ``yolo_model``.

    NOTE(review): ``num_classes`` is accepted but not used - the wrapped
    head is left as loaded, so the output width is whatever the checkpoint
    provides. Confirm whether the head should be replaced.
    """

    def __init__(self, num_classes=11):
        super().__init__()
        self.features = yolo_model.model

    def forward(self, image):
        """Return the class scores produced by the wrapped network.

        If the network returns a tuple/list its FIRST element is returned
        (presumably the softmax probabilities in eval mode - TODO confirm
        against the installed ultralytics version). A bare tensor is
        returned unchanged; indexing it with ``[0]`` would silently select
        the first sample of the batch.
        """
        output = self.features(image)
        if isinstance(output, (tuple, list)):
            return output[0]
        return output

model = YoloSkinLesionModel()

In [162]:
import torch
import torch.onnx


# from your_model_file import YourModelClass  # Import your model class
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Step 1: Load the model
model = YoloSkinLesionModel().to(device) # Instantiate your model class

# model.load_state_dict(torch.load('model/yolo_skin.pth'))  # Load the state dict
model.eval()  # Set the model to evaluation mode

# Step 2: Preprocess one sample image: RGB, 512x512, float32 in [0, 1], CHW.
img = '../dataset/masked_images/ISIC_0159874.jpg'  # or a video file
with Image.open(img) as source_image:
    # Decode inside the context manager so the file handle is closed promptly.
    image = source_image.convert('RGB').resize((512, 512))
image = np.array(image, dtype=np.float32)
image = image * (1 / 255.0)
image = np.transpose(image, (2, 0, 1))
# Add the batch dimension and move to the model's device.
image = torch.from_numpy(image).unsqueeze(0).to(device)

# Step 3: Inference only - no autograd graph is needed.
with torch.no_grad():
    result = model(image)
result

tensor([[2.8790e-05, 2.3751e-04, 2.4194e-04, 6.5015e-05, 3.8219e-04, 6.9760e-03, 2.9629e-03, 1.1742e-05, 3.4069e-05, 6.0960e-05, 2.3738e-04, 1.8362e-06, 1.0785e-04, 6.9470e-05, 3.4482e-06, 9.6169e-05, 5.0513e-06, 1.3797e-06, 4.7612e-05, 4.6992e-06, 1.4593e-05, 1.9119e-04, 3.4659e-05, 5.2969e-04, 2.0363e-05, 1.0978e-05,
         1.8526e-03, 6.7083e-04, 6.9065e-05, 3.3632e-03, 1.9830e-06, 1.0705e-05, 6.8715e-05, 1.7962e-05, 1.1758e-04, 4.4054e-05, 6.5479e-05, 5.6125e-06, 1.4071e-03, 4.7281e-06, 2.4888e-06, 4.0246e-04, 3.5702e-05, 8.9308e-06, 2.5310e-05, 7.2532e-05, 4.7138e-06, 2.4647e-05, 4.5391e-06, 4.4514e-06, 8.6625e-06, 1.0337e-06,
         4.7355e-04, 4.6870e-05, 2.0072e-04, 4.6409e-07, 2.7110e-06, 8.4682e-07, 1.3717e-04, 2.5602e-05, 1.9311e-04, 4.1580e-06, 3.0408e-04, 1.7678e-05, 3.1534e-05, 1.1181e-03, 2.1899e-04, 2.0960e-06, 2.9929e-04, 4.2355e-05, 1.1229e-04, 4.3304e-04, 4.7007e-05, 2.8546e-03, 1.4672e-04, 2.6341e-03, 2.2597e-04, 4.2415e-04,
         2.2998e-01, 5.3524e-03, 1.40

### tricorder-3 custom model

In [179]:
from ultralytics import YOLO

yolo_model = YOLO("yolo11n-cls")

# Define the model class (ensure this matches the saved model architecture)
class SimpleSkinLesionModel(nn.Module):
    """Image + demographics classifier built on the ``yolo_model`` layers.

    Image branch: every layer of ``yolo_model.model.model`` except the last,
    followed by the last layer's ``conv``, ``pool`` and ``drop`` sub-modules.
    Demographics branch: ``Linear -> BatchNorm1d -> ReLU`` producing 16
    features. Both are concatenated and fed to one linear classifier.

    Args:
        num_classes: Number of output logits.
        num_demographics: Length of the demographics vector per sample.
    """

    def __init__(self, num_classes=11, num_demographics=3):
        import copy

        super().__init__()
        # Deep-copy the layers so every instance owns its weights. Without
        # the copy all instances (and the global ``yolo_model``) would share
        # and mutate the same parameters.
        layers = list(copy.deepcopy(yolo_model.model.model))

        # Use all layers except the final classification layer
        self.features = nn.Sequential(*layers[:-1])
        last_layer = layers[-1]
        self.last_conv = last_layer.conv
        self.last_pool = last_layer.pool
        self.last_drop = last_layer.drop

        self.demographics_processor = nn.Sequential(
            nn.Linear(num_demographics, 16),
            nn.BatchNorm1d(16),
            nn.ReLU(inplace=True),
        )

        # 1280 image features (assumed width of ``last_conv``'s output -
        # TODO confirm) + 16 demographics features.
        self.classifier = nn.Linear(1280 + 16 , num_classes, bias=True)

    def forward(self, image, demographics):
        """Return the class logits for a batch.

        Args:
            image: Image batch accepted by the YOLO layers.
            demographics: Tensor with ``num_demographics`` values per sample.
        """
        image_features = self.features(image)
        image_features = self.last_conv(image_features)
        image_features = self.last_pool(image_features)
        image_features = self.last_drop(image_features)
        # Flatten everything except the batch dimension.
        image_features = torch.flatten(image_features, start_dim=1)

        demographics_features = self.demographics_processor(demographics)
        combined_features = torch.cat((image_features, demographics_features), dim=1)
        logits = self.classifier(combined_features)
        # probabilities = nn.functional.softmax(logits, dim=1)
        return logits


####

# import torch
# import torch.nn as nn
# from torchvision.models import efficientnet_b7, EfficientNet_B7_Weights

# class AccurateSkinLesionModel(nn.Module):
#     def __init__(self, num_classes=10, num_demographics=3):
#         super().__init__()
#         # Load pre-trained EfficientNet-B7 for high accuracy
#         backbone = efficientnet_b7(weights=EfficientNet_B7_Weights.IMAGENET1K_V1)
#         # Extract features up to the adaptive average pooling (exclude the final classifier)
#         self.features = nn.Sequential(*list(backbone.children())[:-2])  # Up to features, before avgpool and classifier
        
#         # Global average pooling to get fixed-size features
#         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        
#         self.demographics_processor = nn.Sequential(
#             nn.Linear(num_demographics, 64),
#             nn.BatchNorm1d(64),
#             nn.ReLU(inplace=True)
#         )

#         # EfficientNet-B7 outputs 2560 features after pooling
#         self.classifier = nn.Linear(2560 + 64, num_classes, bias=False)

#     def forward(self, image, demographics):
#         # Extract image features (output shape: (B, 2560, 7, 7) or similar, then pool)
#         image_features = self.features(image)
#         image_features = self.avgpool(image_features)
#         image_features = torch.flatten(image_features, start_dim=1)
        
#         # Process demographics
#         demographics_features = self.demographics_processor(demographics)
        
#         # Combine and classify
#         combined_features = torch.cat((image_features, demographics_features), dim=1)
#         logits = self.classifier(combined_features)
        
#         return logits

In [180]:
# Count how many samples carry each class label (most frequent first).
data['Class'].value_counts()

Class
1     2518
8      712
3      541
9      473
7      444
0      303
4       52
5       49
10      45
2       43
6        9
Name: count, dtype: int64

In [181]:
import ultralytics
# Main training loop
def main():
    """Train ``SimpleSkinLesionModel`` on ``data`` and save its weights.

    The samples in ``data`` are split into a training part and a 10 %
    hold-out part (fixed seed), so the reported validation accuracy is
    measured on images the model was not trained on. The trained
    ``state_dict`` is written to ``skin_lesion_model.pth``.

    Raises:
        ValueError: If ``data`` holds fewer than two samples.
    """
    from torch.utils.data import random_split

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    # Initialize the model, loss function, and optimizer
    model = SimpleSkinLesionModel(num_classes=11, num_demographics=3).to(device)
    # model.load_state_dict(torch.load('skin_lesion_model_2_2.pth'), strict = False)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=3*1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=1/3)

    # Prepare the datasets and dataloaders
    full_dataset = SkinLesionDataset(data)
    if len(full_dataset) < 2:
        raise ValueError("need at least two samples to build a train/validation split")
    val_size = max(1, int(0.1 * len(full_dataset)))
    train_size = len(full_dataset) - val_size
    train_dataset, val_dataset = random_split(
        full_dataset,
        [train_size, val_size],
        generator=torch.Generator().manual_seed(42),  # reproducible split
    )
    print(f"train samples: {train_size}, validation samples: {val_size}")

    # drop_last: BatchNorm1d cannot normalise a single-sample batch in
    # training mode, which is what an odd-sized training set would leave as
    # its final batch.
    train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False)

    # Train and validate the model
    train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=300, device=device)
    validate_model(model, val_loader, device=device)

    # Save the model
    torch.save(model.state_dict(), 'skin_lesion_model.pth')


In [None]:
# Start training only when this code is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == '__main__':
    main()

cuda


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


### Save ONNX file

In [None]:
import torch
import torch.onnx
# from your_model_file import YourModelClass  # Import your model class
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Step 1: Load the model
model = YoloSkinLesionModel().to(device) # Instantiate your model class

# model.load_state_dict(torch.load('model/yolo_skin.pth'))  # Load the state dict
model.eval()  # Set the model to evaluation mode

# Step 2: Create a dummy input (the exported model takes an image only)
dummy_image = torch.randn(1, 3, 512, 512, device=device)  # Adjust the shape as needed

# Step 3: Export the model to ONNX
# The target directory must exist before the exporter writes the file.
os.makedirs("model", exist_ok=True)
torch.onnx.export(
    model,
    (dummy_image,),
    "model/yolo_skin.onnx",
    export_params=True,
    opset_version=13,
    do_constant_folding=True,
    input_names=['image'],
    output_names=['output'],
    # Allow a variable batch size at inference time.
    dynamic_axes={
        'image': {0: 'batch_size'},
        'output': {0: 'batch_size'}
    }
)

In [None]:
from ultralytics import YOLO
from PIL import Image
import torch
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
model_state = torch.load("./skin_lesion_model.pth", map_location=device)
model = SimpleSkinLesionModel().to(device)
# strict=False tolerates key mismatches; report them instead of hiding them.
load_result = model.load_state_dict(model_state, strict = False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(f"state_dict mismatch - missing: {load_result.missing_keys}, unexpected: {load_result.unexpected_keys}")
model.eval()

# Same preprocessing as SkinLesionDataset: RGB, 512x512, float32 scaled by 512/255, CHW.
with Image.open("example_dataset/dataset00092/a954cebc-6d49-4750-b485-851a307ab3fb.jpg") as source_image:
    image = source_image.convert('RGB').resize((512, 512))
image = np.array(image, dtype=np.float32)
image = image * (512.0 / 255.0)

image = np.transpose(image, (2, 0, 1))
image = torch.from_numpy(image).unsqueeze(0).to(device)
# print(image.shape)
# [age, gender, location]. Kept in its own variable so the global ``data``
# DataFrame used for training is not overwritten.
demographics = torch.tensor([31, 0 ,7] , dtype = torch.float32).unsqueeze(0).to(device)
print(demographics.shape)
print(image.shape)
# Inference only - no autograd graph is needed.
with torch.no_grad():
    result = model(image, demographics)

print(result)


### sharren/vit-beta2-0.9995 model

In [None]:
import torch
from transformers import ViTForImageClassification, ViTConfig

# Load the ViT configuration from the local config.json file
config = ViTConfig.from_pretrained("config.json")

# Build the classifier from the local safetensors weights, using that configuration
model = ViTForImageClassification.from_pretrained("model.safetensors", config=config)

# Set the model to evaluation mode
model.eval()


In [None]:
import torch
from torchvision import transforms
from PIL import Image

# Image preprocessing configuration
# (presumably mirrors the preprocessor config of the ViT checkpoint - TODO confirm)
config_img = {
    "do_normalize": True,
    "do_rescale": True,
    "do_resize": True,
    "image_mean": [0.5, 0.5, 0.5],
    "image_std": [0.5, 0.5, 0.5],
    "size": {"height": 224, "width": 224},
    "rescale_factor": 0.00392156862745098,
    "resample": 2  # Bilinear
}

# Define the preprocessing transformations
transformations = []

if config_img["do_resize"]:
    transformations.append(transforms.Resize((config_img["size"]["height"], config_img["size"]["width"])))

# PILToTensor keeps the raw uint8 values in [0, 255]. ToTensor would already
# divide by 255, so applying ``rescale_factor`` (1/255) afterwards scaled the
# image twice and left the model with a nearly constant input.
transformations.append(transforms.PILToTensor())
rescale_factor = config_img["rescale_factor"] if config_img["do_rescale"] else 1.0
transformations.append(
    transforms.Lambda(lambda x, factor=rescale_factor: x.to(torch.float32) * factor)
)

if config_img["do_normalize"]:
    transformations.append(transforms.Normalize(mean=config_img["image_mean"], std=config_img["image_std"]))

# Create a composed transformation
preprocess = transforms.Compose(transformations)

# Load and preprocess the image
image_path = "example_dataset/dataset00092/a954cebc-6d49-4750-b485-851a307ab3fb.jpg"  # Change this to your image path
image = Image.open(image_path).convert("RGB")
# image = np.array(image, dtype = np.float32)
# Apply the transformations
input_tensor = preprocess(image)
input_batch = input_tensor.unsqueeze(0)  # Create a mini-batch as expected by the model


In [None]:
# Inference only: disable gradient tracking so no autograd graph is built.
with torch.no_grad():
    outputs = model(input_batch)


print(outputs)
# Get the predicted class
predicted_class = torch.argmax(outputs.logits, dim=1).item()
print(predicted_class)
# Map the predicted class index to the label
id2label = config.id2label
predicted_label = id2label[predicted_class]

print(f'Predicted class: {predicted_label}')


### ResNet18

In [None]:
# ImageNet-pretrained ResNet-18. ``pretrained=True`` is deprecated; the
# ``weights=`` enum selects the same IMAGENET1K_V1 checkpoint explicitly.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model