In [None]:
def split_data(folder_path, test_size=0.2, val_size=0.2, random_state=42):
  """Move the images in ``<folder_path>/all`` into train/val/test folders.

  The file list is split into train+val and test (``test_size``), then the
  train+val part is split again into train and val (``val_size``). Files are
  *moved* (not copied) into ``train_1``, ``val_1`` and ``test_1`` under
  ``folder_path``, so ``all`` contains no files afterwards.

  Args:
    folder_path: Directory that contains the ``all`` sub-folder.
    test_size: Fraction of all files held out for the test split.
    val_size: Fraction of the remaining files held out for validation.
    random_state: Seed forwarded to ``train_test_split``.

  Returns:
    None.
  """
  images_folder_path = os.path.join(folder_path, 'all')  # change to where all pictures are stored

  # os.listdir returns entries in arbitrary order, so sort them to make the
  # seeded split reproducible. Only regular files are kept so that a stray
  # sub-folder is never moved into one of the splits.
  filenames = sorted(
      f for f in os.listdir(images_folder_path)
      if os.path.isfile(os.path.join(images_folder_path, f))
  )
  if not filenames:
    # Typically means the split was already performed on a previous run.
    print(f"No files found in {images_folder_path}; nothing to split.")
    return

  train_imgs, test_imgs = train_test_split(filenames, test_size=test_size, random_state=random_state)
  train_imgs, val_imgs = train_test_split(train_imgs, test_size=val_size, random_state=random_state)

  for subset_name, subset_imgs in (('train_1', train_imgs), ('val_1', val_imgs), ('test_1', test_imgs)):
    subset_dir = os.path.join(folder_path, subset_name)
    os.makedirs(subset_dir, exist_ok=True)
    for img in subset_imgs:
      shutil.move(os.path.join(images_folder_path, img), os.path.join(subset_dir, img))


In [None]:
def split_data_train_test(folder_path, test_size=0.2, random_state=42):
  """Move the images in ``<folder_path>/all`` into train/test folders.

  Unlike ``split_data`` no validation split is produced: files are *moved*
  (not copied) into ``train_final`` and ``test_final`` under ``folder_path``.

  Args:
    folder_path: Directory that contains the ``all`` sub-folder.
    test_size: Fraction of all files held out for the test split.
    random_state: Seed forwarded to ``train_test_split``.

  Returns:
    None.
  """
  images_folder_path = os.path.join(folder_path, 'all')  # change to where all pictures are stored

  # os.listdir returns entries in arbitrary order, so sort them to make the
  # seeded split reproducible. Only regular files are kept so that a stray
  # sub-folder is never moved into one of the splits.
  filenames = sorted(
      f for f in os.listdir(images_folder_path)
      if os.path.isfile(os.path.join(images_folder_path, f))
  )
  if not filenames:
    # Typically means the split was already performed on a previous run.
    print(f"No files found in {images_folder_path}; nothing to split.")
    return

  train_imgs, test_imgs = train_test_split(filenames, test_size=test_size, random_state=random_state)

  for subset_name, subset_imgs in (('train_final', train_imgs), ('test_final', test_imgs)):
    subset_dir = os.path.join(folder_path, subset_name)
    os.makedirs(subset_dir, exist_ok=True)
    for img in subset_imgs:
      shutil.move(os.path.join(images_folder_path, img), os.path.join(subset_dir, img))


In [None]:
def run_segmentation_model(model_path, train_path, val_path, test_path):
  """Load a YOLO model and run it on the train, val and test sources.

  Each source is passed to the model with ``save=True``; the three calls are
  made in train, val, test order.

  Args:
    model_path: Path handed to ``YOLO(...)``.
    train_path: Source passed to the model for the training split.
    val_path: Source passed to the model for the validation split.
    test_path: Source passed to the model for the test split.

  Returns:
    Tuple ``(model, results_train, results_val, results_test)``.
  """
  model = YOLO(model_path)
  # The generator is consumed left to right by the unpacking, so the model is
  # invoked on train, then val, then test.
  results_train, results_val, results_test = (
      model(split_path, save=True)
      for split_path in (train_path, val_path, test_path)
  )
  return model, results_train, results_val, results_test

In [None]:
def created_model_input(results,):
  """Crop the most confident nail detections out of every image.

  For each detection result the three highest-confidence boxes of the nail
  class (class id 0) are cropped from the source image. Images without a
  blue-circle detection (class id 1) are skipped and their file name is
  printed. Images that cannot be read, or that yield no usable nail crop, are
  skipped with a message. Images with fewer than three crops are padded by
  repeating the last crop.

  NOTE: dividing each crop by the pixel at the centre of the blue circle was
  previously computed here but its result was never used; the raw crops are
  returned, exactly as before. The blue circle therefore only acts as a gate.

  Args:
    results: Iterable of detection results. Each item must expose ``path`` and
      ``boxes.xyxy`` / ``boxes.conf`` / ``boxes.cls`` (objects supporting
      ``.cpu().numpy()``).

  Returns:
    dict mapping image file name -> list of exactly three crops, as arrays
    sliced from ``cv2.imread`` output (BGR channel order).
  """
  nail_class = 0
  blue_circle_class_id = 1
  crops_per_image = 3

  normalized_padded = {}

  for result in results:
    image_path = result.path
    image_name = os.path.basename(image_path)

    # Convert the detection tensors once and reuse them below.
    boxes = result.boxes.xyxy.cpu().numpy()  # Bounding boxes (x_min, y_min, x_max, y_max)
    scores = result.boxes.conf.cpu().numpy()  # Confidence scores
    classes = result.boxes.cls.cpu().numpy()  # Class IDs

    # Gate on the blue circle before touching the disk.
    if not any(cls == blue_circle_class_id for cls in classes):
      print(image_name)
      continue

    image = cv2.imread(image_path)
    if image is None:
      # cv2.imread signals an unreadable/missing file by returning None.
      print(f"{image_name}: could not read image, skipping")
      continue

    # Keep the top nail boxes by confidence.
    nail_boxes = sorted(
        ((box, score) for box, score, cls in zip(boxes, scores, classes) if cls == nail_class),
        key=lambda pair: pair[1],
        reverse=True,
    )[:crops_per_image]

    nail_crops = []
    for box, _score in nail_boxes:
      # Clamp at zero so a slightly negative coordinate cannot wrap around
      # through negative indexing.
      x_min, y_min, x_max, y_max = (max(0, int(v)) for v in box)
      nail_region = image[y_min:y_max, x_min:x_max]
      if nail_region.size > 0:  # drop degenerate (empty) crops
        nail_crops.append(nail_region)

    if not nail_crops:
      # Previously this case raised IndexError while padding.
      print(f"{image_name}: no usable nail detection, skipping")
      continue

    # Duplicate the last crop until there are exactly `crops_per_image`.
    nail_crops += [nail_crops[-1]] * (crops_per_image - len(nail_crops))
    normalized_padded[image_name] = nail_crops

  return normalized_padded

In [None]:
def obtain_hb_data(hb_file, sheet_name):
  """Read one sheet of an Excel workbook with ``pd.read_excel``.

  Args:
    hb_file: Workbook passed to ``pd.read_excel``.
    sheet_name: Sheet to load.

  Returns:
    Whatever ``pd.read_excel`` returns for that sheet.
  """
  return pd.read_excel(hb_file, sheet_name=sheet_name)

In [None]:
def label_with_hb(fingernail_hb_data, normalized):
  """Pair the nail crops of each image with its Hb value.

  Every row of ``fingernail_hb_data`` whose ``Right_finger_nail_Image`` is a
  key of ``normalized`` contributes one sample: its crops are converted to
  tensors, resized to 224x224 and concatenated along the last (width)
  dimension. Rows without a matching entry are counted and reported in a
  single summary line.

  Args:
    fingernail_hb_data: DataFrame with ``Right_finger_nail_Image`` and
      ``Hb_Value`` columns.
    normalized: dict mapping image name -> list of image crops.

  Returns:
    Tuple ``(inputs, targets)``: a stacked tensor of combined images and a
    float32 tensor of Hb values. Both are empty tensors when no row matches.
  """
  transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224))
  ])

  inputs = []
  targets = []
  unmatched_rows = 0

  for _, row in fingernail_hb_data.iterrows():
    images = normalized.get(row['Right_finger_nail_Image'])
    if images is None:
      unmatched_rows += 1
      continue
    # dim=2 is the width axis of a (C, H, W) tensor: crops sit side by side.
    inputs.append(torch.cat([transform(img) for img in images], dim=2))
    targets.append(float(row['Hb_Value']))

  if unmatched_rows:
    # One summary line instead of one line per row: the sheet covers every
    # split, so most rows are expected to be absent from any single split.
    print(f"{unmatched_rows} rows had no matching image crops")

  if not inputs:
    # torch.stack raises on an empty list; return empty tensors instead.
    return torch.empty(0), torch.empty(0, dtype=torch.float32)

  return torch.stack(inputs), torch.tensor(targets, dtype=torch.float32)

In [None]:
def augment_extremes(train_inputs, train_targets, hb_data):
  """Append randomly augmented copies of half of the training samples.

  Despite the name, samples are chosen uniformly at random (not by extreme Hb
  value): ``len(train_inputs) // 2`` indices are drawn with ``random.sample``,
  each selected image is rotated by a random angle in [-30, 30] degrees and
  randomly flipped, and the augmented copies are appended to the originals
  together with their targets. An earlier, commented-out variant that
  duplicated the lowest/highest 10% of Hb values has been removed.

  Args:
    train_inputs: Tensor of images, indexable along dim 0.
    train_targets: Tensor of targets aligned with ``train_inputs``.
    hb_data: Unused; kept so existing callers keep working.

  Returns:
    Tuple ``(all_images, all_targets)``. With fewer than two samples nothing
    is augmented and the inputs are returned unchanged.
  """
  num_to_augment = len(train_inputs) // 2
  if num_to_augment == 0:
    # torch.stack would raise on an empty list of augmented images.
    return train_inputs, train_targets

  augmentation_transforms = transforms.Compose([
    transforms.RandomRotation(degrees=(-30, 30)),  # Random rotations between -30 and 30 degrees
    transforms.RandomHorizontalFlip(p=0.5),  # Random horizontal flip
    transforms.RandomVerticalFlip(p=0.5),  # Random vertical flip
    transforms.ToTensor()  # Convert to tensor
    ])
  to_pil = transforms.ToPILImage()  # the augmentations above operate on PIL images

  indices_to_transform = random.sample(range(len(train_inputs)), num_to_augment)

  transformed_images_tensor = torch.stack([
      augmentation_transforms(to_pil(train_inputs[idx]))
      for idx in indices_to_transform
  ])

  # Combine the original images with the transformed ones
  all_images = torch.cat([train_inputs, transformed_images_tensor], dim=0)
  all_targets = torch.cat([train_targets, train_targets[indices_to_transform]], dim=0)

  return all_images, all_targets

In [None]:
from torchvision import transforms

# Augmentation pipeline: random rotation and random flips, then conversion to
# a tensor. No cropping is applied.
augmentation_transforms = transforms.Compose(
    [
        # Rotate by a random angle between -30 and 30 degrees.
        transforms.RandomRotation(degrees=(-30, 30)),
        # Mirror left/right with probability 0.5.
        transforms.RandomHorizontalFlip(p=0.5),
        # Mirror top/bottom with probability 0.5.
        transforms.RandomVerticalFlip(p=0.5),
        # Convert the result to a tensor.
        transforms.ToTensor(),
    ]
)

In [None]:
def convert_to_tensors(inputs, targets, batch_size, shuffle, transform=None):
  """Wrap inputs and targets in a ``TensorDataset`` and a ``DataLoader``.

  Args:
    inputs: Tensor of samples (or an iterable of samples when ``transform``
      is given).
    targets: Tensor of targets aligned with ``inputs``.
    batch_size: Batch size for the loader.
    shuffle: Whether the loader shuffles the data.
    transform: Optional callable applied to every sample; the transformed
      samples are stacked into a single tensor.

  Returns:
    Tuple ``(dataset, loader)``.
  """
  if transform:
    transformed_samples = [transform(sample) for sample in inputs]
    inputs = torch.stack(transformed_samples)

  dataset = torch.utils.data.TensorDataset(inputs, targets)
  return dataset, torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

In [None]:
def regression_model_setup():
  """Build the regression model together with its loss and optimizer.

  Returns:
    Tuple ``(model, criterion, optimizer)``: an EfficientNet-B0 based network
    with a single-output head, ``nn.MSELoss`` and an Adam optimizer
    (lr=1e-4, weight_decay=1e-4) over all model parameters.
  """
  class EfficientNet9ChannelsModel(nn.Module):
    """EfficientNet-B0 backbone with a one-value regression head.

    NOTE(review): despite the class name, the replaced first convolution
    below takes 3 input channels, not 9 — confirm the intended input layout.
    """

    def __init__(self, pretrained=True):
        super(EfficientNet9ChannelsModel, self).__init__()
        # NOTE(review): `pretrained=` is the older torchvision argument; newer
        # releases prefer `weights=` — confirm against the installed version.
        self.efficientnet = models.efficientnet_b0(pretrained=pretrained)

        # The first convolution is swapped for a freshly constructed layer, so
        # its weights are newly initialised rather than loaded.
        # NOTE(review): if the stock layer already has this configuration the
        # swap only discards its pretrained weights — verify it is intended.
        self.efficientnet.features[0][0] = nn.Conv2d(
            3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False
        )

        # Add dropout to features for regularization
        self.dropout = nn.Dropout(p=0.3)

        # Modify the classifier to output 1 value (for regression).
        # `in_features` is read from the existing classifier before it is
        # replaced by this assignment.
        self.efficientnet.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(self.efficientnet.classifier[1].in_features, 1)
        )

    def forward(self, x):
        """Run features -> dropout -> avgpool -> flatten -> classifier."""
        x = self.efficientnet.features(x)
        x = self.dropout(x)  # Apply dropout to the features
        x = self.efficientnet.avgpool(x)  # Use the pre-defined avgpool
        x = torch.flatten(x, 1)  # keep dim 0, flatten the rest
        x = self.efficientnet.classifier(x)
        return x

  # Alternative MobileNetV2 backbone, currently disabled:
  # class MobileNet9ChannelsModel(nn.Module):
  #   def __init__(self, pretrained=True):
  #       super(MobileNet9ChannelsModel, self).__init__()
  #       # Load the pre-trained MobileNetV2 model
  #       self.mobilenet = models.mobilenet_v2(pretrained=pretrained)

  #       self.mobilenet.features[0][0] = nn.Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)

  #       self.mobilenet.classifier[1] = nn.Linear(self.mobilenet.classifier[1].in_features, 1)

  #   def forward(self, x):
  #       return self.mobilenet(x)

  model = EfficientNet9ChannelsModel(pretrained=True) #MobileNet9ChannelsModel(pretrained=True)
  criterion = nn.MSELoss()
  optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)

  return model, criterion, optimizer

In [None]:
def k_fold_train_model(model_class, inputs, targets, criterion, optimizer_class, num_epochs, k, batch_size,
                       lr=0.001, weight_decay=1e-4):
    """Run k-fold cross-validation and report the validation loss per fold.

    For every fold a fresh model is trained for ``num_epochs`` epochs on the
    training indices and evaluated on the held-out indices after each epoch.
    The validation loss of the last epoch is recorded as the fold result.

    Args:
        model_class: Either an ``nn.Module`` instance (deep-copied for every
            fold, so the passed object itself is left untrained) or a
            zero-argument callable returning a new model.
        inputs: Tensor of samples.
        targets: Tensor of targets aligned with ``inputs``.
        criterion: Loss function called as ``criterion(predictions, targets)``.
        optimizer_class: Optimizer class, called as
            ``optimizer_class(params, lr=..., weight_decay=...)``.
        num_epochs: Epochs per fold.
        k: Number of folds.
        batch_size: Batch size for both loaders.
        lr: Learning rate for the per-fold optimizer.
        weight_decay: Weight decay for the per-fold optimizer.

    Returns:
        List with the final validation loss of each fold (previously the
        function only printed these values and returned None).
    """
    import copy  # local import: `copy` is not part of the notebook's import cell

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dataset = TensorDataset(inputs, targets)  # Combine inputs and targets into a dataset
    kfold = KFold(n_splits=k, shuffle=True, random_state=42)
    fold_results = []

    for fold, (train_idx, val_idx) in enumerate(kfold.split(list(range(len(dataset))))):
        print(f"\nFold {fold+1}/{k}")

        train_loader = DataLoader(Subset(dataset, train_idx.tolist()), batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(Subset(dataset, val_idx.tolist()), batch_size=batch_size, shuffle=False)

        # Start every fold from the same untrained state; reusing one model
        # across folds would let it see each validation fold during training.
        if isinstance(model_class, torch.nn.Module):
            model = copy.deepcopy(model_class)
        else:
            model = model_class()
        model = model.to(device)
        optimizer = optimizer_class(model.parameters(), lr=lr, weight_decay=weight_decay)

        fold_val_loss = float("nan")  # stays NaN only when num_epochs == 0

        for epoch in range(num_epochs):
            model.train()
            running_loss = 0.0

            for batch_inputs, batch_targets in train_loader:
                # Use the batch itself (the old code replaced it with the
                # full `inputs`/`targets`, which also were not on `device`).
                batch_inputs = batch_inputs.to(device).float()
                batch_targets = batch_targets.to(device).float()

                optimizer.zero_grad()
                outputs = model(batch_inputs)
                # reshape(-1) keeps a 1-D shape even for a batch of one sample.
                loss = criterion(outputs.reshape(-1), batch_targets)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for batch_inputs, batch_targets in val_loader:
                    batch_inputs = batch_inputs.to(device).float()
                    batch_targets = batch_targets.to(device).float()

                    outputs = model(batch_inputs)
                    val_loss += criterion(outputs.reshape(-1), batch_targets).item()

            fold_val_loss = val_loss / len(val_loader)
            print(f"Validation Loss: {fold_val_loss:.4f}")

        fold_results.append(fold_val_loss)

    print("\nCross-Validation Results:")
    print(f"Mean Validation Loss: {sum(fold_results) / k:.4f}")
    print(f"Loss per Fold: {fold_results}")

    return fold_results


In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    Args:
        model: Module producing one value per sample.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        criterion: Loss function called as ``criterion(predictions, targets)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        dict with ``"train_loss"`` and ``"val_loss"`` lists holding the mean
        batch loss of every epoch (previously nothing was returned).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    history = {"train_loss": [], "val_loss": []}

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for inputs, targets in train_loader:
            inputs = inputs.to(device).float()
            targets = targets.to(device).float()

            optimizer.zero_grad()
            outputs = model(inputs)
            # reshape(-1) instead of squeeze(): squeeze() turns a batch of one
            # sample into a 0-d tensor that no longer matches the target shape.
            loss = criterion(outputs.reshape(-1), targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        num_train_batches = len(train_loader)
        train_loss = running_loss / num_train_batches if num_train_batches else float("nan")
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}")

        model.eval()
        val_running_loss = 0.0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device).float()
                targets = targets.to(device).float()

                outputs = model(inputs)
                val_running_loss += criterion(outputs.reshape(-1), targets).item()

        num_val_batches = len(val_loader)
        val_loss = val_running_loss / num_val_batches if num_val_batches else float("nan")
        print(f"Validation Loss: {val_loss:.4f}")

        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)

    return history

In [None]:
def test_model(model, test_loader, criterion, tolerance=0.5, display_samples=5):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    test_loss = 0.0
    all_targets = []
    all_predictions = []
    sample_count = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            inputs = inputs.float()
            targets = targets.float()

            outputs = model(inputs)
            predictions = outputs.view(-1) #squeeze()

            loss = criterion(predictions, targets)
            test_loss += loss.item()

            all_targets.extend(targets.cpu().numpy())
            all_predictions.extend(predictions.cpu().numpy())

            if sample_count < display_samples:
                for i in range(len(targets)):
                    print(f"Sample {sample_count + 1}:")
                    print(f"  Predicted: {predictions[i].item():.4f}")
                    print(f"  Actual:    {targets[i].item():.4f}")
                    sample_count += 1
                    if sample_count >= display_samples:
                        break

    plt.scatter(all_targets, all_predictions, color='blue', label='Predicted vs Actual')
    plt.plot([min(all_targets), max(all_targets)], [min(all_targets), max(all_targets)], color='red', linestyle='--', label='Perfect Prediction Line')
    plt.title("Regression: Actual vs Predicted Hb Values")
    plt.xlabel("Actual Hb Values")
    plt.ylabel("Predicted Hb Values")
    plt.legend()
    plt.grid(True)
    plt.show()

    # Calculate metrics
    mae = mean_absolute_error(all_targets, all_predictions)
    mse = mean_squared_error(all_targets, all_predictions)
    rmse = torch.sqrt(torch.tensor(mse))
    r2 = r2_score(all_targets, all_predictions)

    # Display results
    print(f"\nTest Loss: {test_loss / len(test_loader):.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"R-squared (R²): {r2:.4f}")

    return {"mae": mae, "mse": mse, "rmse": rmse, "r2": r2}

In [None]:
!pip install ultralytics

from google.colab import drive
from ultralytics import YOLO
import pandas as pd
import torch
from torchvision import transforms
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
import os
import shutil
from sklearn.model_selection import train_test_split
import zipfile
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, Subset
import random
import numpy as np
import torch
from torchvision import transforms


# Mount Google Drive so the project folder is reachable from this runtime.
drive.mount('/content/drive')
# Project locations on Drive: the image folder and the weights file inside it.
folder_path = '/content/drive/MyDrive/AI_ML_Project/Right_Fingernail'
model_path = '/content/drive/MyDrive/AI_ML_Project/Right_Fingernail/best.pt'


# Moves the files under <folder_path>/all into train_1 / val_1 / test_1.
# The files are moved, not copied, so this call is not repeatable as-is.
split_data(folder_path)
# Alternative train/test-only split (writes train_final / test_final):
#split_data_train_test(folder_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Run the detector over the three split folders (train, val, test order).
model, results_train, results_val, results_test = run_segmentation_model(
    '/content/drive/MyDrive/AI_ML_Project/Right_Fingernail/best.pt',
    '/content/drive/MyDrive/AI_ML_Project/Right_Fingernail/train_1',
    '/content/drive/MyDrive/AI_ML_Project/Right_Fingernail/val_1',
    '/content/drive/MyDrive/AI_ML_Project/Right_Fingernail/test_1',
)

# Turn each split's detections into per-image nail crops; the generator is
# consumed left to right, so the calls happen in train, val, test order.
segmented_train_input, segmented_val_input, segmented_test_input = (
    created_model_input(split_results)
    for split_results in (results_train, results_val, results_test)
)




image 1/326 /content/drive/MyDrive/AI_ML_Project/Right_Fingernail/train_1/1709617374918.jpg: 1024x1024 4 nails, 1 colorcard, 1 bluecircle, 1300.6ms
image 2/326 /content/drive/MyDrive/AI_ML_Project/Right_Fingernail/train_1/1709617703730.jpg: 1024x1024 4 nails, 1 colorcard, 1 bluecircle, 1641.3ms
image 3/326 /content/drive/MyDrive/AI_ML_Project/Right_Fingernail/train_1/1709618658975.jpg: 1024x1024 4 nails, 1 colorcard, 1 bluecircle, 2970.0ms
image 4/326 /content/drive/MyDrive/AI_ML_Project/Right_Fingernail/train_1/1709619498294.jpg: 1024x1024 4 nails, 1 colorcard, 1 bluecircle, 2897.5ms
image 5/326 /content/drive/MyDrive/AI_ML_Project/Right_Fingernail/train_1/1709620017738.jpg: 1024x1024 4 nails, 1 colorcard, 1 bluecircle, 1657.8ms
image 6/326 /content/drive/MyDrive/AI_ML_Project/Right_Fingernail/train_1/1709620997717.jpg: 1024x1024 4 nails, 1 colorcard, 1 bluecircle, 1699.9ms
image 7/326 /content/drive/MyDrive/AI_ML_Project/Right_Fingernail/train_1/1709621336576.jpg: 1024x1024 4 nails,

In [None]:
# Load the Hb sheet that maps image names to Hb values.
hb_data = obtain_hb_data("/content/drive/MyDrive/AI_ML_Project/Anemia_dataset_train.xlsx", 'Right_Finger_Nail_Data')

# Number of images with crops in each split (train, val, test).
print("Before Labeling...")
print(
    *(len(segmented) for segmented in (segmented_train_input, segmented_val_input, segmented_test_input)),
    sep="\n",
)

# Label every split; evaluated in train, val, test order.
(train_inputs, train_targets), (val_inputs, val_targets), (test_inputs, test_targets) = (
    label_with_hb(hb_data, segmented)
    for segmented in (segmented_train_input, segmented_val_input, segmented_test_input)
)

# Inputs and targets of a split should report the same length.
print("After Labeling...")
print(
    *(
        len(labelled)
        for labelled in (train_inputs, train_targets, val_inputs, val_targets, test_inputs, test_targets)
    ),
    sep="\n",
)

Before Labeling...
326
68
55
1709625695477.jpg
1709627557684.jpg
1709639981572.jpg
1709620482923.jpg
31709634792532.jpg
21709623630933.jpg
31709623630933.jpg
1709625155249.jpg
21709625155249.jpg
21709630063667.jpg
21709635011466.jpg
1709636926027.jpg
21709637756921.jpg
1709642357991.jpg
31709627275580.jpg
1709630816195.jpg
21709641895047.jpg
21709702159121.jpg
21709708573764.jpg
21709713864867.jpg
31709702513283.jpg
1709709029152.jpg
1709712899206.jpg
21709723915521.jpg
21709709028248.jpg
1709720647227.jpg
1709703297071.jpg
1709706262427.jpg
1709707663987.jpg
21709709179659.jpg
1709724444782.jpg
1709725497306.jpg
1709792080268.jpg
1709793075008.jpg
1709794390243.jpg
1709795312517.jpg
21709796821857.jpg
1709798568756.jpg
1709799457005.jpg
21709799457005.jpg
1709808158048.jpg
21709808158048.jpg
1709808998593.jpg
21709809879738.jpg
41709809879738.jpg
1709810569458.jpg
21709789470712.jpg
1709791542264.jpg
21709791542264.jpg
31709792753872.jpg
1709796395301.jpg
21709797511556.jpg
4170979751

In [None]:
# print("Before Augmentation...")
# print(len(train_inputs))
# print(len(train_targets))

# train_inputs_aug, train_targets_aug = augment_extremes(train_inputs, train_targets, hb_data)

# print("After Augmentation...")
# print(len(train_inputs_aug))
# print(len(train_targets_aug))

# Build a dataset and loader per split; only the training loader shuffles.
(train_data, train_loader), (val_data, val_loader), (test_data, test_loader) = (
    convert_to_tensors(split_inputs, split_targets, batch_size=16, shuffle=shuffle_split)
    for split_inputs, split_targets, shuffle_split in (
        (train_inputs, train_targets, True),
        (val_inputs, val_targets, False),
        (test_inputs, test_targets, False),
    )
)


In [None]:
# Persist the three datasets to Drive. The objects saved are the datasets
# returned by convert_to_tensors, not bare tensors.
# NOTE(review): recent PyTorch versions may need
# torch.load(..., weights_only=False) to read these back — confirm.
torch.save(train_data, '/content/drive/MyDrive/AI_ML_Project/Right_Fingernail/train_data_orig.pt')
torch.save(val_data, '/content/drive/MyDrive/AI_ML_Project/Right_Fingernail/val_data_orig.pt')
torch.save(test_data, '/content/drive/MyDrive/AI_ML_Project/Right_Fingernail/test_data_orig.pt')