**Code For DIP Project: Neonatal Jaundice Detection Using Color Models and ML Classifiers**



**Name:- Zahir Khan, Roll no:- 112202010**

Data_link: [Click_here](https://doi.org/10.3390/biomedinformatics3030037)

**Importing Important Libraries**

In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os

**The skin-detection function below returns an image containing only the detected skin pixels, using thresholds in the HSV and YCbCr color models**

In [2]:

def skin_detection(image_path):
    """Return a 256x256 BGR image in which only skin-coloured pixels are kept.

    The image is read with OpenCV, resized to 256x256 and converted to the
    HSV and YCrCb colour spaces.  A pixel is kept as skin when all of the
    following hold (8-bit OpenCV channel values):

    * H <= 13 and 15 <= S <= 170 (V is unconstrained),
    * Y <= 200, 145 <= Cr <= 180 and 85 <= Cb <= 135.

    Every other pixel is set to black.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The resized image (BGR channel order, same dtype as the loaded
        image) with all non-skin pixels zeroed.

    Raises:
        ValueError: If OpenCV cannot read an image from ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals a missing/undecodable file by returning None.
        raise ValueError("Could not read image: {!r}".format(image_path))
    img = cv2.resize(img, (256, 256))

    # Convert the whole image once instead of converting pixel by pixel.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)

    hue, sat = hsv[..., 0], hsv[..., 1]
    # OpenCV orders the channels Y, Cr, Cb (Cr comes before Cb).
    luma, cr, cb = ycrcb[..., 0], ycrcb[..., 1], ycrcb[..., 2]

    is_skin = (
        (hue <= 13)
        & (sat >= 15) & (sat <= 170)
        & (luma <= 200)
        & (cr >= 145) & (cr <= 180)
        & (cb >= 85) & (cb <= 135)
    )

    # Start from a black image and copy over only the skin pixels.
    result = np.zeros_like(img)
    result[is_skin] = img[is_skin]
    return result



**Finding the mean R, G, B, Y, Cb, Cr, H, S, V, L, a, b values over the detected skin pixels**

In [4]:


def RGB_YCbCr_values(skin_detected_image):
    """Return the mean colour features over the non-black pixels of an image.

    The image is converted to YCrCb, HSV and LAB with OpenCV, and each
    channel is averaged over the pixels that are not pure black (black is
    the background value produced by the skin-detection step).

    Args:
        skin_detected_image: BGR image array whose background pixels are
            black (all channels zero).

    Returns:
        A list of 12 values in this order: mean R, G, B, Y, Cb, Cr, H, S, V,
        L*, a*, b*.  If the image has no non-black pixel, every entry is NaN.
    """
    # A pixel is "non-black" when at least one channel is non-zero; requiring
    # every channel to be non-zero would wrongly drop e.g. pixels with B == 0.
    non_black_mask = np.any(skin_detected_image != 0, axis=-1)
    non_black_pixels = np.count_nonzero(non_black_mask)
    if non_black_pixels == 0:
        # Nothing to average: return NaNs explicitly instead of dividing by 0.
        return [np.nan] * 12

    ycrcb_image = cv2.cvtColor(skin_detected_image, cv2.COLOR_BGR2YCrCb)
    hsv_image = cv2.cvtColor(skin_detected_image, cv2.COLOR_BGR2HSV)
    lab_image = cv2.cvtColor(skin_detected_image, cv2.COLOR_BGR2LAB)

    def channel_means(image, channel_order):
        """Mean of the selected channels of ``image`` over non-black pixels."""
        selected = image[non_black_mask]
        return [np.sum(selected[:, channel]) / non_black_pixels
                for channel in channel_order]

    return (
        channel_means(skin_detected_image, (2, 1, 0))  # BGR storage -> R, G, B
        + channel_means(ycrcb_image, (0, 2, 1))        # Y, Cr, Cb storage -> Y, Cb, Cr
        + channel_means(hsv_image, (0, 1, 2))          # H, S, V
        + channel_means(lab_image, (0, 1, 2))          # L*, a*, b*
    )


In [155]:
# Example: run skin detection on one image and show its feature list
# (mean R, G, B, Y, Cb, Cr, H, S, V, L, a, b over the detected skin pixels).
RGB_YCbCr_values(skin_detection('jaundice (19).jpg'))
#cv2.imwrite(r"C:\Users\ZAHIR\OneDrive\Desktop\DIP Project\new_nor(578)_new.jpg", img)

[151.1364854984158,
 108.3941018766756,
 79.28783816719474,
 117.89982939312698,
 106.27029003168413,
 151.7284913477943,
 12.11455032902754,
 122.46721910796978,
 151.1364854984158,
 125.97635876188156,
 141.35681208871557,
 150.99585669022667]

**Features Extraction and Storing them in CSV file**

In [6]:
def extract_consecutive_integers(input_string):
    """Return the first run of consecutive decimal digits in a string as an int.

    Used to obtain the image ID number from an image name, so that each row
    of colour features stored in the CSV can be traced back to its image.

    Args:
        input_string: Text to scan, e.g. an image file name.

    Returns:
        The integer value of the first digit run (``"a12b34"`` -> ``12``).

    Raises:
        IndexError: If the string contains no decimal digit.
    """
    digits = []
    for char in input_string:
        # isdecimal() (unlike isdigit()) only accepts characters int() can
        # parse, so symbols such as a superscript two are skipped, not fatal.
        if char.isdecimal():
            digits.append(char)
        elif digits:
            # The first digit run has just ended; later runs are irrelevant.
            break

    if not digits:
        raise IndexError(
            "no integer found in {!r}".format(input_string)
        )
    return int("".join(digits))

**Iterating over the 'normal' and 'jaundice' image folders and extracting the features of every image**

In [7]:
def _collect_features(folder, label):
    """Build one feature row per image found in ``folder``.

    Each row is ``[image_id, <12 colour features>, label]``: the ID is the
    first integer in the file name, the features come from
    ``RGB_YCbCr_values`` applied to the skin-detected image.  Every row is
    printed as it is produced.

    Args:
        folder: Directory containing the images.
        label: Class label appended to every row (0 = normal, 1 = jaundice).

    Returns:
        List of feature rows, in sorted file-name order.
    """
    rows = []
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(('.jpg', '.jpeg', '.png')):
            continue
        image_path = os.path.join(folder, filename)
        skin_image = skin_detection(image_path)
        # Parse the ID from the file name only, so that digits appearing in a
        # directory name can never be mistaken for the image ID.
        image_id = extract_consecutive_integers(filename)
        row = [image_id] + RGB_YCbCr_values(skin_image) + [label]
        rows.append(row)
        print(row)
    return rows


# Normal neonates are labelled 0, jaundiced neonates are labelled 1.
Normalvalues = _collect_features(
    r"C:\Users\ZAHIR\OneDrive\Desktop\DIP Project\normal", 0)
Jaundice_values = _collect_features(
    r"C:\Users\ZAHIR\OneDrive\Desktop\DIP Project\jaundice", 1)


[1, 177.39162299377662, 130.83503930560104, 109.58352440222731, 142.3370864723223, 109.53521126760563, 153.04307238781527, 9.303103504749426, 99.11705699312152, 177.39162299377662, 149.56096462495907, 143.25216999672455, 146.6422780871274, 0]
[1000, 152.56583560576547, 111.57440592130892, 90.26743280093494, 121.40105181145306, 110.43552785352551, 150.2506817296455, 10.237241916634204, 106.34164394234514, 152.56583560576547, 129.0107128944293, 141.4503311258278, 146.33385274639656, 0]
[1001, 147.56863008184624, 111.17456057963236, 91.91882463437543, 119.84959076881792, 112.2239366697974, 147.76653696498053, 10.320273715282436, 98.46679189588086, 147.56863008184624, 127.26579900711123, 139.75754729639073, 144.38749496846907, 0]
[1002, 133.23974208675264, 87.33939038686987, 73.7327080890973, 99.50732708089097, 113.45486518171161, 152.08763188745604, 6.815357561547479, 115.4803634232122, 133.23974208675264, 106.38335287221571, 145.29572098475967, 143.71834701055099, 0]
[1003, 147.647765933

In [8]:
import random
import csv

# Header row: the training cells read the CSV with pandas and access these
# columns by name, so the file must start with them.  The order matches the
# rows built above: ID, the 12 colour features, then the class label
# (jaundice = 1, normal = 0).
csv_head = ['ID', 'r_value', 'g_value', 'b_value',
            'Y_value', 'Cb_value', 'Cr_value',
            'H_value', 'S_value', 'V_value',
            'L_value', 'A_value', 'B_value', 'Class']

# Mix both classes so that a positional train/test split contains both.
l = Normalvalues + Jaundice_values
random.shuffle(l)

file_path = 'Jaundice_feature_data_modify2.csv'
# newline='' lets the csv module control line endings (avoids blank rows on
# Windows).
with open(file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(csv_head)
    writer.writerows(l)



**Training and Testing using DT, RF, SVM**

In [9]:
import pandas as pd
# Load the stored feature table; the cells below access its columns by name
# (e.g. 'r_value', 'Class').
d=pd.read_csv(r"C:\Users\ZAHIR\OneDrive\Desktop\DIP Project\Jaundice_feature_data_modify2.csv")

In [10]:
# Preview the first rows to check the column names and values.
d.head()

Unnamed: 0,ID,r_value,g_value,b_value,Y_value,Cb_value,Cr_value,H_value,S_value,V_value,L_value,A_value,B_value,Class
0,44,141.424876,95.036212,66.418354,105.644175,105.873594,153.558705,11.252587,135.924876,141.424876,113.634278,143.559379,151.912731,1
1,781,149.055394,107.880724,97.919589,119.043109,116.058968,149.422381,5.92428,89.432432,149.055394,126.031048,143.084208,140.196337,0
2,188,145.922984,93.706986,68.221758,106.424285,106.454385,156.217298,9.484114,137.46479,145.922984,114.249535,146.558993,151.389075,1
3,1113,169.006331,120.446278,91.452417,131.666875,105.321662,154.678338,11.059286,117.634162,169.006331,139.649223,143.420184,151.402149,0
4,697,187.631314,122.6389,89.007557,138.231297,100.23865,163.263268,10.101901,135.216183,187.631314,146.472098,149.729146,156.778353,0


In [11]:
# Sanity check: read one feature of the first row by its column name.
d.iloc[0,:]['r_value']

141.4248763

In [145]:
# Features fed to the classifiers.  Other subsets (e.g. Y/Cb/Cr with H and B,
# or r/Cb/Cr only) were tried during experimentation; this is the one in use.
FEATURE_COLUMNS = ['r_value', 'g_value', 'b_value',
                   'H_value', 'S_value', 'V_value',
                   'L_value', 'A_value', 'B_value']
TRAIN_SIZE = 600   # rows [0, 600) are used for training
TEST_END = 760     # rows [600, 760) are used for testing

# The CSV rows were shuffled when the file was written, so a positional
# split is used here.  Slicing selects whole blocks of rows at once instead
# of fetching them one at a time with d.iloc[i, :].
train_rows = d.iloc[:TRAIN_SIZE]
test_rows = d.iloc[TRAIN_SIZE:TEST_END]

# Training data (first 600 rows)
x_train = train_rows[FEATURE_COLUMNS].values.tolist()
y_train = train_rows['Class'].tolist()

# Testing data (next 160 rows)
x_test = test_rows[FEATURE_COLUMNS].values.tolist()
y_test = test_rows['Class'].tolist()

In [146]:
#importing libraries for classification
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn import metrics
import numpy as np
import pickle
import time
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn import svm

In [147]:
# Decision Tree classification: training.
# random_state is fixed (same seed as the Random Forest cell) so that the
# fitted tree, and therefore the reported accuracy, is reproducible.
dt_clf=tree.DecisionTreeClassifier(random_state=42)
# perf_counter() is a monotonic high-resolution clock; time.time() is a wall
# clock whose resolution can be too coarse for a fit that takes milliseconds.
startTime=time.perf_counter()
DT_model=dt_clf.fit(np.array(x_train),y_train)
endTime=time.perf_counter()
print('Execution time of training in DT model for', len(x_train), 'data is ',endTime-startTime, 'sec')

Execution time of training in DT model for 600 data is  0.017517566680908203 sec


In [148]:
# Evaluate the Decision Tree on the held-out test rows.
pred_label=DT_model.predict(np.array(x_test))
accuracy = accuracy_score(y_test, pred_label)
print("Accuracy in Decision Tree is: ",accuracy*100,"%")
# Last expression of the cell: the notebook displays the confusion matrix
# (rows = true class, columns = predicted class, per scikit-learn convention).
confusion_matrix(y_test, pred_label)

Accuracy in Decision Tree is:  70.625 %


array([[99, 22],
       [25, 14]], dtype=int64)

In [149]:
# Random Forest classification: training (55 trees, fixed seed for
# reproducibility).
rf = RandomForestClassifier(n_estimators = 55, random_state = 42)
# perf_counter() is a monotonic high-resolution clock; time.time() is a wall
# clock whose resolution can be too coarse for short measurements.
startTime=time.perf_counter()
RF_model=rf.fit(np.array(x_train),y_train)
endTime=time.perf_counter()
print('Execution time of training in RF model for', len(x_train), 'data is ',endTime-startTime, 'sec')

Execution time of training in RF model for 600 data is  0.2932147979736328 sec


In [150]:
# Evaluate the Random Forest on the held-out test rows.
pred_label=RF_model.predict(np.array(x_test))
accuracy = accuracy_score(y_test, pred_label)
print("Accuracy in Random Forest is: ",accuracy*100,"%")
# Last expression of the cell: the notebook displays the confusion matrix
# (rows = true class, columns = predicted class, per scikit-learn convention).
confusion_matrix(y_test, pred_label)

Accuracy in Random Forest is:  76.875 %


array([[104,  17],
       [ 20,  19]], dtype=int64)

In [151]:
# Gaussian Naive Bayes classification: training.
gnb=GaussianNB()
# perf_counter() is a monotonic high-resolution clock; time.time() is a wall
# clock whose resolution can be too coarse for a fit that takes milliseconds.
t1=time.perf_counter()
GNB_model=gnb.fit(np.array(x_train),y_train)
t2=time.perf_counter()
print('Execution time of training in Guassian NB model for', len(x_train), 'segments is ',t2-t1, 'sec')

Execution time of training in Guassian NB model for 600 segments is  0.0055544376373291016 sec


In [152]:
# Evaluate the Gaussian Naive Bayes model on the held-out test rows.
pred_label=GNB_model.predict(np.array(x_test))
accuracy = accuracy_score(y_test, pred_label)
print("Accuracy in GNB model is: ",accuracy*100,"%")
# Last expression of the cell: the notebook displays the confusion matrix
# (rows = true class, columns = predicted class, per scikit-learn convention).
confusion_matrix(y_test, pred_label)

Accuracy in GNB model is:  71.875 %


array([[88, 33],
       [12, 27]], dtype=int64)