# Projects in data science, Project: Skin lesions

# Part 1 extracting features

##### Imports

In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage import segmentation, color
import csv
from skimage.feature import graycomatrix, graycoprops
from skimage import io, color, img_as_ubyte
import skimage.util as util
from skimage.metrics import structural_similarity as ssim

  "class": algorithms.Blowfish,


##### File paths, please update before running

In [2]:
# Machine-specific absolute locations of the lesion images, their masks and the
# metadata table. Edit these three values to match your own setup before running.
Lesion_images_folder = r"C:\Users\krist\Desktop\ITU\2_Semester\projects_in_datascience\first_year_project\groupN_images"
Lesion_masks_folder = r"C:\Users\krist\Desktop\ITU\2_Semester\projects_in_datascience\first_year_project\groupN_masks"
Metadata_path = r"C:\Users\krist\Desktop\ITU\2_Semester\projects_in_datascience\first_year_project\metadata.csv"

# Functions for extracting features/attributes

### Asymmetry

In [3]:
def measure_symmetry(mask_path, image_path):
    """
    Measure shape and colour asymmetry of a lesion from its mask and image.

    The mask is mirrored left-right and up-down within the image frame (the
    flips are about the image's own centre lines, not about the lesion's
    principal axes). Shape asymmetry is one minus the fraction of lesion
    pixels that overlap the mirrored mask. Colour asymmetry is one minus the
    SSIM between the image pixels under the mask and the image pixels under
    the mirrored mask, both taken in raster order.

    Args:
        mask_path (str): Path to the mask image file (non-zero = lesion).
        image_path (str): Path to the original image corresponding to the mask.

    Returns:
        list[float]: Six values, in this order:
            [asymmetry for the left-right flip,
             asymmetry for the up-down flip,
             1 - SSIM for the left-right flip,
             1 - SSIM for the up-down flip,
             1 - mean of the two overlap fractions,
             1 - mean of the two SSIM values].
        An empty mask yields six zeros so every result has the same length.

    Raises:
        OSError: If OpenCV cannot read the mask or the image file.
    """
    # cv2.imread signals failure by returning None instead of raising
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        raise OSError(f"Could not read mask image: {mask_path}")

    original_image = cv2.imread(image_path)
    if original_image is None:
        raise OSError(f"Could not read image: {image_path}")

    # Work on a boolean mask so grey values other than 0/255 cannot distort
    # the overlap computation (a bitwise AND of raw grey values would)
    lesion = mask > 0
    area_total = np.count_nonzero(lesion)

    if area_total == 0:
        # Same length as the regular result so downstream rows stay aligned
        return [0.0] * 6

    # Mirror the mask within the image frame
    mask_major_axis = np.fliplr(lesion)  # left-right flip
    mask_minor_axis = np.flipud(lesion)  # up-down flip

    # Fraction of lesion pixels that are also lesion pixels after mirroring
    symmetry_major = np.count_nonzero(lesion & mask_major_axis) / area_total
    symmetry_minor = np.count_nonzero(lesion & mask_minor_axis) / area_total

    # Overall shape asymmetry (one minus the mean overlap fraction)
    symmetry_score = 1 - 0.5 * (symmetry_major + symmetry_minor)

    # Pixel lists of shape (n_pixels, n_channels); flipping preserves the
    # number of mask pixels, so all three lists have the same length
    masked_region = original_image[lesion]
    masked_major_axis = original_image[mask_major_axis]
    masked_minor_axis = original_image[mask_minor_axis]

    # channel_axis replaces the deprecated multichannel=True flag
    win_size = min(masked_region.shape[0], masked_region.shape[1])
    ssim_major = ssim(masked_region, masked_major_axis, win_size=win_size, channel_axis=-1)
    ssim_minor = ssim(masked_region, masked_minor_axis, win_size=win_size, channel_axis=-1)

    # Overall colour asymmetry (one minus the mean SSIM)
    color_symmetry_score = 1 - 0.5 * (ssim_major + ssim_minor)

    return [1-symmetry_major, 1-symmetry_minor, 1-ssim_major, 1-ssim_minor, symmetry_score, color_symmetry_score]

### Colour

In [4]:
def analyze_and_visualize_segmentation(image_path, mask_path, lst_compactness):
    """
    Compute the mean colour and colour spread of a lesion.

    The image is cropped to the bounding box of the mask and segmented with
    SLIC once per compactness value. The mask is handed to SLIC so that
    label 0 marks pixels outside the lesion and only lesion pixels enter the
    statistics. Results are averaged over the compactness values.

    Args:
        image_path (str): Path to the lesion image.
        mask_path (str): Path to the mask image (non-zero = lesion).
        lst_compactness (list): SLIC compactness values to evaluate.

    Returns:
        list: Six values - the per-channel mean colour followed by the
        per-channel standard deviation, in the channel order of the image.

    Raises:
        ValueError: If the mask contains no lesion pixels.
    """
    # Load the image (dropping any alpha channel) and the mask
    rgb_img = plt.imread(image_path)[:, :, :3]
    mask = plt.imread(mask_path)
    if mask.ndim == 3:
        # Multi-channel mask file: the first channel carries the mask
        mask = mask[:, :, 0]
    lesion = mask != 0

    if not lesion.any():
        raise ValueError(f"Mask contains no lesion pixels: {mask_path}")

    # Bounding box of the lesion; +1 because slice ends are exclusive and the
    # last lesion row/column must stay inside the crop
    rows, cols = np.nonzero(lesion)
    row_span = slice(rows.min(), rows.max() + 1)
    col_span = slice(cols.min(), cols.max() + 1)
    cropped_lesion = rgb_img[row_span, col_span]
    cropped_mask = lesion[row_span, col_span]

    # Per-compactness mean colours and standard deviations
    avg_colors = []
    std_devs = []

    for c in lst_compactness:
        # With a mask, SLIC labels start at 1 and masked-out pixels get 0
        labels = segmentation.slic(cropped_lesion, compactness=c, n_segments=30, sigma=3,
                                   start_label=1, mask=cropped_mask)
        lesion_pixels = cropped_lesion[labels != 0]
        avg_colors.append(np.mean(lesion_pixels, axis=0))
        std_devs.append(np.std(lesion_pixels, axis=0))

    # Average over the compactness values
    avg_avg_colors = np.mean(avg_colors, axis=0)
    avg_std_devs = np.mean(std_devs, axis=0)

    return list(avg_avg_colors)+list(avg_std_devs)


### Blue-White veil

In [5]:
def classify_pixel_as_veil(rgb_img):
    """
    Count the pixels of an image that satisfy the blue-white veil rule.

    A pixel (R, G, B) is counted when, with Lum = R + G + B, both
    B / Lum >= 0.3 and 0.6 <= Lum <= 2 hold. Pixels with Lum == 0 (for
    example pixels zeroed by a mask) are never counted.

    Args:
        rgb_img (numpy.ndarray): Image array of shape (height, width, 3).

    Returns:
        int: Number of pixels satisfying the rule. Images whose pixels do
        not have exactly three components yield 0.
    """
    pixels = np.asarray(rgb_img)
    # Only three-component pixels are classified
    if pixels.ndim != 3 or pixels.shape[2] != 3:
        return 0

    red = pixels[:, :, 0]
    green = pixels[:, :, 1]
    blue = pixels[:, :, 2]
    lum = red + green + blue

    # Lum == 0 gives 0/0 = NaN; NaN fails every comparison, so such pixels
    # drop out without emitting a RuntimeWarning per pixel
    with np.errstate(divide="ignore", invalid="ignore"):
        norm_blue = blue / lum
        is_veil = (norm_blue >= 0.3) & (lum >= 0.6) & (lum <= 2)

    return int(np.count_nonzero(is_veil))

def analyze_and_count_veil(image_path, mask_path):
    """
    Count blue-white veil pixels inside the lesion mask.

    Args:
        image_path (str): Path to the lesion image.
        mask_path (str): Path to the mask image (non-zero = lesion).

    Returns:
        int: Number of lesion pixels classified as veil by
        classify_pixel_as_veil.
    """
    raw_img = plt.imread(image_path)
    # Drop any alpha channel: the classifier only accepts 3-component pixels
    rgb_img = raw_img[:, :, :3].astype(np.float32)
    if np.issubdtype(raw_img.dtype, np.integer):
        # Integer images are rescaled to 0-1, the range the veil thresholds expect
        rgb_img /= np.iinfo(raw_img.dtype).max

    mask = plt.imread(mask_path)
    if mask.ndim == 3:
        # Multi-channel mask file: the first channel carries the mask
        mask = mask[:, :, 0]

    # Zero everything outside the lesion; a boolean mask keeps lesion colours
    # unscaled whatever value range the mask file uses
    masked_rgb_img = rgb_img * (mask > 0)[:, :, np.newaxis]

    # Classify each pixel as veil or non-veil and count the veil pixels
    veil_count = classify_pixel_as_veil(masked_rgb_img)

    return veil_count

### Haralick texture

In [6]:
def compute_haralick_texture_features(image_path, mask_path):
    """
    Compute GLCM (Haralick-style) texture features of the lesion area.

    The image is converted to 8-bit greyscale and a grey-level co-occurrence
    matrix is built for distance 1 at 0, 45, 90 and 135 degrees. Only pixel
    pairs lying entirely inside the mask contribute to the matrix.

    Args:
        image_path (str): Path to the lesion image.
        mask_path (str): Path to the mask image (non-zero = lesion).

    Returns:
        list: [contrast, dissimilarity, homogeneity, energy, correlation],
        each averaged over the four angles.
    """
    image = io.imread(image_path)
    mask = io.imread(mask_path)

    # Remove alpha channel if present
    if image.ndim == 3 and image.shape[2] == 4:
        image = image[:, :, :3]

    # Convert to 8-bit greyscale (an image that is already 2-D is used as is)
    gray_image = color.rgb2gray(image) if image.ndim == 3 else image
    gray_image_uint = util.img_as_ubyte(gray_image)

    if mask.ndim == 3:
        # Multi-channel mask file: the first channel carries the mask
        mask = mask[:, :, 0]
    lesion = mask > 0

    # Shift grey levels up by one so that level 0 can be reserved for
    # "outside the lesion" without colliding with genuinely black pixels
    shifted = gray_image_uint.astype(np.uint16) + 1
    shifted[~lesion] = 0

    # Compute gray-level co-occurrence matrix (GLCM)
    distances = [1]  # distance between pixels
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]  # angles for texture measurements
    glcm = graycomatrix(shifted, distances=distances, angles=angles, levels=257, symmetric=True, normed=False)

    # Dropping row/column 0 removes every pair that touches the background
    glcm = glcm[1:, 1:, :, :].astype(np.float64)

    # Normalise each (distance, angle) matrix to sum to 1; an empty matrix is
    # left as all zeros instead of dividing by zero
    totals = glcm.sum(axis=(0, 1), keepdims=True)
    totals[totals == 0] = 1
    glcm /= totals

    # Compute texture features, averaged over the angles
    contrast = graycoprops(glcm, 'contrast').ravel().mean()
    dissimilarity = graycoprops(glcm, 'dissimilarity').ravel().mean()
    homogeneity = graycoprops(glcm, 'homogeneity').ravel().mean()
    energy = graycoprops(glcm, 'energy').ravel().mean()
    correlation = graycoprops(glcm, 'correlation').ravel().mean()

    return [contrast, dissimilarity, homogeneity, energy, correlation]




### Diagnostic from metadata

In [7]:
def get_diagnostic(img_name):
    """
    Look up an image in the metadata CSV and return a binary cancer label.

    Args:
        img_name (str): Image name without the ".png" extension.

    Returns:
        int: 1 when the first metadata row whose "img_id" equals
        img_name + ".png" has a "diagnostic" of "BCC", "SCC" or "MEL";
        0 otherwise, including when no row matches.
    """
    target_id = img_name + ".png"

    with open(Metadata_path, 'r') as metadata_file:
        records = csv.DictReader(metadata_file, delimiter=',')
        # First matching row wins; None when the image is not listed
        label = next(
            (record["diagnostic"] for record in records if record["img_id"] == target_id),
            None,
        )

    return 1 if label in ("BCC", "SCC", "MEL") else 0


# Extracting and saving feature values

In [20]:
# Asymmetry features per lesion, keyed by the file name with ".png" removed
Asymmetry_results = {}
for filename in os.listdir(Lesion_images_folder):
    # Skip anything in the folder that is not a .jpg/.png image
    if not filename.endswith(('.jpg', '.png')):
        continue
    image_path = os.path.join(Lesion_images_folder, filename)
    # The mask shares the image's base name, with "_mask.png" appended
    mask_name = os.path.splitext(filename)[0] + "_mask.png"
    mask_path = os.path.join(Lesion_masks_folder, mask_name)
    symmetry = measure_symmetry(mask_path, image_path)
    ID = filename.replace(".png", "")
    Asymmetry_results[ID] = symmetry
#Asymmetry_results      


In [9]:
# Colour features per lesion, keyed by the file name with ".png" removed
Colour_results = {}
for filename in os.listdir(Lesion_images_folder):
    # Skip anything in the folder that is not a .jpg/.png image
    if not filename.endswith(('.jpg', '.png')):
        continue
    img_path = os.path.join(Lesion_images_folder, filename)

    # The mask shares the image's base name, with "_mask.png" appended
    mask_name = os.path.splitext(filename)[0] + "_mask.png"
    corresponding_mask = os.path.join(Lesion_masks_folder, mask_name)

    # Colour statistics for a single SLIC compactness value of 7
    result = analyze_and_visualize_segmentation(img_path, corresponding_mask, [7])

    ID = filename.replace(".png", "")
    Colour_results[ID] = result

#print(Colour_results)


The blue-white veil computation takes a few minutes.

In [10]:
# Blue-white veil pixel counts per lesion, keyed by the file name with ".png" removed
Blue_white_veil_results = {}
i = 1  # running number of the image being processed, printed as progress
for filename in os.listdir(Lesion_images_folder):
    # Skip anything in the folder that is not a .jpg/.png image
    if not filename.endswith(('.jpg', '.png')):
        continue
    img_path = os.path.join(Lesion_images_folder, filename)
    print(i)
    i += 1

    # The mask shares the image's base name, with "_mask.png" appended
    mask_name = os.path.splitext(filename)[0] + "_mask.png"
    corresponding_mask = os.path.join(Lesion_masks_folder, mask_name)

    # Count the blue-white veil pixels of this lesion
    result = analyze_and_count_veil(img_path, corresponding_mask)

    ID = filename.replace(".png", "")
    Blue_white_veil_results[ID] = result



1
2
3
4


  nB = B / Lum


5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126


In [11]:
# Haralick texture features per lesion, keyed by the file name with ".png" removed
haralick_results = {}

for filename in os.listdir(Lesion_images_folder):
    # Skip anything in the folder that is not a .jpg/.png image
    if not filename.endswith(('.jpg', '.png')):
        continue
    img_path = os.path.join(Lesion_images_folder, filename)

    # The mask shares the image's base name, with "_mask.png" appended
    mask_name = os.path.splitext(filename)[0] + "_mask.png"
    corresponding_mask = os.path.join(Lesion_masks_folder, mask_name)

    # Texture features of this lesion
    result = compute_haralick_texture_features(img_path, corresponding_mask)

    ID = filename.replace(".png", "")
    haralick_results[ID] = result


# Creating CSV file and headers

### Create file and add headers

In [12]:
# Get the current working directory
# The CSV file is written to the current working directory
current_directory = os.getcwd()
# Define the filename for the CSV file
csv_filename = "Attributes2.csv"
# Construct the full path to the CSV file
csv_filepath = os.path.join(current_directory, csv_filename)
# Create the empty CSV file (truncates an existing file of the same name)
with open(csv_filepath, 'w') as csv_file:
    pass  # This just creates an empty file
print("Empty CSV file created at:", csv_filepath)

# Column names, one per value of a data row, in the order the rows are built:
# lesion name, six asymmetry values, six colour values (mean then standard
# deviation, each in the image's red/green/blue channel order), the veil
# count, five texture values and the cancer label - 20 columns in total.
row_names = [
    "lesion_name",
    "symmetry_major", "symmetry_minor", "ssim_major", "ssim_minor",
    "symmetry_score", "color_symmetry_score",
    "avg_red_value", "avg_green_value", "avg_blue_value",
    "avg_red_std_dev", "avg_green_std_dev", "avg_blue_std_dev",
    "blue_white_veil_score",
    "Haralick_contrast", "Haralick_dissimilarity", "Haralick_homogeneity",
    "Haralick_energy", "Haralick_correlation",
    "is_cancer_bool",
]

# Write the column names as the first line of the CSV file
with open(csv_filepath, 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(row_names)
print("headers added at:", csv_filepath)


Empty CSV file created at: C:\Users\krist\Desktop\ITU\2_Semester\projects_in_datascience\first_year_project\Attributes2.csv
headers added at: C:\Users\krist\Desktop\ITU\2_Semester\projects_in_datascience\first_year_project\Attributes2.csv


# Combine Attribute dictionaries into a single list of lists

In [21]:
# One row per lesion: name, asymmetry values, colour values, veil count,
# texture values and finally the cancer label looked up in the metadata
Attributelist = []

for key in Asymmetry_results:
    row = (
        [key]
        + Asymmetry_results[key]
        + Colour_results[key]
        + [Blue_white_veil_results[key]]
        + haralick_results[key]
        + [get_diagnostic(key)]
    )
    Attributelist.append(row)
    

# Add attributes to the csv

In [22]:
# Append every feature row below the header line already in the file
with open(csv_filepath, 'a', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerows(Attributelist)

print("Attributs appended to the CSV file at:", csv_filepath)

Attributs appended to the CSV file at: C:\Users\krist\Desktop\ITU\2_Semester\projects_in_datascience\first_year_project\Attributes2.csv
