# Progress

- **Datasets**
  - [x] Read HIS2828 dataset
  - [x] Read ISIC2017 dataset
- **Tradicional features**
  - [x] Color moment features
  - [x] Texture features
  - [ ] SVM with tradicional features only
- **Deep features**: Coding network (CNN)
  - [x] Create network architecture
  - [ ] Train
- **Fusion methods**
  - [ ] CNMP (multilayer perceptron as fusion method)
  - [ ] R feature fusion (manully fixed parameter)
  - [ ] KPCA feature fusion
  - [ ] SVM feature fusion (SVM as fusion method)

In [22]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms

import numpy as np
import pathlib
import pandas as pd
import PIL
import cv2

from skimage.feature.texture import greycomatrix, greycoprops
from skimage.measure import shannon_entropy

# HIS2828 dataset

In [2]:
class HistologyDataset(torch.utils.data.Dataset):

	def __init__(self):
		data_dir_hdd = pathlib.Path("D:/Datasets/TFM/histologyDS2828")
		csv_file     = data_dir_hdd / "imageClasses.txt"
		csv_df       = pd.read_csv(csv_file, header=None, delim_whitespace=True, names=['Image', 'Label'])
		mean         = [0.485, 0.456, 0.406] #[0.5, 0.5, 0.5]
		std          = [0.229, 0.224, 0.225] #[0.5, 0.5, 0.5]
        
		self.image_dir  = data_dir_hdd / "imgs"
		self.images     = (csv_df["Image"]).values
		self.labels     = (csv_df["Label"]-1).values
		self.labels_map = {0: "conective tissue", 1: "ephitelial tissue", 2: "muscular tissue", 3: "nervous tissue"}
		self.transforms = transforms.Compose([transforms.RandomCrop(420),
                                              transforms.Resize(140),
                                              transforms.RandomHorizontalFlip(),
                                              transforms.RandomVerticalFlip(),
                                              transforms.ToTensor(),
                                              transforms.Normalize(mean, std)])
	def __len__(self):
		return len(self.labels)

	def __getitem__(self, idx):
		img_name = self.image_dir / self.images[idx]
		image = PIL.Image.open(img_name)
		if self.transforms: image = self.transforms(image)
		label = self.labels[idx]
		return image, label
    
micro_ds    = HistologyDataset()
print("There are", len(micro_ds), "images in the dataset.")

There are 2828 images in the dataset.


# ISIC2017 dataset

In [3]:
class SkinDataset(torch.utils.data.Dataset):

	def __init__(self, subset, transforms=False):
		dataset_dir = pathlib.Path("D:/Datasets/TFM/ISIC-2017")
		csv_file    = dataset_dir / ("ground_truth_"+subset+".csv")
		csv_df      = pd.read_csv(csv_file)

		self.image_dir  = dataset_dir / ("data_"+subset)
		self.images     = (csv_df["image_id"]+".jpg").values
		self.labels1    = (csv_df["melanoma"]).values
		self.labels2    = (csv_df["seborrheic_keratosis"]).values
		self.labels_map = {0:"melanoma", 1:"seborrheic", 2:"healthy"}
		self.transforms = transforms
        
	def __len__(self):
		return len(self.labels1)

	def __getitem__(self, idx):
		img_name = self.image_dir / self.images[idx]
		image = PIL.Image.open(img_name)
		if self.transforms: image = self.transforms(image)
		label = self.labels1[idx]
		return image, label

skin_ds    = {subset: SkinDataset(subset) for subset in ["train", "valid", "test"]}
{print("There are", len(skin_ds[subset]), "images in the "+subset+" dataset.") for subset in ["train", "valid", "test"]};

There are 2000 images in the train dataset.
There are 150 images in the valid dataset.
There are 600 images in the test dataset.


# Texture features

1. First acquire the gray-level co-occurrence matrix G (2 distances, 4 angles = 2*4 = 8 matrices)
2. Then, we employ:
   - The angular second moment (ASM)
   - Entropy (ENT)
   - Contrast (CON)
   - Correlation (COR)
   

8 matrices * 4 features each = 32 total features

- [Scikit-image texture features](http://scikit-image.org/docs/0.7.0/api/skimage.feature.texture.html)
- https://stackoverflow.com/questions/50834170/image-texture-with-skimage
- https://stackoverflow.com/questions/51172555/greycomatrix-for-rgb-image
- [Calculating **entropy** from GLCM of an image](https://stackoverflow.com/questions/40919936/calculating-entropy-from-glcm-of-an-image)
- [Understanding texture properties of a grey-level co-occurrence matrix (GLCM)](https://stackoverflow.com/questions/51463436/understanding-texture-properties-of-a-grey-level-co-occurrence-matrix-glcm)

In [50]:
img = cv2.imread('D:/Datasets/TFM/ISIC-2017/data_train/ISIC_0000000.jpg', cv2.IMREAD_GRAYSCALE)
#print("image shape:", img.shape)


distances  = [1,2]
angles     = [0, np.pi/4, np.pi/2, 3*np.pi/4]  # 0, 45, 90, 135 degree in radians.
glcm = greycomatrix(img, distances, angles)
#print("GLCM shape:", glcm.shape)


properties = ['ASM', 'contrast', 'correlation']
some_texture_feats = np.hstack([greycoprops(glcm, prop).ravel() for prop in properties])

entropy_feat = [shannon_entropy(glcm[:,:,x,y])  for x in range(2)   for y in range(4)]

all_texture_feats = np.hstack([some_texture_feats, entropy_feat])
print("texture feats:", all_texture_feats)

texture feats: [3.46578798e+09 2.42953945e+09 3.55724244e+09 2.48648134e+09
 2.26924248e+09 2.42953945e+09 2.41551461e+09 2.48648134e+09
 6.50161900e+06 1.36685180e+07 7.67135300e+06 1.32202980e+07
 1.04296260e+07 1.36685180e+07 1.21967730e+07 1.32202980e+07
 1.00000566e+00 1.00002422e+00            nan 1.00002798e+00
 1.00215181e+00 1.00002422e+00            nan 1.00002798e+00
 8.32118415e-01 9.91823435e-01 8.40996059e-01 9.72502707e-01
 1.06833245e+00 9.91823435e-01 1.06200750e+00 9.72502707e-01]


numpy.ndarray

# Color moment features
- https://en.wikipedia.org/wiki/Color_moments
- https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.moment.html
- https://stackoverflow.com/questions/38182087/third-order-moment-calculation-numpy
- https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skew.html

In [49]:
import numpy as np
from scipy.stats import skew, kurtosis

img      = skin_ds["train"][0][0]
np_image = np.array(img)

r = np_image[:, :, 0].reshape(-1)
g = np_image[:, :, 1].reshape(-1)
b = np_image[:, :, 2].reshape(-1)

# First color moment (Mean)
mean_r = np.mean(r)
mean_g = np.mean(g)
mean_b = np.mean(b)

# Second color moment (Standard deviation)
std_r = np.std(r)
std_g = np.std(g)
std_b = np.std(b)

# Third color moment (Skewness)
skew_r = skew(r)
skew_g = skew(g)
skew_b = skew(b)



# Coding Network architecture

Input size of `3×140×140`

Layer       | Kernel | Stride | Output size
------------|--------|--------|------------
Convolution | 11×11  |    1   | 32×130×130
Convolution | 11×11  |    1   | 32×120×120
Max pooling | 5×5    |    2   | 32×58×58
Convolution | 9×9    |    1   | 64×50×50
Max pooling | 5×5    |    2   | 64×23×23
Convolution | 8×8    |    1   | 128×16×16
Convolution | 9×9    |    1   | 256×8×8
Convolution | 8×8    |    1   | 256×1×1
Dense       |    -   |    -   | 4×1×1
Softmax     |    -   |    -   | 4×1×1

In [27]:
class CodingNetwork(nn.Module):
    
    #Our batch shape for input x is (3, 140, 140)
    
    def __init__(self):
        super(CodingNetwork, self).__init__()
        
        #Input channels=3, output channels=32
        self.conv1 = nn.Conv2d(3,  32, kernel_size=11, stride=1, padding=0)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=11, stride=1, padding=0)
        self.pool1 = nn.MaxPool2d(kernel_size=5, stride=2, padding=0)
        
        self.conv3 = nn.Conv2d(32, 64, kernel_size=9, stride=1, padding=0)
        self.pool2 = nn.MaxPool2d(kernel_size=5, stride=2, padding=0)

        self.conv4 = nn.Conv2d(64, 128, kernel_size=8, stride=1, padding=0)
        self.conv5 = nn.Conv2d(128, 256, kernel_size=9, stride=1, padding=0)
        self.conv6 = nn.Conv2d(256, 256, kernel_size=8, stride=1, padding=0)

        #4608 input features, 64 output features (see sizing flow below)
        self.fc = nn.Linear(256, 4)

    def forward(self, x):
        
        #Computes the activation of the first convolution
        
        x = F.relu(self.conv1(x)) # Size changes from (3, 140, 140) to (32, 130, 130)
        x = F.relu(self.conv2(x)) # Size changes from (32, 130, 130) to (32, 120, 120)
        x = self.pool1(x)         # Size changes from (32, 120, 120) to (32, 58, 58)

        x = F.relu(self.conv3(x)) # Size changes from (32, 58, 58) to (64, 50, 50)
        x = self.pool2(x)         # Size changes from (64, 50, 50) to (64, 23, 23)

        x = F.relu(self.conv4(x)) # Size changes from (64, 23, 23) to (128, 16, 16)
        x = F.relu(self.conv5(x)) # Size changes from (128, 16, 16) to (256, 8, 8)
        x = F.relu(self.conv6(x)) # Size changes from (256, 8, 8) to (256, 1, 1)

        x = x.view(-1, 256)       # Size changes from (256, 1, 1) to (256)
        x = self.fc(x)            # Size changes from (256) to (4)
        
        return(x)

torch.Size([16, 4])


In [40]:
model = CodingNetwork()

x = torch.randn(16, 3, 140, 140)
output = model(x)