# Skin detection

This kernel is still growing! ;-) I will keep adding comments, explanations and improvements as I proceed with this method, because I like to share what I find. Happy reading!

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.transform import resize, rescale
from skimage.io import imread, imshow
from glob import glob
from sklearn.mixture import GaussianMixture


%matplotlib inline

# Root folder of the training data. The cells below treat every entry directly
# under it as one cervix-type folder and use that folder name as the label.
INPUTFOLDER = '../input/train/'

In [None]:
# Collect the path of every image, one sub-folder of INPUTFOLDER at a time.
image_paths = []
for path in glob(INPUTFOLDER + "*"):
    cervix_type = path.split("/")[-1]
    # extend() grows the list in place instead of copying it on every folder.
    image_paths.extend(glob(INPUTFOLDER + cervix_type + "/*"))

# One row per image: its path and its type (the name of the parent folder).
all_cervix_images = pd.DataFrame({'imagepath': image_paths})
all_cervix_images['type'] = [p.split("/")[-2] for p in image_paths]
all_cervix_images.head()

In [None]:
# Number of distinct image paths in the index.
nr_images = all_cervix_images['imagepath'].nunique(dropna=False)
nr_images

In [None]:
# Count how many images belong to each cervix type.
all_cervix_images['type'].value_counts()

In [None]:
# Show the first `nr_examples` images of every cervix type, one row per type.
nr_examples = 3
cervix_types = all_cervix_images['type'].unique()

fig = plt.figure(figsize=(13, 13))
for row, t in enumerate(cervix_types):
    # Filter the paths of this type once, not once per plotted example.
    paths_of_type = all_cervix_images.loc[
        all_cervix_images['type'] == t, 'imagepath'
    ].values
    # Slicing tolerates types that have fewer than `nr_examples` images.
    for m, f in enumerate(paths_of_type[:nr_examples]):
        # Subplot indices are 1-based and run row by row through the grid.
        i = row * nr_examples + m + 1
        # The row count follows the number of types instead of assuming 3.
        ax = fig.add_subplot(len(cervix_types), nr_examples, i)
        ax.set_title('cervix {}'.format(t))
        ax.imshow(imread(f))

### Challenges
- different image shapes
- medical equipment
- circular and rectangular views
- illumination quality (and missing color channels)
- different cervix sizes

## An example image

In [None]:
# Load the first image labelled 'Type_3' as the running example.
is_type_3 = all_cervix_images['type'] == 'Type_3'
example_path = all_cervix_images.loc[is_type_3, 'imagepath'].iloc[0]
example = imread(example_path)

In [None]:
# Display the example image.
imshow(example)

In [None]:
# Plot each slice along the last axis of the example image side by side,
# titled with the corresponding entry of `strings`.
strings = ['red', 'green', 'blue']
fig = plt.figure(figsize=(13, 13))
for channel, channel_name in enumerate(strings):
    ax = fig.add_subplot(1, 3, channel + 1)
    plt.title(channel_name)
    plt.imshow(example[:, :, channel])

In [None]:
# Per-pixel standard deviation across the last axis (the color channels).
fig = plt.figure(figsize=(13, 13))
std_over_channels_image = example.std(axis=2)
imshow(std_over_channels_image)
plt.title('Standard deviation over color channels')
plt.show()

## Skin clustering: DBSCAN

In [None]:
class Cluster:
    """Collection of the points that belong to one cluster.

    Points are stored in a ``set``, so they must be hashable and every
    point is kept at most once.
    """

    def __init__(self):
        # Set holding the member points of this cluster.
        self.cluster = set()

    def add(self, point):
        """Add ``point`` to the cluster; re-adding a member is a no-op."""
        # set.add already ignores duplicates, so no membership test is needed.
        self.cluster.add(point)

    def contains(self, point):
        """Return ``True`` if ``point`` is a member of the cluster."""
        return point in self.cluster

    def __contains__(self, point):
        """Support ``point in cluster`` as an alias for :meth:`contains`."""
        return point in self.cluster

    
def euclidian(X, Y):
    """Return the Euclidean (L2) distance between ``X`` and ``Y``.

    Both inputs are converted to floating point before they are subtracted.
    Without that, unsigned integer data (for example ``uint8`` pixel values)
    wraps around on subtraction and squaring, giving a wrong distance.

    Parameters
    ----------
    X, Y : array_like
        Numeric values with identical or broadcastable shapes.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared element-wise differences.
    """
    difference = np.asarray(X, dtype=float) - np.asarray(Y, dtype=float)
    squared = np.power(difference, 2)
    return np.sqrt(np.sum(squared))