<a href="https://colab.research.google.com/github/chisombrown/hair_classification/blob/main/hair_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
from google.colab import drive
drive.mount('/content/gdrive')
from keras.datasets import mnist
from sklearn import ensemble
from sklearn.feature_selection import SelectFromModel
from os import listdir
from os.path import isfile, join
from skimage.io import imread
from skimage.color import rgb2grey
from skimage.transform import resize
from sklearn.utils import shuffle
from skimage.feature import hog

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
def paths_to_images(image_paths):
  """
  Read every path in image_paths and return the images as one stack.

  Each file is loaded with imread.  An image that comes back with three
  dimensions (i.e. it carries a channel axis) is converted with rgb2grey
  first.  The result is an array of shape (len(image_paths), 227, 227);
  the assignment into the stack requires each loaded image to fit a
  227x227 slot.
  """
  #pre-allocate one 227x227 slot of zeros per path
  stack = np.zeros((len(image_paths), 227, 227))
  for index, image_path in enumerate(image_paths):
    picture = imread(image_path)
    #a third axis means colour channels are present, so collapse them
    is_colour = picture.ndim == 3
    stack[index] = rgb2grey(picture) if is_colour else picture
  return stack

In [None]:
#root folder of the patch dataset on the mounted drive
patch_dir = '/content/gdrive/MyDrive/Patch1k'

#one folder per class; the position in this list is used as the class label
#(0 for 'Hair', 1 for 'NonHair')
hair_non_hair = [join(patch_dir, 'Hair'), join(patch_dir, 'NonHair')]
#initialise list of paths
train_images_paths = []
test_images_paths = []
train_labels = []
test_labels = []
for i, class_dir in enumerate(hair_non_hair):
  #get paths to test and train path within each hair and nonhair dirs
  test_dir = join(class_dir, 'Testing')
  train_dir = join(class_dir, 'Training')
  #loop style taken from stackoverflow https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory
  #only regular files are kept, so sub-directories are skipped
  train_files = [join(train_dir, f) for f in listdir(train_dir) if isfile(join(train_dir, f))]
  test_files = [join(test_dir, f) for f in listdir(test_dir) if isfile(join(test_dir, f))]
  train_images_paths += train_files
  test_images_paths += test_files
  #assign label to paths: count the filtered file lists (not the raw
  #directory listing) so there is exactly one label per image path
  train_labels += [i]*len(train_files)
  test_labels += [i]*len(test_files)


# stack of train images
train_images = paths_to_images(train_images_paths)
#stack of test images
test_images = paths_to_images(test_images_paths)

# (train_X, train_y), (test_X, test_y) = mnist.load_data()



In [None]:
#write the image stacks and label lists to the working directory so later
#cells can reload them instead of re-reading every image file
for _stem, _payload in (
    ('train_images_patch', train_images),
    ('test_images_patch', test_images),
    ('train_labels_patch', train_labels),
    ('test_labels_patch', test_labels),
):
  np.save(_stem, _payload)



In [None]:
# np.save(join('train_images_patch','MyDrive' ),train_images)


In [None]:
#reload the saved patches and labels; images and labels are passed to
#shuffle together so each image keeps its own label after reordering
train_images, train_labels = shuffle(
    np.load('train_images_patch.npy'),
    np.load('train_labels_patch.npy'),
)

test_images, test_labels = shuffle(
    np.load('test_images_patch.npy'),
    np.load('test_labels_patch.npy'),
)

In [None]:
def get_hog_feats(images, pixels_per_cell=4, cells_per_block=2):
  """
  This function creates hog descriptors of the patches.

  images: iterable of 2-d image arrays (e.g. the stack returned by
    paths_to_images).
  pixels_per_cell: side length, in pixels, of the square HOG cell.
  cells_per_block: side length, in cells, of the square HOG block.

  Returns a 2-d array with one row per image and one column per HOG
  feature.  An empty input gives an array of shape (0, 0).  All images must
  produce descriptors of the same length (true when they share one size),
  otherwise np.stack raises ValueError.
  """
  #feature_vector=True already yields a flat 1-d descriptor per image, so no
  #per-descriptor reshape is needed
  descriptors_stack = [
      hog(image,
          pixels_per_cell=(pixels_per_cell, pixels_per_cell),
          cells_per_block=(cells_per_block, cells_per_block),
          feature_vector=True)
      for image in images
  ]
  #np.stack cannot handle an empty list, so return an empty 2-d array instead
  if not descriptors_stack:
    return np.empty((0, 0))
  #stack into (n_images, n_features) so callers can use .shape and pass the
  #result straight to a classifier
  return np.stack(descriptors_stack)



In [None]:
def get_tiny_feats(images):
  """
  Shrink every image to 16x16 and return the results as one stack.

  Each image is passed through resize with anti-aliasing enabled.  The
  result is an array of shape (len(images), 16, 16).
  """
  #pre-allocate one 16x16 slot of zeros per input image
  shrunk = np.zeros((len(images), 16, 16))
  for slot, picture in enumerate(images):
    shrunk[slot] = resize(picture, (16, 16), anti_aliasing=True)
  return shrunk


In [None]:
# train_labels = np.reshape(train_labels, (-1,1))
# test_labels = np.reshape(test_labels, (-1,1))

In [None]:
#random forest with 100 trees
classifier = ensemble.RandomForestClassifier(n_estimators = 100)

#alternative features: get_tiny_feats(images) flattened to 256 values per
#image, e.g. np.reshape(feats, (feats.shape[0], 256))

#HOG descriptors for both splits.  np.asarray guarantees a 2-d
#(n_samples, n_features) array whether get_hog_feats hands back a list of
#descriptors or an array.  No zero-filled placeholder is allocated first,
#and the test features are not reshaped to 256 columns: that width belongs
#to the 16x16 tiny-image features, not to HOG descriptors.
train_feats = np.asarray(get_hog_feats(train_images))
test_feats = np.asarray(get_hog_feats(test_images))

classifier.fit(train_feats, train_labels)

#mean accuracy on the test patches (displayed as the cell result)
classifier.score(test_feats, test_labels)