In [None]:
import cv2
import pandas as pd
import numpy as np
import os
from sklearn import preprocessing
import time
from collections import Counter
from skimage.feature import hog
from skimage import data, exposure
# from skimage.feature import greycomatrix, greycoprops
import math
from skimage.feature import graycomatrix, graycoprops
from scipy.stats import skew,kurtosis
# from mahotas.features import surf
from skimage import measure
from skimage import feature, color
# import mahotas
import mimetypes
import matplotlib.pyplot as plt
from skimage.filters import gabor
from skimage.color import rgb2gray
from PIL import Image

In [None]:
# Root folder of the dataset. The file-collection cell below uses each
# first-level sub-folder name as the class label of the images inside it.
# Set the COFFEE_BEANS_ROOT environment variable to point the notebook at
# another location without editing this cell; the default is unchanged.
# abspath() keeps later os.path.join(root_path, ...) calls valid after chdir.
root_path = os.path.abspath(os.path.expanduser(
    os.environ.get('COFFEE_BEANS_ROOT', '/Users/tony/Desktop/CoffeeBeansT')
))
# root_path = 'D:\\ProjectMango\\output\\augmentation-enhancement'
current_dir = os.getcwd()  # working directory before switching to the dataset
os.chdir(root_path)
print(current_dir)

In [None]:
# Show the entries of the current working directory (the dataset root after
# the os.chdir call in the previous cell).
os.listdir()

In [None]:
from os import walk

# Three parallel lists, one entry per JPEG image found under root_path:
#   path  - full path of the image file
#   names - bare file name
#   label - name of the first-level folder the image was found under
label = []
path = []
names = []

# List root_path explicitly instead of relying on the current working
# directory, and sort every level: os.listdir / os.walk return entries in
# arbitrary order, so sorting makes the row order reproducible between runs.
for folder_name in sorted(os.listdir(root_path)):
    current_dir = os.path.join(root_path,folder_name)
    if not os.path.isdir(current_dir):
        # Plain files in the root (e.g. a CSV exported there earlier) are not
        # class folders and contain nothing to walk.
        continue
    for root,dirs,files in walk(current_dir):
      dirs.sort()  # in-place sort makes walk() descend in a fixed order
      for file in sorted(files):
        # Only files whose extension maps to the JPEG MIME type are kept.
        if mimetypes.guess_type(file)[0] == 'image/jpeg':
          path.append(os.path.join(root,file))
          label.append(folder_name)
          names.append(file)
        

In [None]:
# Sanity check: `path` and `label` are appended to together in the collection
# loop, so the two lengths are expected to be equal.
len(path), len(label)

In [None]:
# One row per image. Building the frame from a dict of lists avoids the detour
# through a transposed fixed-width unicode NumPy array (which pads every
# string to the longest path) and keeps text columns even when no image was
# found.
df = pd.DataFrame({'path': path, 'filename': names, 'label': label})
df

In [None]:
# Show the dtype of each column of the image table.
df.dtypes

In [None]:
def find_static_data(img,hsv=False):
  """Compute per-channel colour statistics of an image.

  Parameters
  ----------
  img : numpy.ndarray
      Image array indexed as (row, column, channel). When ``hsv`` is True it
      must be a BGR image, because it is converted here with
      ``cv2.COLOR_BGR2HSV``; do not pass an image that is already HSV.
  hsv : bool, default False
      If True the statistics are computed on the HSV conversion of ``img``
      (channel order h, s, v); otherwise on ``img`` as given (b, g, r for an
      OpenCV image).

  Returns
  -------
  dict
      ``'mean'`` and ``'std'``: arrays with one value per channel.
      ``'skew'`` and ``'kurtosis'``: lists with one value per channel.
      All four entries follow the channel order of the analysed image.
  """
  # Every statistic must describe the same colour space. Mean/std used to be
  # taken from the unconverted input even when hsv=True, while skew/kurtosis
  # came from the HSV image, so one payload mixed two colour spaces.
  pixels = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) if hsv else img

  # Mean and standard deviation over both spatial axes -> one value per channel
  mean = pixels.mean(axis=(0,1))
  sd = pixels.std(axis=(0,1))

  # One flat vector of pixel values per channel for the higher-order moments
  channels = [pixels[:, :, c].ravel() for c in range(pixels.shape[2])]

  payload = {
    'mean' : mean,
    'std' : sd,
    'skew' : [skew(channel) for channel in channels],
    'kurtosis' : [kurtosis(channel) for channel in channels]
  }

  return payload
  

In [None]:
def get_pixel(img, center, x, y):
    """
    Compare one pixel of an image with the value of the centre pixel.

    Parameters:
    - img: input image, indexed as img[x, y]
    - center: grey value of the centre pixel
    - x, y: indices of the pixel to compare (x on the first axis, y on the second)

    Returns:
    - 1 if the pixel value is greater than or equal to the centre value
      (minus the threshold), otherwise 0.
    - 0 if the pixel lies outside the image.
    """
    th = 0   # threshold, can be tuned

    # A negative index does not raise IndexError: it wraps around to the
    # opposite edge of the array. Treat it explicitly as "outside the image"
    # so border pixels are not compared with pixels from the far side.
    if x < 0 or y < 0:
        return 0

    try:
        return 1 if img[x, y] >= center - th else 0
    except IndexError:
        # Index past the last row/column: the pixel is outside the image
        return 0


# Function for calculating LBP
def lbp_calculated_pixel(img, x, y):
    """
    Compute and return the local binary pattern (LBP) value of the pixel at
    position (x, y) of the image.

    Each of the 8 neighbours is compared with the centre pixel through
    get_pixel(); the resulting 0/1 flags are combined into one integer.
    """
    center = img[x][y]

    # Neighbour offsets in the order their flags are weighted:
    # the flag at position i contributes 2**i to the result.
    neighbour_offsets = (
        (-1, -1),  # top_left      -> 1
        (-1, 0),   # top           -> 2
        (-1, 1),   # top_right     -> 4
        (0, 1),    # right         -> 8
        (1, 1),    # bottom_right  -> 16
        (1, 0),    # bottom        -> 32
        (1, -1),   # bottom_left   -> 64
        (0, -1),   # left          -> 128
    )

    # Convert the binary flags to a decimal value
    return sum(
        get_pixel(img, center, x + dx, y + dy) * (1 << bit)
        for bit, (dx, dy) in enumerate(neighbour_offsets)
    )

In [None]:
def get_pixel(img, center, x, y):
    """Return 1 if the neighbour pixel img[x][y] is >= center, otherwise 0.

    Neighbours outside the image (which happens for pixels on the image
    border) count as 0.

    Note: this cell redefines get_pixel; when the notebook runs top to bottom
    this definition replaces the earlier one of the same name.
    """
    # A negative index does not raise: it silently wraps around to the
    # opposite edge, so it has to be rejected explicitly.
    if x < 0 or y < 0:
        return 0

    try:
        # Set to 1 when the neighbour value is greater than or equal to the
        # centre pixel value.
        return 1 if img[x][y] >= center else 0
    except IndexError:
        # Index past the last row/column. Only IndexError is handled so that
        # unrelated errors are no longer swallowed.
        return 0

In [None]:
def compute_haralick_features(image, distances, angles):
    """Compute GLCM-based texture descriptors of an image.

    The image is converted from BGR to grayscale, a symmetric, normalised
    grey-level co-occurrence matrix with 256 levels is built for the given
    distances and angles, and each property is reduced to a single number by
    averaging everything graycoprops returns for it.

    Parameters:
    - image: BGR image as accepted by cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)
    - distances: pixel-pair distances passed to graycomatrix
    - angles: pixel-pair angles passed to graycomatrix

    Returns:
    - dict with the keys 'Contrast', 'Dissimilarity', 'Homogeneity',
      'Energy' and 'Correlation'.
    """
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    cooccurrence = graycomatrix(grey, distances, angles, levels=256, symmetric=True, normed=True)

    # Output key -> property name understood by graycoprops
    prop_by_key = {
      'Contrast' : 'contrast',
      'Dissimilarity' : 'dissimilarity',
      'Homogeneity' : 'homogeneity',
      'Energy' : 'energy',
      'Correlation' : 'correlation'
    }

    return {
      key : np.mean(graycoprops(cooccurrence, prop))
      for key, prop in prop_by_key.items()
    }


In [None]:
def extract_hog_features(image):
    """Return the HOG descriptor of the grayscale version of ``image``.

    The image is converted with color.rgb2gray and passed to feature.hog with
    that function's default parameters.
    """
    # NOTE(review): rgb2gray presumably expects RGB channel order, whereas
    # images loaded with cv2.imread are BGR - confirm what callers pass in.
    return feature.hog(color.rgb2gray(image))

In [None]:
def extract_contour_features(image_path, threshold_value=128):
    """Extract shape descriptors for every external contour of an image.

    The image is loaded as grayscale, binarised with a fixed threshold and
    its external contours are located with cv2.findContours.

    Parameters:
    - image_path: path of the image file to load
    - threshold_value: grey level used for the binary threshold (pixels above
      it become 255). Adjust it to the image characteristics; defaults to 128.

    Returns:
    - list with one dict per contour, each holding the keys "Area",
      "Perimeter", "Compactness", "Aspect Ratio" and "Extent". The list is
      empty when no contour is found.

    Raises:
    - OSError: if the image cannot be read.
    """
    # Load the image
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cv2.threshold.
        raise OSError(f"Could not read image: {image_path}")

    # Apply thresholding to create a binary image
    _, binary_image = cv2.threshold(image, threshold_value, 255, cv2.THRESH_BINARY)

    # Find contours in the binary image
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Contour-based features, one dict per contour
    contour_features_list = []

    for contour in contours:
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)  # True: treat the contour as closed

        # Compactness (perimeter^2 / area); a zero-area contour gets 0.0
        compactness = (perimeter ** 2) / area if area != 0 else 0.0

        # Only the size of the bounding box is needed, not its position
        _, _, w, h = cv2.boundingRect(contour)

        # Aspect ratio of the bounding box
        aspect_ratio = float(w) / h

        # Extent (area of contour / area of bounding box)
        extent = area / (w * h)

        contour_features_list.append({
            "Area": area,
            "Perimeter": perimeter,
            "Compactness": compactness,
            "Aspect Ratio": aspect_ratio,
            "Extent": extent
        })

    return contour_features_list

In [None]:
# Per-image feature extraction: contour shape features, an LBP histogram and
# Haralick (GLCM) texture features are written into new columns of `df`.

# GLCM settings are identical for every image, so they are defined once.
distances = [1]  # Distance between pixels in the GLCM
angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]  # Angles for GLCM computation

# Output column -> key of the dicts returned by extract_contour_features
contour_columns = {
  'contourArea' : 'Area',
  'contourPerimeter' : 'Perimeter',
  'contourCompactness' : 'Compactness',
  'contourAspectRatio' : 'Aspect Ratio',
  'contourExtent' : 'Extent'
}

# The loop variable is deliberately not named `path`: that name is the list
# used to build `df`, and rebinding it to a string here would break re-running
# the earlier cells. Iterating with .items() yields the real index labels that
# df.loc expects.
for row,image_path in df['path'].items():
  start = time.time()
  image = cv2.imread(image_path)
  if image is None:
    # cv2.imread returns None for a file it cannot decode; leave this row's
    # feature columns empty instead of aborting the whole run in cv2.resize.
    print('could not read image, skipping: ' + image_path)
    continue
  image_resize = cv2.resize(image,(256,256),interpolation = cv2.INTER_AREA)
  print(image_path)

  # # # Shape Feature Extraction Contour
  contour = extract_contour_features(image_path)
  # NOTE(review): the first contour returned is used as-is; it is not
  # necessarily the largest one - confirm this is the intended contour.
  first_contour = contour[0] if contour else None
  for column,key in contour_columns.items():
    # No contour found -> NaN rather than an IndexError
    df.loc[row,column] = first_contour[key] if first_contour is not None else np.nan


  # # # Texture Feature Extraction LBP
  height, width, _ = image_resize.shape
  img_gray = cv2.cvtColor(image_resize,cv2.COLOR_BGR2GRAY)
  img_lbp = np.zeros((height, width),np.uint8)
  for i in range(0, height):
    for j in range(0, width):
        img_lbp[i, j] = lbp_calculated_pixel(img_gray, i, j)
  # Fixed-length histogram over all 256 possible LBP codes, so a code that
  # does not occur in this image is stored as a count of 0 instead of being
  # left as NaN.
  lbp_histogram = np.bincount(img_lbp.ravel(), minlength=256)
  for key,value in enumerate(lbp_histogram):
    df.loc[row, f"lbp_{key}"] = value

  # # # Texture Feature Extraction Haralick
  haralick_features = compute_haralick_features(image_resize, distances, angles)
  print(haralick_features)
  df.loc[row,'Contrast'] = haralick_features['Contrast']
  df.loc[row,'Dissimilarity'] = haralick_features['Dissimilarity']
  df.loc[row,'Homogeneity'] = haralick_features['Homogeneity']
  df.loc[row,'Energy'] = haralick_features['Energy']
  df.loc[row,'Correlation'] = haralick_features['Correlation']


  # # # # Color Feature Extraction RGB
  # payload_rgb = find_static_data(image_resize)
  # #Average
  # df.loc[row,'R_mean'] = payload_rgb['mean'][2]
  # df.loc[row,'G_mean'] = payload_rgb['mean'][1]
  # df.loc[row,'B_mean'] = payload_rgb['mean'][0]
  # #Standard deviation
  # df.loc[row,'R_STD'] = payload_rgb['std'][2]
  # df.loc[row,'G_STD'] = payload_rgb['std'][1]
  # df.loc[row,'B_STD'] = payload_rgb['std'][0]
  # # Skewness
  # df.loc[row,'R_skewness'] = payload_rgb['skew'][2]
  # df.loc[row,'G_skewness'] = payload_rgb['skew'][1]
  # df.loc[row,'B_skewness'] = payload_rgb['skew'][0]
  # # Kurtosis
  # df.loc[row,'R_kurtosis'] = payload_rgb['kurtosis'][2]
  # df.loc[row,'G_kurtosis'] = payload_rgb['kurtosis'][1]
  # df.loc[row,'B_kurtosis'] = payload_rgb['kurtosis'][0]



  # # # # Color Feature Extraction HSV
  # NOTE(review): find_static_data(..., hsv=True) runs its own
  # cv2.COLOR_BGR2HSV conversion, so the pre-conversion below would convert
  # the image twice - when re-enabling this block, presumably pass
  # image_resize directly; confirm before use.
  # hsv = cv2.cvtColor(image_resize,cv2.COLOR_RGB2HSV)
  # payload_hsv = find_static_data(hsv,hsv = True)

  # #Average
  # df.loc[row,'H_mean'] = payload_hsv['mean'][0]
  # df.loc[row,'S_mean'] = payload_hsv['mean'][1]
  # df.loc[row,'V_mean'] = payload_hsv['mean'][2]
  # #Standard deviation
  # df.loc[row,'H_STD'] = payload_hsv['std'][0]
  # df.loc[row,'S_STD'] = payload_hsv['std'][1]
  # df.loc[row,'V_STD'] = payload_hsv['std'][2]
  # # Skewness
  # df.loc[row,'H_skewness'] = payload_hsv['skew'][0]
  # df.loc[row,'S_skewness'] = payload_hsv['skew'][1]
  # df.loc[row,'V_skewness'] = payload_hsv['skew'][2]
  # # Kurtosis
  # df.loc[row,'H_kurtosis'] = payload_hsv['kurtosis'][0]
  # df.loc[row,'S_kurtosis'] = payload_hsv['kurtosis'][1]
  # df.loc[row,'V_kurtosis'] = payload_hsv['kurtosis'][2]

  # Elapsed processing time for this image, in seconds
  print('finished task at '+ str(time.time() - start))



In [None]:
# Display the image table; once the feature-extraction cell has run it also
# carries the contour, LBP and Haralick feature columns.
df

In [None]:
# Show where the CSV in the next cell will be written and what is already
# there. The listing comes last because a notebook cell only displays the
# value of its final expression; as the first statement its result was
# silently discarded.
print(os.getcwd())
os.listdir()

In [None]:
# Write the table to CSV without the index column. The file name is relative,
# so it is created in the current working directory, which the os.chdir call
# near the top of the notebook set to root_path.
df.to_csv('feature_extractions_data.csv',index=False)