In [19]:
from skimage import feature
from imutils import paths
import numpy as np
import mahotas
import cv2
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

# Read Data

In [11]:
def _read_split(list_path):
    """Load one space-separated, header-less split file and name its two columns."""
    split = pd.read_csv(list_path, sep=" ", header=None)
    split.columns = ["file_path", "img_label"]
    return split


# One listing file per dataset split, all sharing the same two-column layout.
trainDF = _read_split("train.txt")
valDF = _read_split("val.txt")
testDF = _read_split("test.txt")

In [20]:
def describe(image, ftype="Hara", numPoints=2, radius=0.25):
    """Build a 1-D feature vector: HSV colour statistics followed by a texture descriptor.

    Parameters
    ----------
    image : numpy.ndarray
        Colour image; it is converted with ``COLOR_BGR2HSV`` / ``COLOR_BGR2GRAY``,
        so BGR channel order (as produced by ``cv2.imread``) is expected.
    ftype : str
        ``"Hara"`` selects Haralick texture features. Any other value selects
        the local-binary-pattern (LBP) histogram.
    numPoints : int
        Number of LBP sampling points (only used on the LBP path).
    radius : float
        LBP sampling radius (only used on the LBP path).

    Returns
    -------
    numpy.ndarray
        Per-channel HSV means, then per-channel HSV standard deviations,
        followed by either the Haralick features averaged over axis 0 or the
        normalised LBP histogram with ``numPoints + 2`` bins.

    Raises
    ------
    ValueError
        If ``image`` is ``None`` (e.g. ``cv2.imread`` could not load the file).
    """
    if image is None:
        # Fail early with a readable message instead of an opaque OpenCV
        # assertion raised from inside cvtColor.
        raise ValueError("describe() received None instead of an image; "
                         "check that the file exists and is readable")

    # Mean and standard deviation of every channel of the HSV representation.
    (means, stds) = cv2.meanStdDev(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))
    # Stack means on top of stds and flatten to a single 1-D array.
    colorStats = np.concatenate([means, stds]).flatten()
    # Both texture descriptors operate on the grayscale image.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    if ftype == "Hara":
        # Average the Haralick texture features over axis 0.
        haralick = mahotas.features.haralick(gray).mean(axis=0)
        # Join colour statistics and texture features into one vector.
        return np.hstack([colorStats, haralick])

    # Small constant that keeps the normalisation below from dividing by zero.
    eps = 1e-7
    lbp = feature.local_binary_pattern(gray, numPoints, radius, method="uniform")
    # Explicit integer bin edges 0..numPoints+2 give numPoints+2 bins. No
    # ``range=`` argument is passed because numpy ignores it when edges are given.
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, numPoints + 3))

    # Normalise the histogram so its bins sum to (approximately) one.
    hist = hist.astype("float")
    hist /= (hist.sum() + eps)

    return np.hstack([colorStats, hist])

In [25]:
# Names of the split DataFrames, kept in case other cells reference this list.
DFname = ["trainDF", "valDF", "testDF"]


def _extract_features(frame, ftype="LBPs"):
    """Return a list holding one ``describe`` feature vector per row of ``frame``.

    The image path is read from the first column of ``frame``. A progress bar
    is shown while the images are processed.

    Raises
    ------
    OSError
        If ``cv2.imread`` returns ``None`` for a path (missing or unreadable file).
    """
    vectors = []
    for img_path in tqdm(frame.iloc[:, 0], total=len(frame)):
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError("cv2.imread could not load image: {!r}".format(img_path))
        vectors.append(describe(image, ftype=ftype))
    return vectors


# Explicit assignments replace the previous eval/exec-generated variables;
# the resulting names and the train -> val -> test order are unchanged.
data_trainDF = _extract_features(trainDF)
data_valDF = _extract_features(valDF)
data_testDF = _extract_features(testDF)

100%|████████████████████████████████████████████████████████████████████████████| 63325/63325 [11:46<00:00, 89.69it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 450/450 [00:05<00:00, 85.00it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 450/450 [00:05<00:00, 85.29it/s]


In [31]:
# Column layout of a feature vector: HSV means, HSV standard deviations,
# then the four LBP histogram bins.
_feature_cols = ['h_mean', 's_mean', 'v_mean', 'h_std', 's_std', 'v_std',
                 'LBPs1', 'LBPs2', 'LBPs3', 'LBPs4']


def _as_feature_frame(vectors, labels):
    """Turn a list of feature vectors into a labelled table without the last LBP bin."""
    table = pd.DataFrame(vectors)
    table.columns = _feature_cols
    table['labels'] = labels
    # NOTE(review): LBPs4 is presumably dropped because the normalised
    # histogram bins sum to ~1, making the last bin redundant -- confirm.
    return table.drop(columns=["LBPs4"])


featureDF_train = _as_feature_frame(data_trainDF, trainDF["img_label"])

featureDF_test = _as_feature_frame(data_testDF, testDF["img_label"])

featureDF_val = _as_feature_frame(data_valDF, valDF["img_label"])


In [33]:
# Write each split's feature table to CSV without the row index.
# ``index`` is documented as a bool, so pass False explicitly rather than
# relying on None being falsy.
featureDF_train.to_csv("HSV_LBPs_train.csv", index=False)
featureDF_test.to_csv("HSV_LBPs_test.csv", index=False)
featureDF_val.to_csv("HSV_LBPs_val.csv", index=False)