In [7]:
from sklearn.svm import SVC
 
import argparse
 
import mahotas
 
import glob
 
import cv2
 
import numpy as np
import pandas as pd

from tqdm import tqdm

In [28]:
def describe(image, mask=None):
    """Build a colour + texture feature vector for a single image.

    The vector is laid out as: the per-channel means, then the per-channel
    standard deviations (both from ``cv2.meanStdDev``), then the Haralick
    texture features of the grayscale image averaged over axis 0 of the
    ``mahotas.features.haralick`` result.

    Args:
        image: colour image array; it is converted with
            ``cv2.COLOR_BGR2GRAY``, so BGR channel order is expected.
        mask: accepted for interface compatibility; currently unused.

    Returns:
        1-D ``numpy.ndarray`` with the colour statistics followed by the
        averaged Haralick features.

    Raises:
        ValueError: if ``image`` is None.
    """
    # cv2.imread signals an unreadable file by returning None; fail early with
    # a readable message instead of an opaque OpenCV error further down.
    if image is None:
        raise ValueError(
            "describe() received None instead of an image "
            "(cv2.imread returns None when a file cannot be read)"
        )

    # Per-channel mean and standard deviation of the colour image.
    (means, stds) = cv2.meanStdDev(image)

    # Stack means first, then stds, and flatten to one dimension.
    colorRGB = np.concatenate([means, stds]).flatten()

    # Haralick textures are computed on the grayscale version of the image.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    hara = mahotas.features.haralick(gray).mean(axis=0)

    # Merge both feature groups into a single vector.
    features = np.append(colorRGB, hara)

    return features

def track_hara(text):
    """Extract colour/Haralick features for every listed image into one table.

    Args:
        text: sequence of entries where ``entry[0]`` is an image path relative
            to the current directory and ``entry[1]`` is its label (anything
            ``int`` can convert).

    Returns:
        ``pandas.DataFrame`` with one row per entry: ``id`` (file name without
        directory or extension), the values returned by ``describe``, and
        ``label``.

    Raises:
        OSError: if ``cv2.imread`` cannot read one of the images.
    """
    # describe() concatenates [means, stds], so the mean columns must be
    # listed before the std columns for the header to match the data.
    haralick_name = [
        "id",
        "B_mean", "G_mean", "R_mean",
        "B_std", "G_std", "R_std",
        "Angular Second Moment",
        "Contrast",
        "Correlation",
        "Variance",
        "Inverse Difference Moment",
        "Sum Average",
        "Sum Variance",
        "Sum Entropy",
        "Entropy ",
        "Difference Variance",
        "Difference Entropy",
        "f12",
        "f13",
        "label",
    ]

    print("total img amount:", len(text))

    # Collect plain rows and build the frame once: growing a DataFrame row by
    # row is quadratic, and DataFrame.append no longer exists in pandas >= 2.0.
    rows = []
    for entry in tqdm(text):
        rel_path = entry[0]

        # File name without directory; drop the extension only if there is one
        # (a bare rfind(".") == -1 would otherwise cut off the last character).
        base = rel_path[rel_path.rfind("/") + 1:]
        dot = base.rfind(".")
        name = base[:dot] if dot != -1 else base

        label = int(entry[1])

        image = cv2.imread("./" + rel_path)
        if image is None:
            # cv2.imread returns None instead of raising on a bad path/file.
            raise OSError("could not read image: {}".format(rel_path))

        rows.append([name] + describe(image).tolist() + [label])

    return pd.DataFrame(rows, columns=haralick_name)

def opendata(path):
    """Read an image-list file and return the feature table for its images.

    Each non-blank line of the file is ``<image path> <label>``; the label is
    the last whitespace-separated token, so the path itself may contain
    spaces. The parsed ``[path, label]`` pairs are handed to ``track_hara``.

    Args:
        path: path of the list file to read.

    Returns:
        Whatever ``track_hara`` returns for the parsed entries.

    Raises:
        ValueError: if a non-blank line does not contain both a path and a
            label.
    """
    text = []
    # The context manager guarantees the file is closed (the previous bare
    # ``f.close`` attribute access never actually called close()).
    with open(path, "r") as f:
        for line_no, line in enumerate(f, start=1):
            # Strip the newline explicitly rather than slicing off the last
            # character, which corrupted the label of a final line that has
            # no trailing newline.
            line = line.strip()
            if not line:
                continue
            temp = line.rsplit(None, 1)
            if len(temp) != 2:
                raise ValueError(
                    "{}: line {} is not '<path> <label>': {!r}".format(
                        path, line_no, line
                    )
                )
            text.append(temp)

    final_feature_table = track_hara(text)
    return final_feature_table

In [29]:
# Extract the Haralick feature table for each data split (train, val, test)
# and save it as a CSV next to the notebook.
trainpath = 'train.txt'
valpath = 'val.txt'
testpath = 'test.txt'

for banner, list_path, csv_name in (
    ("train data", trainpath, "train_haralick.csv"),
    ("val data", valpath, "val_haralick.csv"),
    ("test data", testpath, "test_haralick.csv"),
):
    print(banner)
    final_feature_table = opendata(list_path)
    final_feature_table.to_csv(csv_name, index=False)

  0%|                                                                                | 6/63325 [00:00<19:53, 53.06it/s]

train data
total img amount: 63325


100%|████████████████████████████████████████████████████████████████████████████| 63325/63325 [28:51<00:00, 36.57it/s]
  1%|▋                                                                                 | 4/450 [00:00<00:11, 39.80it/s]

val data
total img amount: 450


100%|████████████████████████████████████████████████████████████████████████████████| 450/450 [00:10<00:00, 41.18it/s]
  1%|▉                                                                                 | 5/450 [00:00<00:10, 41.89it/s]

test data
total img amount: 450


100%|████████████████████████████████████████████████████████████████████████████████| 450/450 [00:10<00:00, 41.64it/s]
