### This algorithm extractacts two global features and one local feature from the flowers.

#### Global Features

    1.Hu Moments that quantifies shape of the flower.
    2.Haralick Texture that quantifies texture of the flower.
    
#### Local Feature

    1.SURF(Sppeded-up Robust Features) to extract keypoints and descriptors of the flower.

In [3]:
import os
import cv2
import time
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from tqdm import tqdm
from module_functions import *

In [5]:
fixed_size  = tuple((500,500))
train_path = "dataset"
bins = 8
os.listdir(train_path)

['Crocus', 'Dandelion', 'LilyValley', 'Pancy', 'Rose']

#### BOVG(Bag of Virtual Words) method is applied to construct histogram of flower from its SURF decsriptors. 

In [6]:
for training_name in tqdm(os.listdir(train_path)):
    
    dir = os.path.join(train_path, training_name)
    surf_des_array = np.empty(shape=(0,64))
    
    for file in os.listdir(dir):

        file = os.path.join(dir, file)
        image = cv2.imread(file)
        
        if image is not None:
            image = cv2.resize(image,fixed_size)
            _, surf_des = fd_surf(image)
            
            surf_des_array = np.append(surf_des_array, surf_des, axis = 0)

100%|██████████| 5/5 [00:27<00:00,  5.56s/it]


In [10]:
ti = time.time()
surf_kmeans = KMeans(n_clusters = 200).fit(surf_des_array)
surf_cl_centers = surf_kmeans.cluster_centers_
tf = time.time()
print(f'Time taken : {int((tf - ti) // 60)}:{int((tf - ti) % 60)} m')

Time taken : 2:27 m


In [11]:
features = []
labels = []
all_images = []

for training_name in tqdm(os.listdir(train_path)):
    
    dir = os.path.join(train_path, training_name)
    current_label = training_name
        
    for file in os.listdir(dir):
        
        file = os.path.join(dir, file)
        image = cv2.imread(file)
        
        if image is not None:
            image = cv2.resize(image,fixed_size)
            fv_surf_histogram = fd_surf_histogram(image, surf_cl_centers, fixed_size)
            fv_hu_moments = fd_hu_moments(image)
            fv_haralick   = fd_haralick(image)
            
            feature = np.hstack([fv_surf_histogram, fv_haralick, fv_hu_moments])
            features.append(feature)
            labels.append(current_label)
            all_images.append(image)

100%|██████████| 5/5 [02:19<00:00, 27.95s/it]


In [12]:
labels = np.array(labels).reshape(-1,1)
scaler = StandardScaler()
rescaled_features = scaler.fit_transform(features)

In [21]:
df = pd.concat([pd.DataFrame(rescaled_features), pd.DataFrame(labels)], axis = 'columns')
df = df.dropna()
df.columns = range(len(df.columns))

In [3]:
#df.to_csv('feature_vectors.csv', index = False)
#np.save('flower_images.npy', all_images)