In [5]:
import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy.optimize import curve_fit
import csv
import pandas as pd
import sys
#import statsmodels.api as sm
# Print NumPy arrays in full instead of summarising them with "...": the
# feature-extraction loop in this file stores np.array_str(...) text in the CSV.
np.set_printoptions(threshold=sys.maxsize)
# Raise the csv module's per-field size limit to the largest value.
# NOTE(review): the csv module is not used directly in the visible code;
# presumably this is for reading the very long feature fields back elsewhere
# -- confirm.
csv.field_size_limit(sys.maxsize)

# Absolute prefix that is only referenced by the disabled PATH_DIR
# alternatives below.
base_dir = "/home/ehuarotop/Documents/Computer_Science/UFRGS/"

# Alternative dataset roots (disabled).
#PATH_DIR = "../../projeto-geodigital/FigurasAnotadas01"
#PATH_DIR = "../../jupyter-notebooks/projeto-geodigital/Imagems_de_PDFs"
#PATH_DIR = base_dir + "projeto-geodigital/platos_comida"
#PATH_DIR = base_dir + "projeto-geodigital/Dataset-BG-Petrobras/img_from_google_francisco/petrobras"
# Root directory that is walked recursively for .png/.jpg images. It is a
# relative path, so it is resolved against the current working directory.
PATH_DIR = "image_dataset_5"

def get_histogram(file, abs_value=False, downsample=1):
    """Compute a colour histogram and a gradient histogram for one image.

    Parameters
    ----------
    file : str
        Path of the image, loaded with ``cv.imread`` (3-channel BGR).
    abs_value : bool, optional
        If true, the histogram is built from the absolute value of the
        horizontal derivative (256 bins before downsampling); otherwise
        from the signed derivative (511 bins before downsampling).
    downsample : int, optional
        Positive integer divisor applied to the number of gradient bins.

    Returns
    -------
    tuple
        ``(img_norm_hist3d_feature_vector, norm_histr)`` where the first
        item is the flattened 5x5x5 RGB histogram (125 values) divided by
        its maximum, and the second is an ``(N, 1)`` float32 gradient
        histogram summed over the three channels and divided by its
        maximum.

    Raises
    ------
    ValueError
        If ``downsample`` is smaller than 1 or leaves no histogram bins.
    OSError
        If OpenCV cannot read ``file``.
    """
    if downsample < 1:
        raise ValueError(
            "downsample must be a positive integer, got {!r}".format(downsample)
        )

    # One bin per integer value: 0..255 for absolute gradients,
    # -255..255 for signed gradients.
    n_bins = (256 if abs_value else 511) // downsample
    if n_bins < 1:
        raise ValueError(
            "downsample={!r} leaves no histogram bins".format(downsample)
        )

    print(file)

    # cv.imread reports failure by returning None instead of raising.
    img = cv.imread(file)
    if img is None:
        raise OSError("could not read image: {!r}".format(file))

    # Work in float32 so that the derivative can hold negative values.
    img = img.astype(np.float32)
    b, g, r = cv.split(img)

    # 3D colour histogram over the pixels in RGB order.
    # NOTE(review): no explicit range is passed, so the bin edges are derived
    # from each image's own min/max -- confirm that this is intended.
    rgb_pixels = cv.merge((r, g, b)).reshape(-1, 3)
    color_counts, _ = np.histogramdd(rgb_pixels, bins=5)
    img_norm_hist3d_feature_vector = (
        color_counts / np.max(color_counts)
    ).flatten()

    # Horizontal first-difference kernel (image derivative along x).
    kernel = np.array([[1.0, -1.0]])
    gradients = [cv.filter2D(channel, -1, kernel) for channel in (b, g, r)]

    # cv.calcHist treats the upper bound as exclusive, so the range must end
    # at 256 for gradients of exactly 255 to be counted.
    if abs_value:
        gradients = [np.abs(channel) for channel in gradients]
        hist_range = [0, 256]
    else:
        hist_range = [-255, 256]

    # Accumulate the per-channel gradient histograms into a single one.
    histr = np.zeros(shape=(n_bins, 1), dtype=np.float32)
    for channel in gradients:
        histr += cv.calcHist([channel], [0], None, [n_bins], hist_range)

    # Normalise by the tallest bin.
    norm_histr = histr / np.max(histr)

    return img_norm_hist3d_feature_vector, norm_histr

def read_image(image_path, type='normal'):
    """Load an image from disk with OpenCV.

    Parameters
    ----------
    image_path : str
        Path of the image file.
    type : str, optional
        ``'normal'`` returns a 3-channel image with channels reordered from
        OpenCV's BGR to RGB; ``'gray'`` returns a single-channel grayscale
        image.

    Returns
    -------
    numpy.ndarray
        The decoded image.

    Raises
    ------
    ValueError
        If ``type`` is neither ``'normal'`` nor ``'gray'``.
    OSError
        If OpenCV cannot read ``image_path``.
    """
    # Reject unknown modes up front; previously they fell through to
    # "return img" with img never assigned.
    if type not in ('normal', 'gray'):
        raise ValueError(
            "unknown image type {!r}; expected 'normal' or 'gray'".format(type)
        )

    if type == 'gray':
        img = cv.imread(image_path, cv.IMREAD_GRAYSCALE)
    else:
        img = cv.imread(image_path)

    # cv.imread reports failure by returning None instead of raising.
    if img is None:
        raise OSError("could not read image: {!r}".format(image_path))

    if type == 'normal':
        # OpenCV decodes colour images as BGR; reorder the channels to RGB.
        b, g, r = cv.split(img)
        img = cv.merge([r, g, b])

    return img

# One row per image:
# [directory, filename, colour histogram, gradient histogram,
#  FFT real part, FFT imaginary part, SIFT descriptors]
images = []

# Not referenced anywhere else in this cell; kept in case other cells use it.
minimum = 2048

# Every image is resized to this fixed size before the FFT and SIFT steps.
x_medio = 100
y_medio = 100

# Build the SIFT detector once instead of once per image. Newer OpenCV builds
# expose it as cv.SIFT_create; older contrib builds as
# cv.xfeatures2d.SIFT_create.
sift_create = getattr(cv, "SIFT_create", None)
if sift_create is None:
    sift_create = cv.xfeatures2d.SIFT_create
sift = sift_create()

####################################### Getting normalized histograms ##################################
# Walk PATH_DIR recursively and collect the features of every .png/.jpg image.
for root, directories, filenames in os.walk(PATH_DIR):
    for filename in filenames:
        # Match the extension case-insensitively so that files such as
        # "photo.PNG" are not silently skipped. Other formats (e.g. .gif)
        # are excluded.
        if not filename.lower().endswith((".png", ".jpg")):
            continue

        file = os.path.join(root, filename)

        # Colour histogram and absolute-gradient histogram of the original image.
        img_norm_hist3d_feature_vector, norm_histr = get_histogram(file, True)

        # 2D FFT of the grayscale image at the fixed size.
        img = read_image(file, 'gray')
        img = cv.resize(img, (x_medio, y_medio))
        fft = np.fft.fft2(img)

        # SIFT descriptors; only the first 16 are kept. detectAndCompute
        # returns None for the descriptors when no keypoint is found.
        kp, des = sift.detectAndCompute(img, None)
        if des is None:
            sift_feature = np.empty((1, 0), dtype=np.float32)
        else:
            sift_feature = des[:16].reshape(1, -1)

        # The FFT parts are left as arrays here; they only become text when
        # the frame is written to CSV.
        image = [
            root,
            filename,
            np.array_str(img_norm_hist3d_feature_vector),
            np.array_str(norm_histr.reshape(1, -1)),
            fft.real.reshape(1, -1),
            fft.imag.reshape(1, -1),
            np.array_str(sift_feature),
        ]
        images.append(image)

####################################### Building the dataframe ##################################

# Convert the collected rows into a dataframe with one column per feature.
labels = ['directory', 'filename', 'img_norm_hist3d', 'norm_hist', 'FFTReal','FFTImag','SIFT']
pd_images = pd.DataFrame.from_records(images, columns=labels)

print('before save to csv')

# Save the dataframe to CSV.
pd_images.to_csv("accio_my_image_dataset_5.csv", index=False)

print("Done")

image_dataset_5/Fotos-Pessoas/BGP V.22 N.1-380.png
image_dataset_5/Fotos-Pessoas/BGP V.20 N.1-2-2910.png
image_dataset_5/Fotos-Pessoas/BGP V.22 N.1-716.png
image_dataset_5/Fotos-Pessoas/BGP V.20 N.1-2-2804.png
image_dataset_5/Fotos-Pessoas/BGP V.12 N.2-44.png
image_dataset_5/Fotos-Pessoas/BGP V.22 N.1-726.png
image_dataset_5/Fotos-Pessoas/BGP V.21 N.2-222.png
image_dataset_5/Fotos-Pessoas/BGP V.22 N.1-724.png
image_dataset_5/Fotos-Pessoas/BGP V.21 N.1-786.png
image_dataset_5/Fotos-Pessoas/BGP V.22 N.1-718.png
image_dataset_5/Fotos-Pessoas/BGP V.21 N.2-405.png
image_dataset_5/Fotos-Pessoas/BGP V.20 N.1-2-2609.png
image_dataset_5/Fotos-Pessoas/BGP V.20 N.1-2-2810.png
image_dataset_5/Fotos-Pessoas/BGP V.21 N.1-691.png
image_dataset_5/Fotos-Pessoas/BGP V.20 N.1-2-2806.png
image_dataset_5/Fotos-Pessoas/BGP V.20 N.1-2-2724.png
image_dataset_5/Fotos-Pessoas/BGP V.12 N.2-2065.png
image_dataset_5/Fotos-Pessoas/BGP V.21 N.1-580.png
image_dataset_5/Fotos-Pessoas/BGP V.21 N.1-865.png
image_dataset

image_dataset_5/Fotos-Pessoas/BGP V.20 N.1-2-2815.png
image_dataset_5/Fotos-Pessoas/BGP V.21 N.2-456.png
image_dataset_5/Fotos-Pessoas/BGP V.21 N.2-302.png
image_dataset_5/Fotos-Pessoas/BGP V.21 N.2-409.png
before save to csv
Done
