# Packages

In [1]:
# set up Python environment: numpy for numerical routines
import numpy as np
import pandas as pd

# for store the results
from six.moves import cPickle as pickle
import gzip

# our code (utilsData needs a view)
import sys
sys.path.append('../pycode/')
import utilsData
from preprocess import utilities

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [2]:
# own models and functions
from preprocess.mdl import MDL_method
from preprocess.unsupervised import Unsupervised_method
from models.nb import Naive_Bayes
from models.aode_fast import AODE_fast

# default models from scikit
from sklearn.naive_bayes import GaussianNB

import cv2

In [3]:
import os, os.path

mainPath='/home/frubio/AVA/'
featuresPath = "/home/frubio/aesthetic_quality/features/dSIFT/initialRad{:d}_scales{:d}_factor{:.1f}/AVA/"

import fisher_vector
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
from sklearn.linear_model import SGDClassifier
from sklearn.svm import LinearSVC

# Load and Train NB with Histogram of Gray

In [4]:
features_file = '../features/AVA/GHIST.arff'
#features_file = '../features/features_pool5_ResNet.pklz'
output_file = '../prueba.pklz'
selected_model = 'NBG'
decaf_discrete = 'False'

In [5]:
if features_file[-4:] == 'pklz':
    features = pickle.load(open(features_file,'rb',2))
else:
    features = utilsData.readARFF(features_file)
    
features['id'] = features['id'].astype(int)
#for test in notebooks
#features = features.iloc[:,-101:]

# we take the name of the features and delete de ID
features_names = np.array(features.columns)
index = np.argwhere(features_names=='id')
features_names = np.delete(features_names, index)

# this line is for normalize decaf features
if (decaf_discrete == 'True'):
    features[features_names],_ = utilities.reference_forward_implementation(np.array(features[features_names]),5,2,1.5,0.75)

data = pickle.load(gzip.open('../packages/AVA_info.pklz','rb',2))
data = data.merge(features, on='id', copy=False)

num_images = data.shape[0]

data_aux = data[np.append(features_names,['Class'])]
data_aux['Class'] = pd.Categorical(data_aux['Class'],range(0,len(data_aux['Class'].unique())))

# to free space
del features
del data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [6]:
data_fold = data_aux.copy()

model = GaussianNB()
model.fit(data_fold.loc[:,features_names],data_fold['Class'].cat.codes)

GaussianNB(priors=None)

# Testing speed of histogram extraction and NB classification

In [4]:
import warnings
warnings.filterwarnings('ignore')

## all process

In [28]:
%%timeit -n 10
img = cv2.imread('../fondo1.jpg',0)
hist = np.histogram(img, bins = 256, range=(0,256), density = True)[0]
#hist = pd.DataFrame(data=[hist],columns=data_aux.columns[0:-1])

#hist = discretization.process(hist)
results = model.predict_proba(hist)

10 loops, best of 3: 5.77 ms per loop


# Now is the turn for the SIFT and FV

In [9]:
def extractSizeFromRadius(radius):
    SIFT_DESCR_WIDTH = 4
    SIFT_DESCR_SCL_FCTR = 3.0

    final_size = radius / (SIFT_DESCR_SCL_FCTR * (SIFT_DESCR_WIDTH + 1) * 0.5 * 0.5 * 1.4142135623730951)
    return final_size

In [10]:
class FV_dictionary:
    
    def __init__(self,size_PCA, size_patch, size_gmm):
        self.size_patch = size_patch
        self.pca = PCA(n_components=size_PCA)
        self.gmm = GaussianMixture(n_components=size_gmm, covariance_type='diag')
        self.size_descriptor = 0
        
    def generate_dict(self,indexes,files,path):
        
        matrix_features = self.extract_patch_features(indexes, files, path)
        
        descriptor_size = matrix_features.shape[1]
        if descriptor_size > self.pca.n_components:
            self.pca.fit(matrix_features)
            matrix_features = self.pca.transform(matrix_features)
            self.size_descriptor = self.pca.n_components
        else:
            self.size_descriptor = descriptor_size
            
        self.gmm.fit(matrix_features)
        
    def obtain_fv(self,indexes,files,path):
        
        fv_size = self.gmm.n_components*(1+2*self.size_descriptor)
        final_matrix = np.zeros((indexes.shape[0],fv_size))
        counter = 0
        for i in indexes:
            fname=path+files[i]+'.pklz'
            if os.path.isfile(fname):
                sift = pickle.load(gzip.open(fname,"rb",2))
                descriptor_size = sift.shape[1]
                if descriptor_size > self.pca.n_components:
                    sift = self.pca.transform(sift)
                final_matrix[counter] = fisher_vector.fisher_vector(sift, self.gmm)
                counter += 1
        return final_matrix
    
    def extract_patch_features(self, indexes, files, path):
        # We extract the number of vectors corresponding to the size of the patch / number of images
        nImages = indexes.shape[0]
        featuresPerImage = int(self.size_patch / nImages)
        finalMatrix = np.zeros((featuresPerImage*nImages, 128),dtype=np.float32)

        counter = 0
        
        for i in indexes:
            fname=path+files[i]+'.pklz'
            if os.path.isfile(fname):
                sift = pickle.load(gzip.open(fname,"rb",2))
                selectedFeat = np.random.choice(range(0,sift.shape[0]),replace=False,size=featuresPerImage)
                finalMatrix[counter:counter+featuresPerImage] = sift[selectedFeat]
            counter += featuresPerImage
        return finalMatrix

In [11]:
# Parameters of the classification
delta = 0

# Parameters of the descriptors
scales = 5
initial_radius = 16
factor_step = 1.2

# Parameters for the FV
size_patch = 1000
size_PCA = 64
size_gmm = 256

In [12]:
data = pickle.load(gzip.open('../packages/AVA_info.pklz','rb',2))
num_images = len(data)
data.loc[:,'id'] = data['id'].apply(str)
classes = np.array(data.sort_values(['id']).loc[:,'Class'])
means = np.array(data.sort_values(['id']).loc[:,'VotesMean'])

In [13]:
dictionary = FV_dictionary(size_PCA,size_patch,size_gmm)
dictionary.generate_dict(np.array(range(0,100)),np.array(data.sort_values(['id'])['id']),featuresPath.format(initial_radius,scales,factor_step))
train_features = dictionary.obtain_fv(np.array(range(0,100)),np.array(data.sort_values(['id'])['id']),featuresPath.format(initial_radius,scales,factor_step))

In [14]:
sgd_clf = SGDClassifier(loss="hinge", penalty="l2")
sgd_clf.fit(train_features, classes[0:100])

SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1,
       penalty='l2', power_t=0.5, random_state=None, shuffle=True,
       verbose=0, warm_start=False)

In [15]:
sift = cv2.xfeatures2d.SIFT_create()

In [29]:
%%timeit -n 10
img = cv2.imread('../fondo1.jpg',0)
radius = initial_radius
for i in range(0,scales):
    step_size = extractSizeFromRadius(radius)
    kp = [cv2.KeyPoint(x, y, step_size) for y in range(radius, img.shape[0], radius*2) 
                                    for x in range(radius, img.shape[1], radius*2)]
    dense_feat = sift.compute(img, kp)

    if i==0:
        final_feat = dense_feat[1]
    else:
        final_feat = np.concatenate((final_feat,dense_feat[1]), axis=0)

    radius = int(np.around(radius*factor_step))
    
final_feat = dictionary.pca.transform(final_feat)
final_vector = fisher_vector.fisher_vector(final_feat, dictionary.gmm)
predictions = sgd_clf.predict(final_vector)

10 loops, best of 3: 129 ms per loop
