In [2]:
# import the necessary packages

import numpy as np 
import helper_functions

class features_class:
    def __init__(self,X,y):
        self.X = X
        self.y = y
                
class image_preprocess:
    #Constructor
    """
    Inputs:
        
        method: "pass " -> no transformation is applied
                "contours" -> mask input image with contours.
                "cmask" -> generate mask then apply to features matrix
                
        thickness : Only used when method is "contours" or "cmask"
    
    Functions:
        process (image_path, path_to_save)
            inputs: image_path -> path for image to process. NOTE: it needs to have "work" in the path
            output: image_out -> output image. This will also be saved to disk if it does not exist
    """
    def __init__(self, method="pass", params=[]):
        self.method   = method
        self.params   = params
        self.post_process_mask = None
        if len(params)>0:
            self.thickness = params['thick'] # first param is thickness
               
    def process (self, image_path):      
        binary_threshold = 40
        import cv2
        import numpy as np
        import os

        if self.method == "pass":
            # [ Read image from path and return it unchanged]
            image=  cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            return image
        elif self.method == "contours":
            # check if file is already generated
            if not "work" in image_path:
                print "ERROR: make sure the images have 'work' in the path directory"
                return None
            out_filename = image_path.replace("work","work_contours_T-" + str(self.thickness))
            if os.path.isfile(out_filename):
                # If file exists, simply load it
                my_out_img=  cv2.imread(out_filename, cv2.IMREAD_GRAYSCALE)
                return my_out_img
            else:
                # File does not exist, generate it
                orig_img   = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
                orig_img_clone = orig_img.clone();
                elem_open  = cv2.getStructuringElement(cv2.MORPH_RECT,(5,5)) # create a 5x5 processing filter
                                                                             # for opening 
                elem_close = cv2.getStructuringElement(cv2.MORPH_RECT,(3,3))
                img_open   = cv2.morphologyEx(orig_img,cv2.MORPH_OPEN ,elem_open ) # Image with open morphological op

                img_filt        = cv2.morphologyEx(img_open,cv2.MORPH_CLOSE,elem_close) # Closing morphological op
                # Threshold to binary
                ret, im_bin     = cv2.threshold(img_filt,binary_threshold,1,cv2.THRESH_BINARY)
                # Create contours using the binary image
                im2, my_contours, hierarchy = cv2.findContours(im_bin.copy(),cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
                # Write a simple contours mask
                #print 'self.thickness = ', self.thickness
                mask_ext = cv2.drawContours(im_bin.copy(),my_contours,-1,255,self.thickness)
                mask = cv2.drawContours(mask_ext.copy(),my_contours,-1,100,2)

                # Mask original image:  - Value of 0 where there is no contour
                #                       - Pixel value from orig_img if it's on the masked contour
                my_masked_orig = np.where((mask_ext==255),orig_img,0)

                # Check if the out path is differnt from source ... Otherwise something has gone wrong
                if out_filename != image_path:
                    print 'Creating: ', out_filename
                    base_dir = os.path.split(out_filename)[0]
                    if not os.path.isdir(base_dir):
                        os.makedirs(base_dir) # create dir if it doesn't exist
                    cv2.imwrite(out_filename, my_masked_orig)
                # [ Return the image with masked contours]
                self.post_process_mask = mask
                return orig_img_clone
            
        elif self.method == "cmask":
            # -----------
            #  This method reads an image, generates a contours mask and
            #      - returns the original image (unchanged)
            #      - stores the contours mask as a local variable
            # -----------
            # check if file is already generated
            if not "work" in image_path:
                print "ERROR: make sure the images have 'work' in the path directory"
                return None
            out_filename = image_path.replace("work","work_cmask_T-" + str(self.thickness))
            if os.path.isfile(out_filename):
                # If file exists, simply load it
                self.post_process_mask =  cv2.imread(out_filename, cv2.IMREAD_GRAYSCALE)
                orig_img   = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)                
                return orig_img
            else:
                # File does not exist, generate it
                orig_img   = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
                elem_open  = cv2.getStructuringElement(cv2.MORPH_RECT,(5,5)) # create a 5x5 processing filter
                                                                             # for opening 
                elem_close = cv2.getStructuringElement(cv2.MORPH_RECT,(3,3))
                img_open   = cv2.morphologyEx(orig_img,cv2.MORPH_OPEN ,elem_open ) # Image with open morphological op

                img_filt        = cv2.morphologyEx(img_open,cv2.MORPH_CLOSE,elem_close) # Closing morphological op
                # Threshold to binary
                ret, im_bin     = cv2.threshold(img_filt,binary_threshold,1,cv2.THRESH_BINARY)
                # Create contours using the binary image
                im2, my_contours, hierarchy = cv2.findContours(im_bin.copy(),cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
                # Write a simple contours mask
                #print 'self.thickness = ', self.thickness
                mask_ext = cv2.drawContours(im_bin.copy(),my_contours,-1,255,self.thickness)
                mask = cv2.drawContours(mask_ext.copy(),my_contours,-1,100,2)

                # Check if the out path is differnt from source ... Otherwise something has gone wrong
                if out_filename != image_path:
                    print 'Creating: ', out_filename
                    base_dir = os.path.split(out_filename)[0]
                    if not os.path.isdir(base_dir):
                        os.makedirs(base_dir) # create dir if it doesn't exist
                    cv2.imwrite(out_filename, mask)
                # [ Return the image with masked contours]
                return ;   

class plant_feature_extraction:
    def __init__(self,method,params):
        
        # Given method can also a features filter argument. We don't want to put the filter
        # arguments in our self.params vector as we want the same name file for the extracted
        # features. This way, we'll have the same features files for all fitler methods and
        # save quite a bit of computation time.
        self.method_and_filter = method
        self.method = method.split('-filtered')[0] # first split is actual extractor method
                
        if '-filtered' in self.method_and_filter:
            self.features_filter = 'filtered-' + self.method_and_filter.split('-filtered')[1]# second split is fitlter arguments
        else:
            self.features_filter = 'none'
        self.params = params
        # ----------------------------
        # Internal variables which can change after class instantiation
        self.file_base_name = None  # lbp_rui_r1_n8_r2_n16_r3_n24
        self.test_file_name = None  # this will be updated when calling initialise()
                                    # according to the latest operators
        self.train_file_name= None
        
        self.n_bins         = None
        self.e_params       = None
        self.is_initialised = None
        
        self.work_dir       = None
        
    def initialise(self, preprocessor ,work_dir):   
        # Define our dataset training/testing names
        self.file_base_name      = 'lbp-' + self.method
        # Calculate internal variables according to given methods
        pp_base_name='PP-' + preprocessor.method
        
        for my_operator in  self.params:
            self.file_base_name += '-r%d-n%d' %( my_operator[0],my_operator[1])        

        for key in preprocessor.params:
            pp_base_name += '-' + key + '-' + str(preprocessor.params[key]) # String containing all our params and values
                                                                            # so that we don't need to regenerate
        self.test_file_name  = 'test_' +'FE-'+ self.file_base_name + '_' + pp_base_name + '.pickle'
        self.train_file_name = 'train_' +'FE-'+ self.file_base_name + '_' + pp_base_name + '.pickle'
        self.file_base_name +=  pp_base_name         
        self.work_dir        = work_dir
        self.out_dir         = work_dir.replace('work','out')        
        self.is_initialised = 1
        
    def get_features_histogram (self, image, mask):
        import numpy as np
        from skimage import feature  
        
        bins_combined = []
        method = self.method.replace('-filtered','') # we don't need the extra -filtered indication here
        if method == 'rui':
            for r_n_param in self.params:
                r = r_n_param[0]
                n = r_n_param[1]
                my_lbp_matrix  = feature.local_binary_pattern(image, n, r, method="uniform")
                masked_lbp_matrix  = np.where((mask>0),my_lbp_matrix,0)
                my_bins, my_edges = np.histogram(masked_lbp_matrix.ravel(),bins=np.arange(0, n+3), range=(0,n+2))
                bins_combined = np.concatenate( (bins_combined,my_bins) ,axis=0)
                #print 'DEBUG: %s , n= %d, r = %d ; n_bins = %d' %(str(r_n_param),n,r,len(bins_combined) )                
        self.n_bins = len(bins_combined)
        return bins_combined
    
    def extract_features(self, preprocess_class, work_dir = "", stages=['train','test'],overwrite =0):
        import os
        import cv2
        import helper_functions as hlp
        import pickle
        
        if self.is_initialised is None:
            self.initialise(preprocess_class, work_dir)
            
        for my_stage in stages:

            if my_stage == 'train':                
                feature_extraction_file_name = self.train_file_name
                input_directory = work_dir + '/train'
            elif my_stage == 'test':
                feature_extraction_file_name = self.test_file_name
                input_directory = work_dir + '/test'
            else:
                print 'ERROR !!! uknown stage %s ' % my_stage
                return
            
            my_exising_labels=[x[1] for x in os.walk(input_directory) if len(x[1]) ]
            my_exising_labels = my_exising_labels[0]
        
            if not os.path.isdir(self.out_dir):
                os.mkdir(self.out_dir)
            out_file_name = self.out_dir + '/' + feature_extraction_file_name
            
            # Check if file already exists
            if not (os.path.isfile(out_file_name) and (overwrite ==0)):
                with open(out_file_name, 'wb') as f:
                    my_bins_combined = list()
                    my_labels       =  list()
                    if not os.path.isdir(input_directory):
                        print " ERROR !!! Given input directory '%s' does not exist !!!! " % input_directory
                        return
                    print ' ---------------------------------------------------------'
                    print " Creating new feature file: '%s' for %s stage " % (out_file_name, my_stage)
                    for my_label in my_exising_labels:
                        print ' LOOKING FOR %s class ' %(my_label)
                        my_file_list = hlp.list_files(input_directory + '/' + my_label,file_ext='bmp')
                        print '   ... found %d files' %(len(my_file_list))

                        for my_file in my_file_list:
                            img_path = input_directory + '/' + my_label + '/'+ my_file
                            image = preprocess_class.process(img_path)
                            bins_combined = self.get_features_histogram(image, preprocess_class.post_process_mask)
                            my_bins_combined.append(bins_combined)
                            my_labels.append(my_label)

                    my_features = features_class(X=my_bins_combined, y=my_labels)
                    pickle.dump(my_features,f, pickle.HIGHEST_PROTOCOL)
    
    def filter_features(self,my_features):
        # Decide on some sort of features filter
        if self.features_filter !='None':
            X = np.delete(my_features.X,[8,26,52],1)
            #for i in range(len(my_features.X)): # apply some scaling?
            #    temp = my_features.X[i]
            #    X[i] *= float(temp[8])/(228*228);     
            my_features.X = X
        return my_features
    def read_features(self,filename):
        import pickle
        my_features = None
        with open (filename,'rb') as f:
            my_features = pickle.load(f)
                                 
        return self.filter_features(my_features) 
                                 
    def read_train_features(self):
        train_full_file_name = self.out_dir + '/' + self.train_file_name
        return self.read_features(train_full_file_name)
                                 
    def read_test_features(self):
        test_full_file_name = self.out_dir + '/' + self.test_file_name
        return self.read_features(test_full_file_name)
                    
class plant_classifier:
    def __init__(self,method,params):
        from sklearn.svm import SVC
        
        self.method = method
        self.params = params
        
        self.out_dir        = None
        self.is_initialised = None
        
        if self.method == 'linear_SVM':
                self.my_classifier = SVC( kernel = self.params['kernel'],
                                          C      = self.params['C'],
                                          gamma  = self.params['gamma']
                                        )
        
        if self.method == 'poly_SVM':
                self.my_classifier = SVC( kernel= self.params['kernel'],
                                          C     = self.params['C'],
                                          gamma = self.params['gamma'],
                                          degree= self.params['degree']
                                        )
            
        if self.method == 'RBF_SVM':
                self.my_classifier = SVC(kernel = self.params['kernel'],
                                         C      = self.params['C'] ,
                                         gamma  = self.params['gamma'])
                
    def initialise(self, feature_extractor):   
        # Define our dataset training/testing names
        model_base_name ='CLS'
        for key in self.params:
            model_base_name += '-' + key + '-' + str(self.params[key]) # String containing all our params and values
                                                                 # so that we don't need to regenerate
        self.file_base_name = model_base_name
        # Append feature extractor name ... generate name with filter if any
        feature_extractor_name = feature_extractor.file_base_name.replace(feature_extractor.method,
                                                                          feature_extractor.method_and_filter)
        self.file_base_name += '_FE-' + feature_extractor_name
        self.out_dir         =  feature_extractor.work_dir.replace('work','out')

        self.model_file_name = 'model_' + self.file_base_name + '.pickle'
        self.model_full_file_name =  self.out_dir + '/'+ self.model_file_name
        self.global_results_file = self.out_dir + '/' + 'global_results.txt'
        
        self.is_initialised = 1
        
        
    def train(self, feature_extractor, overwrite=0 ):
        import os
        import numpy as np
        #from sklearn.externals import joblib
        import pickle
        #from sklearn.svm import SVC
        
        if feature_extractor.is_initialised is None:
            print ' Please initialise the feature extractor first before calling the classifier'
            return
        
        self.initialise(feature_extractor)
        if not os.path.isdir(self.out_dir):
            os.mkdir(self.out_dir)
            print " Created directory '%s' " % self.out_dir       
        #print 'DEBUG: my model name = %s' %(self.model_file_name)
        model_full_file_name = self.model_full_file_name
    
        
        # Check if the model file name already exists
        if (os.path.isfile(model_full_file_name) and (not overwrite) )  :
            #print " Model file '%s' already exists. NOT OVERWRITING ! " % (model_full_file_name) 
            return
        else:
            print" Creating a prediction model file: '%s' " % model_full_file_name

            my_features = feature_extractor.read_train_features()
            model = self.my_classifier.fit(my_features.X, my_features.y)
            filename = model_full_file_name
            print 'Writting file ...'
            with open(model_full_file_name, 'wb') as f:
                pickle.dump(model,f, pickle.HIGHEST_PROTOCOL)           
    
    def predict(self, feature_extractor ):
        import os
        import numpy as np
        from sklearn.externals import joblib
        import pickle 
        from sklearn.metrics import accuracy_score
        from sklearn.metrics import confusion_matrix
        from sklearn.metrics import classification_report
        from natsort import natsorted
        
        
        if feature_extractor.is_initialised is None:
            print ' Please initialise the feature extractor first before calling the classifier'
            return
        
        self.initialise(feature_extractor)
        model_full_file_name = self.model_full_file_name
        
        # Check if the model file name already exists
        if (not os.path.isfile(model_full_file_name) )  :
            print "Model'%s' does not exist!. Please train model first! " % (model_full_file_name)
            return
        
        my_features = feature_extractor.read_test_features()
            
        test_full_file_name = self.out_dir + '/' + feature_extractor.test_file_name
                
            
        with open (test_full_file_name,'rb') as f:
            my_features = pickle.load(f)
            #TODO: put this in some features_filter ----->
            X = np.delete(my_features.X,[8,26,52],1)
            #for i in range(len(my_features.X)):
            #   temp = my_features.X[i]
            #   X[i] *= float(temp[8])/(228*228);
            # <----                    
        # Load prediction model
        with open (model_full_file_name,'rb') as f:
            my_model =pickle.load(f)            
            #result = loaded_model.score(X_test, Y_test)
            predictions = my_model.predict(X)
        
        current_score = accuracy_score(my_features.y, predictions)
        current_model_name = self.model_file_name
        print '======================='
        print('Accuracy Score = %f' %(current_score) )
        print(confusion_matrix(my_features.y, predictions))
        print(classification_report(my_features.y, predictions))
        
        # ---------------------------
        # Generate Global results file -> keep track of previous results
        # structure: "precision result" "model which was used for results"
        current_result_line = '%2.4f %s\n' %(current_score, current_model_name)
        
        if os.path.isfile(self.global_results_file):
            # Previous file exists
            with open(self.global_results_file,'r') as f:
                result_lines = f.readlines(); # contents of previous results
                found_previous_results_for_this_model = False


                for i in range(len(result_lines)):
                    previous_result_line = result_lines[i].strip() # strip the end of line
                    previous_model_name = previous_result_line.split(' ')[1]

                    if current_model_name == previous_model_name:
                        result_lines[i] = current_result_line
                        found_previous_results_for_this_model =True
                        break
                if not found_previous_results_for_this_model:
                    result_lines.append(current_result_line)
                # Resort to display in nicer format
                sorted_results = natsorted(result_lines, reverse=True)
            with open(self.global_results_file,'w') as f:
                # Rewrite file with new results
                f.writelines(sorted_results)                
        else:
            with open(self.global_results_file,'w') as f:
                f.writelines(current_result_line)
        return current_score
                              
class plant_detection:

    # Constructor
    def __init__(self, work_dir ='/scratch/data/'):       
        self.default_work_dir    = work_dir      
        
        # Define our Feature extraction methods
        self.e_method    = [ "rui",
                             "rui-filtered"]
        self.e_params    = [ [(1,8),(2,16),(3,24)],
                             [(1,8),(2,16),(3,24)]
                           ]
    
        # Define our Classification methods
        self.c_method = [ "linear_SVM",  
                          "poly_SVM"  ,
                          "RBF_SVM" ,   
                        ]
        self.c_params = [# Params for linear_SVM
                          dict([('kernel','linear' ),
                               ('C'     , 10      ),
                               ('gamma' ,0.000001 )]),
                          # Params for poly_SVM
                          dict([('kernel','poly'),
                               ('C'     , 10      ),
                               ('gamma' ,0.000001 ),
                               ('degree',2        )]),
                          # Params for RBF_SVM
                          dict([('kernel','rbf'    ),
                               ('C'     , 40      ),
                               ('gamma' ,0.00001 )]),
                         ]
        # Define our Image pre-processing methods
        self.p_method    = [ "pass",
                             "contours",
                             "cmask"]
        self.p_params    = [ dict(),
                             dict([('thick',6)]),
                             dict([('thick',6)])
                           ]
        # create dictionaries (for easier indexing)
        self.e_dict = dict( [ ( self.e_method[i],i ) for i in range(len(self.e_method)) ] )
        self.c_dict = dict( [ ( self.c_method[i],i ) for i in range(len(self.c_method)) ] )     
        self.p_dict = dict( [ ( self.p_method[i],i ) for i in range(len(self.p_method)) ] )
        
        self.initialized = False
    
    def initialize(self):
        # Create Feature Extractors
        self.e_list   = list()
        for i in range (len(self.e_method)):
            self.e = plant_feature_extraction(self.e_method[i],self.e_params[i])
            self.e_list.extend ([self.e] )
            
        # Create Classifiers
        self.c_list   = list()
        for i in range (len(self.c_method)):
            self.c = plant_classifier(self.c_method[i],self.c_params[i])
            self.c_list.extend ([self.c] )
            
        # Create Preprocessing classes
        self.p_list      = list()
        for i in range (len(self.p_method)):
            self.p = image_preprocess(self.p_method[i],self.p_params[i])
            self.p_list.extend ([self.p] )
        
        self.initialized = True
                                    
    def extract_features(self, e_method='rui', p_method='pass', work_dir='default'):
        if work_dir == 'default':
            work_dir = self.default_work_dir
        if not self.initialized:
            self.initialize()
        # Define our dataset training/testing names
        self.e = self.e_list[self.e_dict[e_method]]
        preprocess_class = self.p_list[self.p_dict[p_method]]
        self.e.extract_features(preprocess_class, work_dir)
        
    def generate_model (self, e_method='rui', c_method='linear_SVM', p_method='pass', work_dir='default'):
        print " Generating model for e_method = '%s' , c_method ='%s' , work_dir = '%s' " %(e_method,c_method, work_dir)
        # make sure that we have extracted the features
        self.extract_features(e_method,p_method, work_dir)    
        # call the classifier with given feature extraction method
        self.c = self.c_list[self.c_dict[c_method]]
        self.c.train(self.e)
        
         
    def prediction (self, e_method ='rui', c_method='linear_SVM', p_method='pass', out_dir='default'):
        print " Prediction model for e_method = '%s' , c_method ='%s' , out_dir = '%s' " %(e_method, c_method, out_dir)
        # make sure that we have extracted the features
        if not self.initialized:
            self.initialize()
        self.e = self.e_list[self.e_dict[e_method]]
        self.extract_features(e_method, p_method, self.e.work_dir)    
        # call the classifier with given feature extraction method
        self.c = self.c_list[self.c_dict[c_method]]
        self.c.predict(self.e)

# -----------------------------------------------        
# The above will go into a module when finalised.
# The code below is an example of how to use it
# -----------------------------------------------        

work_dir = "/scratch/git/LBP/data/small_dataset/work/"
my_extractor  = 'rui-filtered'
my_classifiers = ['RBF_SVM']#, 'linear_SVM']
my_preprocess  = ['cmask']#,'pass']
my_C_list = [10]#, 20, 30]
my_gamma_list= [1e-5]#,1e-6 ]

for my_classifier in my_classifiers:
    for my_preproc in my_preprocess:
        for my_C in my_C_list:
            for my_gamma in my_gamma_list:
                print ' ------------------------------------------------------------------------------'
                print " Using CLS: '%s' , FE: '%s', PP: '%s' " %(my_classifier, my_extractor, my_preproc)
                print " gamma= %f, C=%d" %(my_gamma,my_C)

                a_pp = plant_detection(work_dir = work_dir)
                
                # override the default C and gamma from the classifer definition
                classifier_params_index = a_pp.c_dict[my_classifier]
                (a_pp.c_params[classifier_params_index])['gamma'] = my_gamma
                (a_pp.c_params[classifier_params_index])['C']     = my_C                                                   
                                                      
                # Generate model and get score
                a_pp.generate_model(e_method=my_extractor, c_method=my_classifier, p_method=my_preproc)
                a_pp.prediction(e_method=my_extractor, c_method=my_classifier, p_method=my_preproc)

print 'ALL DONE'

 ------------------------------------------------------------------------------
 Using CLS: 'RBF_SVM' , FE: 'rui-filtered', PP: 'cmask' 
 gamma= 0.000010, C=10
 Generating model for e_method = 'rui-filtered' , c_method ='RBF_SVM' , work_dir = 'default' 


ImportError: libavcodec-ffmpeg.so.56: cannot open shared object file: No such file or directory

In [None]:
from natsort import natsorted

a = "aagiven_string-filtered-k0"
a.split('-filtered')
b = 1.9
my_new_line= '%2.2f %s\n' %(b,a)
#c=list()
c.append(my_new_line)
    
print c
print natsorted(c,reverse = True)

In [None]:
a_pp = plant_detection(work_dir = work_dir)
classifier_params_index = a_pp.c_dict[my_classifier]
print classifier_params_index
(a_pp.c_params[2])['gamma'] = my_gamma
(a_pp.c_params[2])['gamma']