In [1]:
import time
import datetime
import logging
import pandas as pd
from cnns.utils import cnn_utils as cu
from cnns.utils import classify_utils as clu
import pickle as pkl
import numpy as np

In [2]:
class MPDataProcessor(object):
    '''Drive caller-supplied callbacks over a slice of a sequence, with checkpoints.

    The processor is agnostic to the data being handled: the caller provides a
    mutable ``container`` together with three callbacks that fill it, persist
    it and empty it. The processor only takes care of iteration, counting,
    logging and deciding when a checkpoint is due.
    '''

    def __init__(self, output_dirpath, logging_dirpath, logging_filename, start_idx=0, end_idx=None,
                 checkpoint=100, show_time=False):
        '''Remember where results and logs should be written.

        Args:
            output_dirpath: directory handed to ``save_container`` on every save.
            logging_dirpath: directory in which the log file is created.
            logging_filename: suffix of the log file name
                (``<iso timestamp>_<logging_filename>.out``).
            start_idx, end_idx, checkpoint, show_time: accepted for backward
                compatibility only; they are not stored. The equally named
                arguments of ``process_items`` are the ones that take effect.
        '''
        self.output_dirpath = output_dirpath
        self.logging_dirpath = logging_dirpath
        self.logging_filename = logging_filename

    def process_items(self, 
                      iterable,
                      container, 
                      update_container, 
                      save_container, 
                      reset_container, 
                      start_idx=0,
                      end_idx=None,
                      checkpoint=100,
                      show_time=True,
                      *args):
        '''Process ``iterable[start_idx:end_idx]`` and checkpoint periodically.

        Args:
            iterable: sliceable sequence of items (must support ``len``).
            container: mutable object passed through to all three callbacks.
            update_container: ``f(item, cnt, container, *args)``; ``cnt`` is the
                number of items successfully processed so far.
            save_container: ``f(output_dirpath, index, container)``.
            reset_container: ``f(container, *args)``; called right after each
                checkpoint save.
            start_idx: first position of ``iterable`` to process.
            end_idx: one past the last position; ``None`` means the end of
                ``iterable``. Values beyond the end are clamped.
            checkpoint: number of successfully processed items between saves.
            show_time: when true, progress messages include the elapsed time.
            *args: extra positional arguments forwarded to ``update_container``
                and ``reset_container`` (not to ``save_container``).

        Raises:
            AssertionError: if ``start_idx`` is not smaller than ``len(iterable)``.

        Errors raised while handling an item are logged and the item is
        skipped; ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
        '''
        
        curr_date = datetime.datetime.now()
        # The log file belongs in the logging directory given to the
        # constructor (it used to be created in the output directory).
        # NOTE: basicConfig is a no-op when the root logger already has
        # handlers, so a second call in the same process keeps writing to the
        # first log file.
        logging.basicConfig(filename='%s/%s_%s.out' % (self.logging_dirpath, curr_date.isoformat('_'), 
                                                       self.logging_filename),
                            level=logging.INFO)
        if show_time:
            start_time = time.time()
        assert start_idx < len(iterable)
        if end_idx is None:
            end_idx = len(iterable)
        else:
            end_idx = min(end_idx, len(iterable))

        cnt = 0
        # Items added to the container since the last checkpoint save.
        pending = 0
        for item in iterable[start_idx:end_idx]:
            try:
                update_container(item, cnt, container, *args)
                pending += 1
                if (cnt + 1) % checkpoint == 0:
                    # cnt only advances on success, so the reported index is
                    # start_idx plus the number of successful items, not
                    # necessarily the position of `item` in `iterable`.
                    info_str = 'Processed %i items, index %i' % (
                        cnt + 1, cnt + start_idx + 1)
                    if show_time:
                        info_str = '%s, Elapsed time since start: %.2fs' % (
                            info_str, time.time() - start_time)
                    logging.info(info_str)
                    # Single-argument parenthesised print behaves the same on
                    # Python 2 and Python 3.
                    print(info_str)
                    # Use the directory stored on the instance; the bare name
                    # `output_dirpath` is not defined in this scope.
                    save_container(self.output_dirpath, cnt + start_idx + 1, container)
                    reset_container(container, *args)
                    pending = 0
                cnt += 1
            except cu.ProcessingError as err:
                err_str = 'Loading error: %s' % err
                logging.error(err_str)
                print(err_str)
            except Exception:
                # Keep going on unexpected failures, but record the traceback
                # and let KeyboardInterrupt / SystemExit propagate.
                err_str = 'Unknown error'
                logging.exception(err_str)
        
        # Flush the trailing partial batch. When the last item triggered a
        # checkpoint the container has just been reset; saving it again under
        # the same index would overwrite that checkpoint with an empty batch.
        if pending:
            save_container(self.output_dirpath, cnt + start_idx, container)


In [3]:
def process_img_url_cnn_codes(img_urls,
                              fe_model,
                              layer,
                              output_dirpath,
                              logging_dirpath='/Users/babasarala/repos/cnns/logs',
                              logging_filename='deep_features',
                              start_idx=0,
                              end_idx=None,
                              checkpoint=100,
                              show_time=False,
                              src='url'):
    '''Compute the output of one model layer for each image and pickle it in batches.

    Each image is loaded with ``cu.load_image_and_check_format`` and fed to
    ``fe_model.compute_intermediate_output``. Results are buffered in an array
    of ``checkpoint`` rows and written as ``(urls, X)`` tuples to
    ``<output_dirpath>/<model_name>_<layer>_<index>.p``.

    Args:
        img_urls: sequence of image locations.
        fe_model: model object providing ``get_layer_output_size``,
            ``compute_intermediate_output`` and ``model_name``.
        layer: name of the layer whose output is stored.
        output_dirpath: directory receiving the pickle files.
        logging_dirpath: forwarded to ``MPDataProcessor``.
        logging_filename: forwarded to ``MPDataProcessor``.
        start_idx, end_idx: slice of ``img_urls`` to process.
        checkpoint: batch size, i.e. rows per pickle file.
        show_time: include elapsed time in progress messages.
        src: forwarded to the image loader (presumably 'url' vs. a local
            file path -- TODO confirm against ``cnn_utils``).
    '''
    
    container = {}
    container['X'] = np.empty((checkpoint, fe_model.get_layer_output_size(layer)))
    container['curr_img_urls'] = []
    
    data_proc = MPDataProcessor(output_dirpath, logging_dirpath, logging_filename)
    
    def update_container(img_url, cnt, container):
        '''Load one image and append its layer output to the current batch.'''
        image = cu.load_image_and_check_format(img_filepath_or_url=img_url,
                                                   src=src)
        output = fe_model.compute_intermediate_output(
            image, layer_name=layer)
        
        if output is not None:
            # Pack rows contiguously: index by the number of rows stored so
            # far rather than by cnt, so a skipped (None) output cannot leave
            # an uninitialised gap or shift rows relative to their URLs.
            row = len(container['curr_img_urls'])
            container['X'][row] = output
            container['curr_img_urls'].append(img_url)
    
    def save_container(output_dirpath, cnt, container):
        '''Pickle the rows collected since the last reset; ``cnt`` names the file.'''
        n_rows = len(container['curr_img_urls'])
        if n_rows == 0:
            # Nothing collected since the last reset. Writing anyway would
            # overwrite the previous checkpoint file of the same index.
            return
        # The earlier modulus check compared the absolute index with the row
        # count and failed whenever start_idx was not a multiple of
        # checkpoint; this is the invariant the slice below relies on.
        assert n_rows <= checkpoint
        # Slice by the number of stored rows, not by the running index, so
        # the trailing partial batch excludes uninitialised np.empty rows.
        X = container['X'][:n_rows]
        out_path = '%s/%s_%s_%i.p' % (output_dirpath, fe_model.model_name, layer, cnt)
        with open(out_path, 'wb') as out_file:
            pkl.dump((container['curr_img_urls'], X), out_file)

    def reset_container(container):
        '''Start a fresh, empty batch.'''
        container['X'] = np.empty((checkpoint, fe_model.get_layer_output_size(layer)))
        container['curr_img_urls'] = []
    
    data_proc.process_items(img_urls, 
                            container, 
                            update_container,
                            save_container, 
                            reset_container, 
                            start_idx, 
                            end_idx, 
                            checkpoint, 
                            show_time) 

In [4]:
def process_img_url_cats(img_urls, 
                         main_model, 
                         fe_model, 
                         custom_model, 
                         config, 
                         output_dirpath,
                         return_type='prob',
                         logging_dirpath='/Users/babasarala/repos/cnns/logs',
                         logging_filename='categories',
                         start_idx=0,
                         end_idx=None,
                         checkpoint=100,
                         show_time=False,
                         src='url'):
    '''Classify each image and pickle the results in batches.

    Each image is loaded with ``cu.load_image`` and passed to
    ``clu.classify``; the returned mapping is merged into a dict that is
    pickled to ``<output_dirpath>/<main_model.model_name>_<index>.p``.

    Args:
        img_urls: sequence of image locations.
        main_model, fe_model, custom_model, config: forwarded unchanged to
            ``clu.classify``.
        output_dirpath: directory receiving the pickle files.
        return_type: forwarded to ``clu.classify``.
        logging_dirpath: forwarded to ``MPDataProcessor``.
        logging_filename: forwarded to ``MPDataProcessor``.
        start_idx, end_idx: slice of ``img_urls`` to process.
        checkpoint: number of processed items per pickle file.
        show_time: include elapsed time in progress messages.
        src: forwarded to the image loader.
    '''
    container = {}
    data_proc = MPDataProcessor(output_dirpath, logging_dirpath, logging_filename)
    
    def update_container(img_url, cnt, container):
        '''Classify one image and merge the result into the current batch.'''
        image = cu.load_image(img_url, src=src)
        # presumably a dict keyed by image URL -- TODO confirm against classify_utils
        collapsed_cat_probs = clu.classify(img_map={img_url: image},
                                           main_model=main_model,
                                           fe_model=fe_model,
                                           custom_model=custom_model,
                                           config=config,
                                           return_type=return_type)
        container.update(collapsed_cat_probs)
    
    def save_container(output_dirpath, cnt, container):
        '''Pickle the current batch; ``cnt`` names the file.'''
        if not container:
            # Nothing collected since the last reset. Writing anyway would
            # replace the previous checkpoint file of the same index with an
            # empty dict.
            return
        # cnt is offset by start_idx, so subtract it before checking that the
        # batch size is consistent with the checkpoint interval.
        assert (cnt - start_idx - len(container)) % checkpoint == 0
        out_path = '%s/%s_%i.p' % (output_dirpath, main_model.model_name, cnt)
        with open(out_path, 'wb') as out_file:
            pkl.dump(container, out_file)
    
    def reset_container(container):
        '''Empty the batch in place.'''
        container.clear()
        
    data_proc.process_items(img_urls,
                            container,
                            update_container,
                            save_container,
                            reset_container,
                            start_idx,
                            end_idx,
                            checkpoint,
                            show_time)

In [5]:
# Read the 'img_url' column of a local CSV file into a plain Python list.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
img_urls = list(pd.read_csv('/Users/babasarala/Desktop/imagenet_urls_random.csv')['img_url'].values)

In [6]:
# Directories and settings file handed to clu.init_model_and_config.
# NOTE(review): absolute, machine-specific paths -- adjust before running elsewhere.
config_dirpath = '/Users/babasarala/repos/cnns/config'
model_dirpath = '/Users/babasarala/repos/cnns/models'
settings_filename = 'settings_v1.9.ini'
# Returns the three model objects and the config used by the processing cells below.
main_model, fe_model, custom_model, config = clu.init_model_and_config(config_dirpath, settings_filename, 
                                                                       model_dirpath)

In [None]:
# Compute 'pool3' layer outputs for every URL in img_urls, writing pickles to output_dirpath.
# All optional arguments (logging location, slice, checkpoint size, ...) keep their defaults.
# NOTE(review): absolute, machine-specific path.
output_dirpath = '/Users/babasarala/Desktop/test_dir'
process_img_url_cnn_codes(img_urls, fe_model, 'pool3', output_dirpath)

In [7]:
# Classify every URL in img_urls, writing pickles to output_dirpath.
# Rebinds the notebook-level name output_dirpath to a second directory.
# NOTE(review): absolute, machine-specific path.
output_dirpath = '/Users/babasarala/Desktop/test_dir_2'
process_img_url_cats(img_urls, main_model, fe_model, custom_model, config, output_dirpath)

ERROR:root:Loading error: 'Image failed to Load'
ERROR:root:Loading error: 'Image failed to Load'


Loading error: 'Image failed to Load'
Loading error: 'Image failed to Load'

ERROR:root:Loading error: 'Image failed to Load'



Loading error: 'Image failed to Load'
Processed 100 items, index 100

ERROR:root:Loading error: 'Image failed to Load'



Loading error: 'Image failed to Load'

ERROR:root:Loading error: 'Image failed to Load'



Loading error: 'Image failed to Load'

ERROR:root:Loading error: 'Image failed to Load'



Loading error: 'Image failed to Load'

ERROR:root:Loading error: 'Image failed to Load'



Loading error: 'Image failed to Load'

ERROR:root:Loading error: 'Image failed to Load'



Loading error: 'Image failed to Load'

ERROR:root:Loading error: 'Image failed to Load'



Loading error: 'Image failed to Load'

ERROR:root:Loading error: 'Image failed to Load'



Loading error: 'Image failed to Load'
Processed 200 items, index 200
