In [25]:
from cnns.core import TorchOpenFace as tof
from cnns.utils import cnn_utils as cu
import cv2
import numpy as np
import pandas as pd
from cnns.core import SOMClusterer as scl
from sklearn.metrics.cluster import homogeneity_completeness_v_measure
from cnns.utils import clustering_utils as clu
import pickle as pkl

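Setup: autoreload and the clustering-metrics helper (the OpenFace model itself is loaded in the "Images from CSV file" section)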

In [26]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [28]:
def compute_hcv_metrics(img_urls, X, proc_settings_ver, user_id, cluster_html,
                        grid_size=15, sigma=1.0, learning_rate=0.25, num_trials=5, num_iter=100,
                        reduce_dim=False, num_dim=512,
                        base_dirpath='/Users/babasarala/Desktop/face_clustering_full_data',
                        comp_type='None'):
    """Cluster face codes with a SOM and score the clusters against tagged ground truth.

    The clustering result is joined to the per-user URL CSV and then to the
    per-user tag CSV; the surviving rows provide the true labels (`tag`) and
    the predicted labels (`cluster_idx`) for the scikit-learn metric.

    Parameters
    ----------
    img_urls, X :
        Passed straight to `SOMClusterer`. The caller in this notebook supplies
        the image URLs and the matrix of CNN codes with one row per URL.
    proc_settings_ver : float
        Formatted with `%.1f` into the URL CSV file name.
    user_id : int
        Formatted with `%i` into both CSV paths.
    cluster_html :
        Forwarded to `clu.visualize_clusters` (presumably the output HTML
        path -- confirm against `clustering_utils`).
    grid_size, sigma, learning_rate, num_trials, num_iter, reduce_dim, num_dim :
        Forwarded unchanged to `SOMClusterer`.
    base_dirpath : str
        Directory holding `<user_id>_clusters.csv` and the per-user folder.
        Defaults to the location that used to be hard-coded here.
    comp_type : str
        Suffix used in the URL CSV file name; defaults to the previous
        hard-coded value 'None'.

    Returns
    -------
    tuple
        `(homogeneity, completeness, v_measure)` as returned by
        `homogeneity_completeness_v_measure`.
    """
    # Ground-truth tags for this user.
    tagged_csv_filepath = '%s/%i_clusters.csv' % (base_dirpath, user_id)
    # Hand pandas the path so it opens *and closes* the file itself; the old
    # `read_csv(open(...))` form left the handle open.
    tagged_df = pd.read_csv(tagged_csv_filepath)

    # Image-URL table for this user and processing version.
    url_csv_filepath = '%s/%i/%i_prod_fb_tags_img_urls_%.1f_%s.csv' \
                       % (base_dirpath, user_id, user_id, proc_settings_ver, comp_type)
    url_df = pd.read_csv(url_csv_filepath)

    scl_ = scl.SOMClusterer(img_urls, X, grid_size=grid_size, sigma=sigma, learning_rate=learning_rate,
                            num_trials=num_trials, reduce_dim=reduce_dim, num_dim=num_dim, num_iter=num_iter)
    cluster_df = scl_.run()

    # Both merges use pandas' default inner join, so faces without a matching
    # URL row or without a tag row are dropped before scoring.
    gt_df = pd.merge(cluster_df, url_df, left_on='face_url', right_on='img_url')
    cols = ['face_id', 'tag', 'memorable_id', 'cluster_idx', 'face_url']

    merged_df = pd.merge(gt_df, tagged_df, on=['face_id', 'memorable_id', 'user_id'])[cols]
    true_labels = list(merged_df['tag'].values)
    cluster_labels = list(merged_df['cluster_idx'].values)
    h, c, v = homogeneity_completeness_v_measure(true_labels, cluster_labels)

    # visualize!
    cluster_map, _, _ = scl_.convert_to_maps(cluster_df)
    clu.visualize_clusters(cluster_map, cluster_html)
    return h, c, v

Single image

In [7]:
# Load one image from disk with the project's loader; the next cell feeds it to the network.
img_filepath = '/Users/babasarala/Desktop/face_clustering_full_data/1341/1341_prod_1.2_None_images/9915939467_212664635.jpg'
image = cu.torch_load_image(img_filepath)

In [8]:
# Run the loaded image through the network and display the returned array.
# NOTE(review): `tof_` is only constructed in the "Images from CSV file" setup cell
# further down, so on a fresh kernel that cell has to be executed before this one.
tof_.compute_intermediate_output(image)

array([-0.06915157,  0.08261652, -0.06482871, -0.11739713,  0.01845421,
        0.12255683, -0.15433681, -0.00676398, -0.15175544,  0.04406863,
       -0.02034096, -0.09826448,  0.06080227, -0.0373427 ,  0.05672698,
        0.07125273,  0.01132244, -0.03724903,  0.14132334, -0.05332252,
        0.09322547, -0.04973346,  0.12671643, -0.09877689,  0.08428668,
        0.13102978,  0.01476111, -0.17729791,  0.11381032, -0.0809738 ,
        0.20732866, -0.04312443, -0.0412151 , -0.0863544 ,  0.06930441,
        0.0469732 , -0.03583637,  0.11210234,  0.19080943,  0.03834464,
       -0.03698236,  0.0155298 , -0.06165396,  0.10500638, -0.02497769,
       -0.00165782,  0.12768404,  0.02512852, -0.10675585, -0.16037175,
       -0.06120317,  0.01363889,  0.03641329, -0.06257543, -0.14291508,
        0.00700344,  0.14858468,  0.10453454,  0.03000544, -0.13513629,
       -0.03007873,  0.00392263,  0.12632875, -0.05850722,  0.11616812,
       -0.03469099,  0.03010117,  0.10044143,  0.11211664,  0.02

Images from CSV file

In [12]:
# Users processed by the feature-extraction and clustering cells below.
user_ids = [1946418, 8657185, 5626377, 5, 5692777, 3473194, 3928074, 4619758, 2685009, 1496616, 1341, 8, 34, 6007945]
# Processing-settings version; it is formatted into the per-user CSV file names.
proc_settings_ver = 1.2
network_model_filename = 'nn4.small2.v1.t7'

# these won't really change..
models_dirpath = '/Users/babasarala/repos/cnns/models'
dlib_face_predictor = 'shape_predictor_68_face_landmarks.dat'
network_model_filepath = '%s/openface/%s' % (
    models_dirpath, network_model_filename)
dlib_model_filepath = '%s/dlib/%s' % (models_dirpath, dlib_face_predictor)
img_dim = 96

# Shared model wrapper used by every cell that computes face codes.
tof_ = tof.TorchClassifier(dlib_model_filepath=dlib_model_filepath,
                          network_model_filepath=network_model_filepath,
                          img_dim=img_dim)

# Output locations: pickled CNN codes go to `results_dirpath`, cluster HTML pages
# to a sub-folder of it. The sub-folder is derived rather than repeated so the two
# paths cannot drift apart.
results_dirpath = '/Users/babasarala/Desktop/openface_experiments'
html_dirpath = '%s/cluster_htmls' % results_dirpath

In [8]:
# For every user: read the image-URL CSV, compute a face code per image, and cache
# the (urls, codes) pair as a pickle for the clustering step.
for user_id in user_ids:
    # Single pre-formatted argument: prints the same text under Python 2 and Python 3.
    print('Currently processing user_id: %i' % (user_id))
    csv_filepath = '/Users/babasarala/Desktop/face_clustering_full_data/%i/%i_prod_fb_tags_img_urls_%.1f_None.csv' \
        % (user_id, user_id, proc_settings_ver)
    df = pd.read_csv(csv_filepath)

    img_urls = list(df['img_url'].values)
    # Pre-allocate one row per URL; rows are filled densely and trimmed afterwards.
    X = np.empty((len(img_urls), tof_.get_layer_output_size()))
    valid_img_urls = []
    cnt = 0
    num_skipped = 0
    last_error = None
    for img_url in img_urls:
        image = cu.torch_load_image(img_url)
        try:
            x = tof_.compute_intermediate_output(image)
            X[cnt] = x
            valid_img_urls.append(img_url)
            cnt += 1
        except Exception as err:
            # Still best-effort (a failing image is skipped), but no longer silent,
            # and KeyboardInterrupt / SystemExit now propagate so the run can be stopped.
            num_skipped += 1
            last_error = err
    X = X[:cnt]

    if num_skipped:
        print('  skipped %i of %i images (last error: %r)' % (num_skipped, len(img_urls), last_error))

    assert X.shape[0] == len(valid_img_urls)
    # Context manager guarantees the pickle is flushed and the handle closed.
    with open('%s/%i_cnn_codes.p' % (results_dirpath, user_id), 'wb') as codes_file:
        pkl.dump((valid_img_urls, X), codes_file)

Currently processing user_id: 1946418
Currently processing user_id: 8657185
Currently processing user_id: 5626377
Currently processing user_id: 5
Currently processing user_id: 5692777
Currently processing user_id: 3473194
Currently processing user_id: 3928074
Currently processing user_id: 4619758
Currently processing user_id: 2685009
Currently processing user_id: 1496616
Currently processing user_id: 1341
Currently processing user_id: 8
Currently processing user_id: 34
Currently processing user_id: 6007945


Clustering

In [29]:
# Cluster each user's cached face codes and print homogeneity, completeness and
# V-measure (one line per user, in `user_ids` order).
for user_id in user_ids:
    # NOTE: unpickling can execute arbitrary code; only load cache files that were
    # written by the feature-extraction cell of this notebook.
    with open('%s/%i_cnn_codes.p' % (results_dirpath, user_id), 'rb') as codes_file:
        img_urls, X = pkl.load(codes_file)
    cluster_html = '%s/%i_Openface_default.html' % (html_dirpath, user_id)
    h, c, v = compute_hcv_metrics(img_urls, X, proc_settings_ver, user_id, cluster_html)
    # Single pre-formatted argument: same space-separated output under Python 2 and 3.
    print('%s %s %s' % (h, c, v))

0.856007437744 0.640948679279 0.733029953832
0.702304131195 0.536095015063 0.608045871042
0.684424888725 0.444187959273 0.538737965172
0.694771461344 0.515331572595 0.591747412786
0.962576499481 0.48539120443 0.645354402832
0.873437903787 0.412379804174 0.560247614404
0.722742142064 0.532565364832 0.613248037557
0.983354120872 0.580133902408 0.729749195512
0.765909425446 0.538003661923 0.632039174363
0.628903692751 0.479115107424 0.543884743226
0.746414735658 0.359002311587 0.484820848696
0.617543600595 0.509146136627 0.558130473819
0.956316955086 0.67728686745 0.792971840375
0.765202506883 0.758162638962 0.761666306389
