# Register by hand

Describes the sequence of steps for finding someone in the global embedding database and associating their user_id with the matching idx values in the embedding database.

In [None]:
import torch
import numpy as np
from PIL import Image
from utils import send_query, send_large_query


### Get a starting point where we assume that the person is present on the image

In [None]:
# Select the aligned images whose timestamp falls within -2000/+2000
# (in the units of the `timestamp` columns) of a logged entry of this
# person through the given gate.
query_params = dict(minusoffset=2000, plusoffset=2000, name='Mitnyik Levente')

SQL_QUERY = '''
    SELECT aligned_ID, path, name FROM aligned JOIN (
        SELECT name, timestamp-{minusoffset} as start, timestamp+{plusoffset} as end 
        FROM Mandacsko_log WHERE gate = "Forgóvilla jobb (kintről) BE" AND name = "{name}") 
    ON aligned.timestamp BETWEEN start AND end;
'''.format(**query_params)

query_result = send_query(SQL_QUERY)
# Show the first matching row, then the image that row points to.
display(query_result[0])
display(Image.open(query_result[0]['path']))

### Now find the corresponding embedding for this image

In [None]:
# Load the serialized embedding database; only its 'paths' and 'embeddings'
# entries are used in this notebook.
# NOTE(review): torch.load unpickles the file, so only load a database that
# comes from a trusted source.
embedding_database = torch.load('ALIGNED_EMBEDDING_DATABASE.pth')
# presumably all_paths[i] is the image whose embedding is all_embs[i]
# (later cells index both with the same idx) — TODO confirm
all_paths = embedding_database['paths']
all_embs = embedding_database['embeddings']

In [None]:
# We found 70 registered images, select the 51th
# NOTE(review): the comment above says the 51st image, but the index used
# below is 30 — confirm which image was intended.
registered_idx = 30
# The database starts counting from 1
query_idx = int(query_result[registered_idx]['aligned_ID']) - 1
# NOTE(review): query_idx is computed but not used; the query embedding is
# read from the hard-coded index 51696 instead — confirm this is intentional.
embedding_query = all_embs[51696]

### Finally we can get the closest embeddings, and use their `idx` to find the corresponding path

In [None]:
def getKclosest(embedding_query, k=-1):
    """Rank the stored embeddings by their distance to a query embedding.

    The distance between the query and each entry of the module-level
    ``all_embs`` is the mean squared difference over the last dimension.

    Args:
        embedding_query: embedding to compare against; it has to broadcast
            against ``all_embs``.
        k: number of nearest entries to return. ``None`` or a negative value
            (the default ``-1``) returns every entry. The previous slicing
            with ``[:-1]`` silently dropped the farthest entry in that case.

    Returns:
        A tuple ``(embeddings, idxs, distances)`` holding the ``k`` nearest
        embeddings, their indices into ``all_embs`` and their distances, all
        ordered from nearest to farthest.
    """
    distance = ((all_embs - embedding_query) ** 2).mean(-1)
    sorted_distance, idxs = torch.sort(distance)

    if k is None or k < 0:
        k = idxs.size(0)
    idxs = idxs[:k]

    # Index with the truncated idxs so that all three results describe the
    # same k entries (and the whole database is not copied on every call).
    return all_embs[idxs], idxs, sorted_distance[:k]

In [None]:
def plotKclosest(embedding_query, k):
    """Print and display the ``k`` stored images nearest to a query embedding.

    For every neighbour reported by ``getKclosest`` a line with its rank,
    its database index and its distance is printed, followed by the image
    opened from the matching entry of ``all_paths``. Nothing is returned.
    """
    _unused, nearest_idxs, nearest_dists = getKclosest(embedding_query, k)
    rank = 0
    for db_idx, dist in zip(nearest_idxs.data, nearest_dists.data):
        print(rank, db_idx, dist)
        display(Image.open(all_paths[db_idx]))
        rank += 1

In [None]:
# Print and display the 85 database entries closest to embedding_query.
plotKclosest(embedding_query, 85)

### Update the corresponding record in the `aligned` table in the SQL database

In [None]:
# Ad-hoc check: number of rows in `aligned` whose user_ID is 3.
send_query('SELECT COUNT(*) FROM aligned WHERE user_ID = %d'%3)

In [None]:
def updateAligned(user_id, embedded_idxs, verbose=False):
    """Write a user id into the given rows of the ``aligned`` table.

    Args:
        user_id: value formatted into ``SET user_ID = ...``; an integer id,
            or the string ``'NULL'`` to clear the assignment.
        embedded_idxs: iterable of 0-based indices into the embedding
            database. One is added to each to obtain the 1-based
            ``aligned_ID``.
        verbose: forwarded to ``send_query``. When true, the number of rows
            carrying ``user_id`` is also queried, printed and returned.

    Returns:
        The ``COUNT(*)`` value reported by the database when ``verbose`` is
        true, otherwise ``None``.
    """
    UPDATE_SQL = """
            UPDATE aligned
            SET user_ID = {user_id}
            WHERE aligned_ID = {idx};
        """
    # int() turns tensor / numpy scalars into plain integers so that they
    # are formatted as digits inside the SQL text.
    statements = [
        UPDATE_SQL.format(user_id=user_id, idx=int(idx) + 1)
        for idx in embedded_idxs
    ]

    # With nothing to update there is no statement to send.
    if statements:
        send_query(''.join(statements), verbose=verbose)

    if verbose:
        # `= NULL` never matches in SQL and '%d' rejects the 'NULL' string,
        # so the cleared state needs its own condition.
        if user_id == 'NULL':
            condition = 'user_ID IS NULL'
        else:
            condition = 'user_ID = %d' % user_id
        total_count = send_query(
            'SELECT COUNT(*) FROM aligned WHERE ' + condition)[0]['COUNT(*)']
        print('# total images assigned to user_ID %s: %3d'%(user_id, int(total_count)))
        return total_count

In [None]:
# Determine the user_ID by hand...
# Levi is the first user, in the user table
# TODO: populate the user table from the registration_logs and the Mandácskó logs
user_id = 1
# Take the 85 nearest neighbours of the query embedding and assign all of
# them to this user.
_, candidate_indices, _ = getKclosest(embedding_query, 85)
# updateAligned adds 1 to every index to obtain the 1-based aligned_ID.
updateAligned(user_id, embedded_idxs=candidate_indices.data)
    

### Experimental: try to do depth first search on candidate images.

step 1: Have a fixed image (one that you are sure about)

step 2: find the 10 closest images and assign them to the user

step 3: remember the image so that you do not look at it again - then go back to step 1 with the first match

This may go wrong at some point - ideally you would find all the closest images and 


In [None]:
# Depth-first walk over nearest neighbours, starting from one trusted image.
proximity = 85
start_idx = 51710
isVisited = {start_idx}
_, candidate_indices, _ = getKclosest(all_embs[start_idx], proximity)
toVisit = [c.data[0] for c in candidate_indices]
count = 0
while toVisit:
    # pop() takes the most recently added candidate -> depth first
    i = toVisit.pop()
    if i in isVisited:
        continue
    isVisited.add(i)
    # Expand the image that was just visited. The original indexed all_embs
    # with `idx`, a name this cell never assigns.
    _, candidate_indices, _ = getKclosest(all_embs[i], proximity)
    # A separate loop name keeps the comprehension from shadowing `i`.
    new_candidates = [c.data[0] for c in candidate_indices]
    #new_candidates.reverse()
    toVisit += new_candidates
    count += 1
    print(count, i)
    display(Image.open(all_paths[i]))
    

### Experimental: Iterative MEAN search

step 1: Have a fixed image (one that you are sure about)

step 2: find the `proximity` closest images to the current `mean_emb`

step 3: remember the image so that you do not look at it again

step 4: update `mean_emb` and go back to step 1 with the first match

This may go wrong at some point - ideally you would find all the closest images and 


In [None]:
# Seed the search with one trusted index. isVisited is a list, so it keeps
# the order in which images were accepted (a later cell slices it).
proximity = 85
start_idx = 51696
isVisited = [start_idx]

mean_emb = all_embs[start_idx]

_, candidate_indices, _ = getKclosest(mean_emb, proximity)
# NOTE(review): unlike new_candidates below, this initial list is not
# reversed, so the first pops take the farthest of the initial candidates.
toVisit = [i.data[0] for i in candidate_indices]
count = 0
while len(toVisit) > 0:
    # pop(0) -> Breadth first search
    #i = toVisit.pop(0)
    # pop(-1) -> Depth first search
    i = toVisit.pop(-1)
    if i in isVisited:
        continue
    isVisited.append(i)
    # NOTE(review): halving gives the newest image a weight of 1/2, i.e. an
    # exponentially weighted average rather than the arithmetic mean of all
    # visited embeddings — confirm this is intended.
    new_mean_emb = (mean_emb + all_embs[i]) / 2.
    _, candidate_indices, _ = getKclosest(new_mean_emb, proximity)
    new_candidates = [i.data[0] for i in candidate_indices]
    # getKclosest sorts nearest-first; reversing puts the nearest candidate
    # last, where pop(-1) picks it up first.
    new_candidates.reverse()
    toVisit += new_candidates
    mean_emb = new_mean_emb
    
    count += 1
    print(count, i)    
    

In [None]:
# Display every image accepted by the search, numbered in visiting order.
for i, idx in enumerate(isVisited):
    print(i)
    display(Image.open(all_paths[idx]))

### Conclusion: Iterative MEAN search seems to be quite effective in generalizing over faces

So I update the database again

In [None]:
# Assign the first 168 visited indices to user 1.
# NOTE(review): 168 is presumably the hand-picked position after which the
# displayed images stopped matching — confirm.
updateAligned(user_id=1, embedded_idxs=isVisited[:168])

## Let's see how far we got

In [None]:
query_params = {
    'name': 'Mitnyik Levente'
}

# The join has to compare the two user_ID columns. The previous condition,
# `ON aligned.user_ID`, only tested that aligned.user_ID was non-zero, so the
# query returned the images of every assigned user instead of this one.
SQL_QUERY = '''
    SELECT aligned_ID, path, timestamp FROM aligned JOIN (
        SELECT user_ID FROM user WHERE name = "{name}") AS named_user
    ON aligned.user_ID = named_user.user_ID;
'''
SQL_QUERY = SQL_QUERY.format(**query_params)

aligned_path = send_query(SQL_QUERY)

In [None]:
# Display every image returned by the query, preceded by its position in
# the result.
for i, q in enumerate(aligned_path):
    display(i)
    display(Image.open(q['path']))

### If a lot of random faces got associated, tidy up the whole user

In [None]:
# Clear the assignment of every row in aligned_path: updateAligned formats
# the string 'NULL' into `SET user_ID = NULL`. aligned_ID is 1-based, hence
# the -1 here (updateAligned adds the 1 back).
updateAligned('NULL', [int(q['aligned_ID'])-1 for q in aligned_path])

### Check out another user

In [None]:
# Select the aligned images whose timestamp falls within -2000/+2000
# (in the units of the `timestamp` columns) of a logged entry of this
# person through the given gate.
query_params = dict(minusoffset=2000, plusoffset=2000, name='Hakkel Tamás')

SQL_QUERY = '''
    SELECT aligned_ID, path, name FROM aligned JOIN (
        SELECT name, timestamp-{minusoffset} as start, timestamp+{plusoffset} as end 
        FROM Mandacsko_log WHERE gate = "Forgóvilla jobb (kintről) BE" AND name = "{name}") 
    ON aligned.timestamp BETWEEN start AND end;
'''.format(**query_params)

query_result = send_query(SQL_QUERY)
display(query_result[0])
# List every hit with its position and aligned_ID so that a start index can
# be picked by eye.
for i, q in enumerate(query_result):
    print(i, q['aligned_ID'])
    display(Image.open(q['path']))

In [None]:
# Same iterative search as before, seeded with a trusted index for this
# user. isVisited is a list so that it keeps the visiting order.
proximity = 85
start_idx = 38038
isVisited = [start_idx]

mean_emb = all_embs[start_idx]

_, candidate_indices, _ = getKclosest(mean_emb, proximity)
toVisit = [i.data[0] for i in candidate_indices]
count = 0
while len(toVisit) > 0:
    # pop(0) -> Breadth first search
    #i = toVisit.pop(0)
    # pop(-1) -> Depth first search (the newest candidate is taken first)
    i = toVisit.pop(-1)
    if i in isVisited:
        continue
    isVisited.append(i)
    # NOTE(review): halving gives the newest image a weight of 1/2, i.e. an
    # exponentially weighted average rather than the arithmetic mean of all
    # visited embeddings — confirm this is intended.
    new_mean_emb = (mean_emb + all_embs[i]) / 2.
    _, candidate_indices, _ = getKclosest(new_mean_emb, proximity)
    new_candidates = [i.data[0] for i in candidate_indices]
    # getKclosest sorts nearest-first; reversing puts the nearest candidate
    # last, where pop(-1) picks it up first.
    new_candidates.reverse()
    toVisit += new_candidates
    mean_emb = new_mean_emb
    
    count += 1
    print(count, i)    
    display(Image.open(all_paths[i]))

In [None]:
# Assign the first 564 visited indices to user 2.
# NOTE(review): 564 is presumably a hand-picked cut-off — confirm.
updateAligned(user_id=2, embedded_idxs=isVisited[:564])

### Double check

In [None]:
# Look up the user_ID stored for this name in the `user` table.
send_query('SELECT user_ID FROM user WHERE name = "Hakkel Tamás"')

In [None]:
# Fetch every aligned_ID through send_large_query.
# NOTE(review): the result is not stored; presumably this only checks that
# send_large_query copes with the full table — confirm.
send_large_query('SELECT aligned_ID FROM aligned')

In [None]:
query_params = {
    'name': 'Hakkel Tamás'
}

# Join the two tables on equal user_ID values. The previous condition,
# `ON user.user_ID`, had no comparison, which paired every aligned row with
# every user whose user_ID is non-zero (the attempts recorded as "#NO").
SQL_QUERY = '''
    SELECT aligned_ID, path
    FROM aligned 
    JOIN user ON aligned.user_ID = user.user_ID
    WHERE user.name = "{name}"
'''
SQL_QUERY = SQL_QUERY.format(**query_params)

aligned_path = send_query(SQL_QUERY)
# Direct alternative that bypasses the `user` table:
#OK aligned_path = send_query('SELECT path FROM aligned WHERE user_ID = 2')
#OK aligned_path = send_large_query('SELECT aligned_ID FROM aligned WHERE user_ID = 2')

# Display every image assigned to the user, preceded by its position.
for i, q in enumerate(aligned_path):
    display(i)
    display(Image.open(q['path']))

### Check out myself

In [None]:
# Iterative mean search seeded with one trusted index. isVisited is a list
# so that it keeps the visiting order.
proximity = 10
start_idx = 279395
isVisited = [start_idx]

mean_emb = all_embs[start_idx]

_, candidate_indices, _ = getKclosest(mean_emb, proximity)
toVisit = [c.data[0] for c in candidate_indices]
count = 0
while toVisit:
    # pop(0) -> Breadth first search
    #i = toVisit.pop(0)
    # pop(-1) -> Depth first search
    i = toVisit.pop(-1)
    if i in isVisited:
        continue
    isVisited.append(i)
    # Running mean over all visited embeddings: the old mean stands for the
    # n-1 earlier images and has to be weighted accordingly. The previous
    # (mean_emb + x) / (n + 1) shrank the mean towards zero on every step.
    n_visited = len(isVisited)
    new_mean_emb = (mean_emb * (n_visited - 1) + all_embs[i]) / float(n_visited)
    _, candidate_indices, _ = getKclosest(new_mean_emb, proximity)
    # A separate loop name keeps the comprehension from shadowing `i`.
    new_candidates = [c.data[0] for c in candidate_indices]
    # Nearest candidate goes last so that pop(-1) takes it first.
    new_candidates.reverse()
    toVisit += new_candidates
    mean_emb = new_mean_emb

    count += 1
    print(count, i)
    display(Image.open(all_paths[i]))

In [None]:
# Assign visited positions 1..13 (the seed at position 0 is left out) to
# user 3 and print the resulting row count.
updateAligned(3, isVisited[1:14], verbose=True)

In [None]:
def getMeanof(idxs):
    """Return the element-wise mean of the embeddings selected by ``idxs``.

    Each index is looked up in the module-level ``all_embs``; the selected
    embeddings are stacked along a new first dimension and averaged over it.
    """
    chosen = []
    for position in idxs:
        chosen.append(all_embs[position])
    return torch.stack(chosen).mean(dim=0)
    

In [None]:
# Show the 100 database entries closest to the mean embedding of the first
# 48 visited indices.
plotKclosest(embedding_query=getMeanof(isVisited[:48]), k=100)

### Why not make a Plot-so-far function

In [None]:
def getUserIndices(user_ID, plot=False):
    """Return the 0-based embedding indices of the images assigned to a user.

    Args:
        user_ID: value compared against ``aligned.user_ID`` in the query.
        plot: when true, print the position and ``aligned_ID`` of every row
            and display its image.

    Returns:
        A list with ``aligned_ID - 1`` for every returned row.
    """
    SQL_QUERY = '''
        SELECT aligned_ID, path
        FROM aligned
        WHERE user_ID = {user_ID}
    '''.format(user_ID=user_ID)

    rows = send_query(SQL_QUERY)

    if plot:
        position = 0
        for row in rows:
            print(position, row['aligned_ID'])
            display(Image.open(row['path']))
            position += 1

    # aligned_ID counts from 1, the embedding database from 0.
    zero_based = []
    for row in rows:
        zero_based.append(int(row['aligned_ID']) - 1)
    return zero_based

In [None]:
# Fetch the 0-based indices already assigned to user 3; plot=True also
# prints and displays each of those images.
user_idxs = getUserIndices(3, True)

In [None]:
# plotKclosest returns None, so its result cannot be unpacked. Take the
# indices from getKclosest and do the plotting as a separate step.
mean_query = getMeanof(user_idxs)
_, idxs, _ = getKclosest(mean_query, 100)
plotKclosest(embedding_query=mean_query, k=100)

In [None]:
# Select the aligned images whose timestamp falls within -2000/+2000
# (in the units of the `timestamp` columns) of a logged entry of this
# person through the given gate.
query_params = dict(minusoffset=2000, plusoffset=2000, name='Steinbach László')

SQL_QUERY = '''
    SELECT aligned_ID, path, name FROM aligned JOIN (
        SELECT name, timestamp-{minusoffset} as start, timestamp+{plusoffset} as end 
        FROM Mandacsko_log WHERE gate = "Forgóvilla jobb (kintről) BE" AND name = "{name}") 
    ON aligned.timestamp BETWEEN start AND end;
'''.format(**query_params)

query_result = send_query(SQL_QUERY)
display(query_result[0])
# List every hit with its position and aligned_ID so that the matches can
# be checked by eye.
for i, q in enumerate(query_result):
    print(i, q['aligned_ID'])
    display(Image.open(q['path']))

In [None]:
# Assign every row of query_result to user 4 and print the resulting row
# count. aligned_ID is 1-based, hence the -1 (updateAligned adds it back).
updateAligned(4, [int(q['aligned_ID'])-1 for q in query_result], verbose=True)

In [None]:
# Iterative mean search seeded with one trusted index. isVisited is a list
# so that it keeps the visiting order.
proximity = 85
start_idx = 10047
isVisited = [start_idx]

mean_emb = all_embs[start_idx]

_, candidate_indices, _ = getKclosest(mean_emb, proximity)
toVisit = [c.data[0] for c in candidate_indices]
count = 0
while toVisit:
    # pop(0) -> Breadth first search
    #i = toVisit.pop(0)
    # pop(-1) -> Depth first search
    i = toVisit.pop(-1)
    if i in isVisited:
        continue
    isVisited.append(i)
    # Running mean over all visited embeddings: the old mean stands for the
    # n-1 earlier images and has to be weighted accordingly. The previous
    # (mean_emb + x) / (n + 1) shrank the mean towards zero on every step.
    n_visited = len(isVisited)
    new_mean_emb = (mean_emb * (n_visited - 1) + all_embs[i]) / float(n_visited)
    _, candidate_indices, _ = getKclosest(new_mean_emb, proximity)
    # A separate loop name keeps the comprehension from shadowing `i`.
    new_candidates = [c.data[0] for c in candidate_indices]
    # Nearest candidate goes last so that pop(-1) takes it first.
    new_candidates.reverse()
    toVisit += new_candidates
    mean_emb = new_mean_emb

    count += 1
    print(count, i)
    display(Image.open(all_paths[i]))

In [None]:
# 0-based indices currently assigned to user 4; nothing is displayed.
user_idxs = getUserIndices(4, False)

In [None]:
# Show the 200 database entries closest to the mean embedding of the
# indices in user_idxs.
plotKclosest(getMeanof(user_idxs), 200)

In [None]:
# Iterative mean search that starts from the mean of the images already
# assigned to the user instead of from a single embedding.
proximity = 85
start_idx = 10047
isVisited = [start_idx]

mean_emb = getMeanof(user_idxs)
# Number of embeddings that mean_emb currently averages over.
n_in_mean = len(user_idxs)

_, candidate_indices, _ = getKclosest(mean_emb, proximity)
toVisit = [c.data[0] for c in candidate_indices]
count = 0
while toVisit:
    # pop(0) -> Breadth first search
    #i = toVisit.pop(0)
    # pop(-1) -> Depth first search
    i = toVisit.pop(-1)
    if i in isVisited:
        continue
    isVisited.append(i)
    # Running mean: weight the old mean by the number of embeddings it
    # already contains. The previous (mean_emb + x) / (len(isVisited) + 1)
    # ignored that weight and shrank the mean towards zero on every step.
    new_mean_emb = (mean_emb * n_in_mean + all_embs[i]) / float(n_in_mean + 1)
    n_in_mean += 1
    _, candidate_indices, _ = getKclosest(new_mean_emb, proximity)
    # A separate loop name keeps the comprehension from shadowing `i`.
    new_candidates = [c.data[0] for c in candidate_indices]
    # Nearest candidate goes last so that pop(-1) takes it first.
    new_candidates.reverse()
    toVisit += new_candidates
    mean_emb = new_mean_emb

    count += 1
    print(count, i)
    display(Image.open(all_paths[i]))

In [None]:
# Assign the first 119 visited indices to user 4 and print the resulting
# row count.
updateAligned(4, isVisited[:119], verbose=True)

In [None]:
# Print and display every image now assigned to user 4.
getUserIndices(4, True)