### Test Re ranking

In [5]:
import numpy as np
import torch

def re_ranking(probFea, galFea, k1, k2, lambda_value, local_distmat = None, only_local = False):
    # if feature vector is numpy, you should use 'torch.tensor' transform it to tensor
    query_num = probFea.size(0)
    all_num = query_num + galFea.size(0)
    if only_local:
        original_dist = local_distmat
    else:
        feat = torch.cat([probFea,galFea])
        # print('using GPU to compute original distance')
        distmat = torch.pow(feat,2).sum(dim=1, keepdim=True).expand(all_num,all_num) + \
                      torch.pow(feat, 2).sum(dim=1, keepdim=True).expand(all_num, all_num).t()
        distmat.addmm_(1,-2,feat,feat.t())
        original_dist = distmat.numpy()
        del feat
        if not local_distmat is None:
            original_dist = original_dist + local_distmat
    gallery_num = original_dist.shape[0]
    original_dist = np.transpose(original_dist / np.max(original_dist, axis=0))
    V = np.zeros_like(original_dist).astype(np.float16)
    initial_rank = np.argsort(original_dist).astype(np.int32)

#     print('starting re_ranking')
    for i in range(all_num):
        # k-reciprocal neighbors
        forward_k_neigh_index = initial_rank[i, :k1 + 1]
        backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1]
        fi = np.where(backward_k_neigh_index == i)[0]
        k_reciprocal_index = forward_k_neigh_index[fi]
        k_reciprocal_expansion_index = k_reciprocal_index
        for j in range(len(k_reciprocal_index)):
            candidate = k_reciprocal_index[j]
            candidate_forward_k_neigh_index = initial_rank[candidate, :int(np.around(k1 / 2)) + 1]
            candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index,
                                               :int(np.around(k1 / 2)) + 1]
            fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0]
            candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate]
            if len(np.intersect1d(candidate_k_reciprocal_index, k_reciprocal_index)) > 2 / 3 * len(
                    candidate_k_reciprocal_index):
                k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index, candidate_k_reciprocal_index)

        k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
        weight = np.exp(-original_dist[i, k_reciprocal_expansion_index])
        V[i, k_reciprocal_expansion_index] = weight / np.sum(weight)
    original_dist = original_dist[:query_num, ]
    if k2 != 1:
        V_qe = np.zeros_like(V, dtype=np.float16)
        for i in range(all_num):
            V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0)
        V = V_qe
        del V_qe
    del initial_rank
    invIndex = []
    for i in range(gallery_num):
        invIndex.append(np.where(V[:, i] != 0)[0])

    jaccard_dist = np.zeros_like(original_dist, dtype=np.float16)

    for i in range(query_num):
        temp_min = np.zeros(shape=[1, gallery_num], dtype=np.float16)
        indNonZero = np.where(V[i, :] != 0)[0]
        indImages = [invIndex[ind] for ind in indNonZero]
        for j in range(len(indNonZero)):
            temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(V[i, indNonZero[j]],
                                                                               V[indImages[j], indNonZero[j]])
        jaccard_dist[i] = 1 - temp_min / (2 - temp_min)

    final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value
    del original_dist
    del V
    del jaccard_dist
    final_dist = final_dist[:query_num, query_num:]
    return final_dist

def eval_simplified_with_matches(distmat, q_pids, g_pids):
    indices = np.argsort(distmat, axis=1)  # Sorted indices of gallery samples for each query, axis=1 means columns so horizontally
    # q_pids[:, np.newaxis] == q_pids.reshape(4,1) or better q_pids.reshape(-1,1)
    matchs = np.hstack((q_pids[:, np.newaxis], g_pids[indices]))
    return matchs

6592, # Solucionado
6564, # Solucionado

6022, # Pendiente Debug 4 Tomando los ultimos 3 de promedio se sanea
9161, # Pendiente Debug 5 Se achica la caja mucho
4333, # Pendiente Debug 5 Se achica la caja mucho
10445, # Tb algo con las cajas
5392, # Reemplazo por 5444
5521,
6611,
5015,
1021,
12063,


2889*,
9623*,
236*,
5239*,

In [6]:
import os
import pandas as pd
import torch
from scipy.spatial.distance import cdist

def perform_re_ranking(features_csv, n_images=4, max_number_back_to_compare=60, K1=4, K2=2, LAMBDA=0.3, filter_know_matches=None, save_csv_dir=None):
    features = pd.read_csv(features_csv)
    for col in features.columns[3:]:
        features[col] = features[col].astype(float)
    
    ids_correct_outs = []
    ids_correct_ins = []
    
    if filter_know_matches:
        correct_labels = pd.read_csv(filter_know_matches)
        ids_correct_outs = correct_labels['OUT'].values
        ids_correct_ins = correct_labels['IN'].values

    # print(f"Correct OUTs: {len(ids_correct_outs)} Total OUTs: {len(features[features['Direction'] == 'Out']['ID'].unique())} Diff: {len(features[(features['Direction'] == 'Out') & (~features['ID'].isin(ids_correct_outs)) ]['ID'].unique())}")
    # print(f"Correct INs: {len(ids_correct_ins)} Total INs: {len(features[features['Direction'] == 'In']['ID'].unique())} Diff: {len(features[(features['Direction'] == 'In') & (~features['ID'].isin(ids_correct_ins)) ]['ID'].unique())}")

    id_out_list = features[(features['Direction'] == 'Out') & (~features['ID'].isin(ids_correct_outs))]['ID'].unique()
    id_in_list = features[(features['Direction'] == 'In') & (~features['ID'].isin(ids_correct_ins))]['ID'].unique()

    results_list = []

    for id_out in id_out_list:
        if id_out < id_in_list[0]:
            continue

        filtered_query_features = features[features['ID'] == id_out]
        query_features = filtered_query_features.iloc[:, 3:].to_numpy()
        query = torch.tensor(query_features, dtype=torch.float32)
        q_pids = filtered_query_features['Name'].values
        # q_pids = filtered_query_features['ID'].values

        subset_ids_gallery_comparisson = features[(features['ID'] < id_out) & (features['Direction'] == 'In')]['ID'].unique()[-max_number_back_to_compare:]
        interest_gallery = features[features['ID'].isin(subset_ids_gallery_comparisson)]

        gallery_features = interest_gallery.iloc[:, 3:].to_numpy()
        gallery = torch.tensor(gallery_features, dtype=torch.float32)
        g_pids = interest_gallery['Name'].values

        query = query / query.norm(dim=1, keepdim=True)
        gallery = gallery / gallery.norm(dim=1, keepdim=True)

        distmat = re_ranking(query, gallery, K1, K2, LAMBDA)
        matching_gallery_ids = eval_simplified_with_matches(distmat, q_pids, g_pids)
        for row in matching_gallery_ids[:, :n_images + 1]:
            results_list.append(row.tolist())

    column_names = ['query'] + [f'rank{i}' for i in range(1, n_images + 1)]
    re_ranking_results = pd.DataFrame(results_list, columns=column_names)

    file_name = f're_ranking_k1_{K1}_k2_{K2}_lamba_{LAMBDA}_num_img_{n_images}_{"filtered" if filter_know_matches else "all"}'
    if save_csv_dir:
        CSV_FILE_PATH = os.path.join(save_csv_dir, f'{file_name}.csv')
        re_ranking_results.to_csv(CSV_FILE_PATH, index=False)

    return re_ranking_results,file_name

### Re ranking HTML

In [7]:
import datetime
import os
import base64
import pandas as pd

def generate_html_report(re_ranking_data, base_folder, frame_rate, re_rank_html):
    def seconds_to_time(seconds):
        td = datetime.timedelta(seconds=seconds)
        time = (datetime.datetime.min + td).time()
        return time.strftime("%H:%M:%S")
    def number_to_letters(num):
        mapping = {i: chr(122 - i) for i in range(10)}
        num_str = str(num)
        letter_code = ''.join(mapping[int(digit)] for digit in num_str)
        return letter_code

    def _image_formatter(image_name, query_frame_number):
        folder_id = image_name.split('_')[1]
        img_path = os.path.join(base_folder, str(folder_id), f"{image_name}.png")
        try:
            img_frame_number = int(image_name.split('_')[2])
            with open(img_path, "rb") as f:
                encoded_string = base64.b64encode(f.read()).decode()
                time = seconds_to_time(max(0,(query_frame_number - img_frame_number)) // frame_rate)
                video_time = seconds_to_time((int(image_name.split('_')[2])// frame_rate))
                return f'<div><img width="125" src="data:image/png;base64,{encoded_string}"><div>ID: {image_name.split("_")[1]}_{number_to_letters(image_name.split("_")[2])} - {time} </div><div>{video_time}</div></div>'
        except OSError as e:
            return f"OSError: {e}, File: {img_path}"


    # Check if re_ranking_data is a path (string) or a DataFrame and load accordingly
    if isinstance(re_ranking_data, str):
        re_ranking = pd.read_csv(re_ranking_data)
    elif isinstance(re_ranking_data, pd.DataFrame):
        re_ranking = re_ranking_data
    else:
        raise ValueError("re_ranking_data must be a path to a CSV file or a pandas DataFrame")

    df = re_ranking.copy()
    # df['IndexImg'] = re_ranking.groupby('query').cumcount() + 1
    df['frame_number_query'] = df['query'].apply(lambda x: int(x.split('_')[2]))

    for column in df.columns.drop('frame_number_query'):
        df[column] = df.apply(lambda x: _image_formatter(x[column],x['frame_number_query']), axis=1)

    html_df = df.drop(['frame_number_query'],axis=1).to_html(escape=False, index=False)

    with open(re_rank_html, 'w') as file:
        file.write(html_df)

### Calculate Metrics

In [17]:
import pandas as pd

def calculate_metrics(true_matches_path, re_ranking_path):
    # Read the true matches data
    true_matches = pd.read_csv(true_matches_path)

    # Check if re_ranking_path is a string (indicating a filepath) or a DataFrame
    if isinstance(re_ranking_path, str):
        re_ranking = pd.read_csv(re_ranking_path)
    elif isinstance(re_ranking_path, pd.DataFrame):
        re_ranking = re_ranking_path
    else:
        raise ValueError("re_ranking_path must be a filepath (string) or a pandas DataFrame")

    # Convert the 'query' and rank columns to integers if they contain ID strings
    # for column in re_ranking.columns:
    #     re_ranking[column] = re_ranking[column].apply(lambda x: int(x.split('_')[1]))

    # Filter the re_ranking DataFrame based on 'query' values present in 'true_matches'
    re_ranking = re_ranking[re_ranking['query'].isin(true_matches['OUT'].values)]

    
    total_values = set(re_ranking['query'].values)
    true = set(true_matches['OUT'].values)
    np.array(true-total_values)	
 

    rank1_count = 0
    rank5_count = 0
    rank5_total_count = 0
    average_precisions = []
    number_instances_per_query = re_ranking['query'].value_counts().to_list()[0]
    rank1_per_id = {}  # Dictionary to track rank-1 per ID

    # Iterate over each query ID in true_matches
    for index, row in true_matches.iterrows():
        query_id = row['OUT']
        true_id = row['IN']
        
        # Get the ranking for the current query ID
        rankings = re_ranking[re_ranking['query'] == query_id]

        # Check if true ID is in any rank position and calculate average precision
        for _, ranking_row in rankings.iterrows():
            hit_ranks = []
            for i in range(1, 6):
                if true_id == ranking_row[f'rank{i}']:
                    if i == 1:
                        rank1_count += 1
                        rank1_per_id[true_id] = rank1_per_id.get(true_id, 0) + 1
                    rank5_total_count += 1
                    hit_ranks.append(i)
        
        # Calculate average precision for this query
        if hit_ranks:
            average_precision = sum(1.0 / rank for rank in hit_ranks) / len(hit_ranks)
            average_precisions.append(average_precision)
    
    rank1_percentage = (rank1_count / (len(true_matches) * number_instances_per_query)) * 100
    rank5_percentage = (rank5_count / (len(true_matches) * number_instances_per_query)) * 100
    matches_rank5_percentage = (rank5_total_count / 20) / len(true_matches) * 100
    mAP = (sum(average_precisions) / len(average_precisions)) * 100 if average_precisions else 0

    metrics = {
        'Rank1': round(rank1_percentage, 2),
        'Rank5': round(rank5_percentage, 2),
        'Matches#Rank5': round(matches_rank5_percentage, 2),
        'mAP': round(mAP, 2),
        'Rank1_per_ID': rank1_per_id  # Include the rank-1 counts per ID in the output
    }

    return metrics

In [9]:
features_csv = '/home/diego/Documents/yolov7-tracker/output/conce_solider_in-out_DB.csv'
BASE_FOLDER = '/home/diego/Documents/yolov7-tracker/imgs_conce_top4/'
FRAME_RATE = 15
n_images = 8
max_number_back_to_compare = 57
K1 = 8
K2 = 3
LAMBDA = 0
filter_know_matches = '/home/diego/Desktop/MatchSimple.csv'  
# filter_know_matches = None
save_csv_dir = '/home/diego/Documents/yolov7-tracker/output'

results, file_name = perform_re_ranking(features_csv,
                                        n_images=n_images,
                                         max_number_back_to_compare=max_number_back_to_compare,
                                         K1=K1,
                                         K2=K2,
                                         LAMBDA=LAMBDA,
                                         filter_know_matches=None,
                                         save_csv_dir=save_csv_dir)


# Complete
RE_RANK_HTML = os.path.join(save_csv_dir, f'{file_name}.html')

generate_html_report(results, BASE_FOLDER, FRAME_RATE, RE_RANK_HTML)


# Only HTML with re calculating the excel, comment also perform_re_ranking
# results = '/home/diego/Documents/yolov7-tracker/output/re_ranking_k1_4_k2_2_lamba_0.3_num_img_8_filtered.csv'
# file_name = 'only_html_without_calculating_excel'
# RE_RANK_HTML = os.path.join(save_csv_dir, f'{file_name}.html')
# generate_html_report(results, BASE_FOLDER, FRAME_RATE, RE_RANK_HTML)
metrics = calculate_metrics(filter_know_matches, results)
print(metrics)


{'Rank1': 62.39, 'Rank5': 0.0, 'Matches#Rank5': 49.29, 'mAP': 52.57, 'Rank1_per_ID': {100: 4, 93: 4, 33: 4, 221: 3, 219: 4, 287: 3, 213: 4, 257: 4, 398: 4, 399: 1, 258: 4, 382: 4, 459: 3, 314: 4, 478: 3, 410: 3, 547: 4, 619: 4, 545: 2, 396: 4, 754: 4, 741: 4, 765: 4, 792: 2, 529: 4, 829: 4, 838: 4, 737: 4, 1014: 4, 834: 4, 1004: 2, 1103: 4, 1127: 4, 1161: 4, 1189: 4, 1110: 2, 761: 4, 1350: 4, 1268: 2, 1432: 4, 1415: 4, 1412: 3, 1408: 4, 1543: 3, 1690: 4, 1619: 3, 1389: 4, 1015: 3, 1401: 4, 1797: 3, 1798: 4, 1786: 3, 1788: 3, 1863: 4, 1880: 4, 1874: 4, 1945: 4, 1899: 4, 1969: 3, 1594: 4, 1810: 3, 2176: 4, 1984: 3, 2239: 4, 2087: 4, 2101: 4, 2368: 1, 2374: 4, 1717: 3, 2105: 3, 2475: 4, 2540: 4, 2518: 4, 2563: 4, 2140: 4, 2564: 3, 2685: 1, 2672: 4, 2615: 4, 2753: 4, 2793: 4, 2859: 2, 2886: 4, 2887: 4, 2596: 4, 3104: 4, 3256: 4, 3101: 4, 2585: 1, 3280: 2, 3065: 4, 3075: 4, 3079: 3, 3411: 2, 3469: 4, 3467: 3, 3780: 4, 3668: 4, 3922: 4, 3839: 1, 3942: 4, 3943: 1, 2656: 4, 3949: 2, 3841: 4, 4

In [18]:
metrics = calculate_metrics(filter_know_matches, results)
print(metrics)

{'Rank1': 62.98, 'Rank5': 0.0, 'Matches#Rank5': 49.78, 'mAP': 52.28, 'Rank1_per_ID': {100: 4, 93: 4, 33: 4, 221: 3, 219: 4, 287: 3, 213: 4, 257: 4, 398: 4, 399: 1, 258: 4, 382: 4, 459: 3, 314: 4, 478: 3, 410: 3, 547: 4, 619: 4, 545: 2, 396: 4, 754: 4, 741: 4, 765: 4, 792: 2, 529: 4, 829: 4, 838: 4, 737: 4, 1014: 4, 834: 4, 1004: 2, 1103: 4, 1127: 4, 1161: 4, 1189: 4, 1110: 2, 761: 4, 1350: 4, 1268: 2, 1432: 4, 1415: 4, 1412: 3, 1408: 4, 1543: 3, 1690: 4, 1619: 3, 1389: 4, 1015: 3, 1401: 4, 1797: 3, 1798: 4, 1786: 3, 1788: 3, 1863: 4, 1880: 4, 1874: 4, 1945: 4, 1899: 4, 1969: 3, 1594: 4, 1810: 3, 2176: 4, 1984: 3, 2239: 4, 2087: 4, 2101: 4, 2368: 1, 2374: 4, 1717: 3, 2105: 3, 2475: 4, 2540: 4, 2518: 4, 2563: 4, 2140: 4, 2564: 3, 2685: 1, 2672: 4, 2615: 4, 2753: 4, 2793: 4, 2859: 2, 2886: 4, 2887: 4, 2596: 4, 3104: 4, 3256: 4, 3101: 4, 2585: 1, 3280: 2, 3065: 4, 3075: 4, 3079: 3, 3411: 2, 3469: 4, 3467: 3, 3780: 4, 3668: 4, 3922: 4, 3839: 1, 3942: 4, 3943: 1, 2656: 4, 3949: 2, 3841: 4, 4

### Search Grid

In [None]:
from itertools import product
import os
from tqdm import tqdm  # if you want to see the progress
import pandas as pd

def calculate_metrics(true_matches_path, re_ranking_path):
    # Read the data
    true_matches = pd.read_csv(true_matches_path)
    re_ranking = pd.read_csv(re_ranking_path)
    re_ranking = re_ranking[re_ranking['query'].isin(true_matches['OUT'].values)]

    rank1_count = 0
    rank5_count = 0
    rank5_total_count = 0
    average_precisions = []
    number_instances_per_query = re_ranking['query'].value_counts().to_list()[0]

    # Iterate over each query ID in true_matches
    for index, row in true_matches.iterrows():
        query_id = row['OUT']
        true_id = row['IN']
        
        # Get the ranking for the current query ID
        rankings = re_ranking[re_ranking['query'] == query_id]

        
        # Check if true ID is in any rank position and calculate average precision
        for _, ranking_row in rankings.iterrows():
            hit_ranks = []
            if true_id in ranking_row.values:
                rank5_count += 1
            for i in range(1, 6):
                if true_id == ranking_row[f'rank{i}']:
                    if i == 1:
                        rank1_count += 1 # Solo lo cuento una vez por fila
                    rank5_total_count += 1 # Lo cuento cada vez que aparece
                    hit_ranks.append(i)
        
        # Calculate average precision for this query
        if hit_ranks:
            average_precision = sum(1.0 / rank for rank in hit_ranks) / len(hit_ranks)
            average_precisions.append(average_precision)
    
    rank1_percentage = (rank1_count / (len(true_matches) * number_instances_per_query)) * 100
    rank5_percentage = (rank5_count / (len(true_matches) * number_instances_per_query)) * 100
    matches_rank5_percentage = (rank5_total_count / 20) / len(true_matches) * 100
    mAP = (sum(average_precisions) / len(average_precisions)) * 100 if average_precisions else 0

    return {
        'Rank1': round(rank1_percentage,2),
        'Rank5': round(rank5_percentage,2),
        'Matches#Rank5': round(matches_rank5_percentage,2),
        'mAP': round(mAP,2)
    }

features_csv = '/home/diego/Documents/yolov7-tracker/output/conce_solider_in-out_DB.csv'
save_csv_dir = '/home/diego/Documents/yolov7-tracker/output'
matches = '/home/diego/Desktop/MatchSimple.csv'  

n_images = 8
max_number_back_to_compare = 60

# Define the range of parameters for K1, K2, and LAMBDA
K1_values = range(2, 21)  # K1 from 2 to 20
K2_values = range(1, 20)  # Temporary range for K2, will be adjusted dynamically to be less than K1
LAMBDA_values = [i/10 for i in range(1, 10)]  # LAMBDA from 0.1 to 0.9

# Create all possible combinations of parameters, with additional logic to enforce K2 < K1
params = [(k1, k2, lambda_val) for k1 in K1_values for k2 in K2_values if k2 < k1 for lambda_val in LAMBDA_values]

best_rank1 = best_rank5 = best_map = 0
best_combination = None

# Iterate over all combinations of parameters
for param_set in tqdm(params):
    K1, K2, LAMBDA = param_set

    # Perform re-ranking with the current set of parameters
    results, file_name = perform_re_ranking(features_csv,
                                            n_images=n_images,
                                            max_number_back_to_compare=max_number_back_to_compare,
                                            K1=K1,
                                            K2=K2,
                                            LAMBDA=LAMBDA,
                                            filter_know_matches=None,
                                            save_csv_dir=None)

    # Apply the necessary transformations to the results
    for column in results.columns:
        results[column] = results[column].apply(lambda x: x.split('_')[1])

    # Save the results to a CSV
    file_name = os.path.join(save_csv_dir, f'{file_name}_simple.csv')
    results.to_csv(file_name, index=False)

    # Calculate the metrics
    metrics = calculate_metrics(matches, file_name)

    # Log the results with the current parameter values
    with open('parameter_tuning_log.txt', 'a') as log_file:
        log_file.write(f"K1: {K1}, K2: {K2}, LAMBDA: {LAMBDA}\n")
        log_file.write(f"Rank1: {metrics['Rank1']}%\n")
        log_file.write(f"Rank5: {metrics['Rank5']}%\n")
        log_file.write(f"Matches#Rank5: {metrics['Matches#Rank5']}%\n")
        log_file.write(f"mAP: {metrics['mAP']}%\n")
        log_file.write("===============================\n")
    
    # Update best parameters (depending on your criteria for 'best')
    if metrics['Rank1'] > best_rank1:
        best_rank1 = metrics['Rank1']
        best_combination = param_set


with open('parameter_tuning_log.txt', 'a') as log_file:
    log_file.write(f"Best parameter combination: K1: {best_combination[0]}, K2: {best_combination[1]}, LAMBDA: {best_combination[2]}\n")
    log_file.write(f"With Rank1: {best_rank1}%\n")


# After all iterations, print the best combination of parameters
print(f"Best parameter combination: K1: {best_combination[0]}, K2: {best_combination[1]}, LAMBDA: {best_combination[2]}")
print(f"With Rank1: {best_rank1}%")


































# K1 = 12 # This value must be between 2 and 20
# K2 = 2 # This value must be between 2 and 20 and always less than K1
# LAMBDA = 0.3 # This value must be between 0 and 1, starting from 0.1 and increasing by 0.1 until 0.9

# results, file_name = perform_re_ranking(features_csv,
#                                         n_images=n_images,
#                                          max_number_back_to_compare=max_number_back_to_compare,
#                                          K1=K1,
#                                          K2=K2,
#                                          LAMBDA=LAMBDA,
#                                          filter_know_matches=None,
#                                          save_csv_dir=None)

# for column in results.columns:
#     results[column] = results[column].apply(lambda x: x.split('_')[1])
# file_name = os.path.join(save_csv_dir, f'{file_name}_simple.csv')
# results.to_csv(file_name, index=False)

# re_ranking = file_name
# metrics = calculate_metrics(matches, re_ranking)


# #### This must be in a log file wth the value of K1, K2 and LAMBDA
# print(f"Rank1: {metrics['Rank1']}%")
# print(f"Rank5: {metrics['Rank5']}%")
# print(f"Matches#Rank5: {metrics['Matches#Rank5']}%")
# print(f"mAP: {metrics['mAP']}%")
# print("===============================\n")
