### Select 4 best images from all images

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
import shutil

# --- Configuration -----------------------------------------------------------
BASE_FOLDER_NAME = 'results'
FOLDER_PATH_IMGS = '/home/diego/Documents/yolov7-tracker/imgs_conce/'
K_FOLD = 4  # number of folds, i.e. number of images selected per id
DEST_FOLDER_PATH_IMGS = f'/home/diego/Documents/yolov7-tracker/imgs_conce_top{K_FOLD}/'
MODEL_RESULT = os.path.join(BASE_FOLDER_NAME, 'total_model_img_selction_conce_bbox.csv')
THRESHOLD = 0.9  # confidence cut-off the selection loop starts from

df = pd.read_csv(MODEL_RESULT)

# Round 'model_label_conf' to 2 decimal places.
df['model_label_conf'] = df['model_label_conf'].round(2)

# Bookkeeping columns, filled in later for the rows that get selected.
df['new_k_fold'] = None
df['selected_image'] = False

# Collect the ids that have at least one row labelled 'BAD' and drop all of
# their rows.
bad_ids = df[df['label_direction'] == 'BAD']['id'].unique()

# sort_values() returns a new frame, so nothing is modified in place on a
# slice of `df` (the original inplace sort triggered SettingWithCopyWarning).
# The original index labels are kept, so rows can still be looked up in `df`.
filtered_df = df[~df['id'].isin(bad_ids)].sort_values(by=['id', 'frame_number'])

# Ensure the destination folder exists (no error if it is already there).
os.makedirs(DEST_FOLDER_PATH_IMGS, exist_ok=True)

# Copy one selected image into the destination tree (the source file is kept).
def copy_images(row):
    """Copy the image named in ``row['img_name']`` to the destination folder.

    The source file is looked up under ``FOLDER_PATH_IMGS`` in a sub-folder
    named after the second underscore-separated token of the file name. The
    destination path is the source path with ``FOLDER_PATH_IMGS`` replaced by
    ``DEST_FOLDER_PATH_IMGS``; missing destination folders are created.
    """
    file_name = row['img_name']
    subfolder = file_name.split('_')[1]
    src = os.path.join(FOLDER_PATH_IMGS, subfolder, file_name)
    dst = src.replace(FOLDER_PATH_IMGS, DEST_FOLDER_PATH_IMGS)

    target_dir = os.path.dirname(dst)
    os.makedirs(target_dir, exist_ok=True)
    # shutil.copy leaves the original file in place (unlike shutil.move).
    shutil.copy(src, dst)

# For every id, pick K_FOLD images: one random image from each KFold test fold.
for id_value in filtered_df['id'].unique():
    id_df = filtered_df[filtered_df['id'] == id_value]
    has_target_label = id_df['model_label_img'] == 2

    # Every id starts from the configured THRESHOLD. A local copy is relaxed
    # instead of the global, so lowering the cut-off for one id with few
    # confident images does not lower it for all the ids that follow.
    threshold = THRESHOLD
    while True:
        filtered_id_df = id_df[(id_df['model_label_conf'] > threshold) & has_target_label].copy()

        if len(filtered_id_df) >= K_FOLD or threshold <= 0:
            break
        # Round after each step: the confidences are rounded to 2 decimals, so
        # float drift from repeated subtraction (e.g. 0.7999999...) would
        # change which rows pass the strict '>' comparison.
        threshold = round(threshold - 0.05, 2)

    # Not enough candidate images even with the fully relaxed threshold.
    if len(filtered_id_df) < K_FOLD:
        continue

    # KFold without shuffling yields consecutive test folds over the rows.
    kf = KFold(n_splits=K_FOLD)
    for fold_number, (_, test_index) in enumerate(kf.split(filtered_id_df), start=1):
        selected_row = filtered_id_df.iloc[test_index].sample(n=1)
        selected_index = selected_row.index

        # Record fold and selection on the full DataFrame; index labels are
        # shared between `df` and the filtered frames.
        df.loc[selected_index, 'new_k_fold'] = fold_number
        df.loc[selected_index, 'selected_image'] = True

        # Copy the selected image to the destination folder.
        selected_row.apply(copy_images, axis=1)

# Optionally, save the updated DataFrame to a CSV file
# df.to_csv('logs/updated_model_results_with_kfold.csv', index=False)

### Test Re ranking

In [1]:
import numpy as np
import torch

def re_ranking(probFea, galFea, k1, k2, lambda_value, local_distmat = None, only_local = False):
    """Re-rank query-to-gallery distances using k-reciprocal neighbours.

    A pairwise distance matrix over all samples (queries first, then gallery)
    is built, each sample's k-reciprocal neighbour set is encoded as a weight
    vector ``V``, a Jaccard-style distance is computed from those vectors, and
    the result is blended with the normalised original distance.

    Args:
        probFea: 2-D torch tensor of query features, one row per sample.
            (If the features are numpy arrays, convert them with
            ``torch.tensor`` first.)
        galFea: 2-D torch tensor of gallery features, one row per sample.
        k1: Neighbourhood size used for the k-reciprocal sets (half of it,
            rounded, is used when expanding the sets).
        k2: Number of top-ranked neighbours whose ``V`` rows are averaged;
            the averaging step is skipped when ``k2 == 1``.
        lambda_value: Weight of the original distance in the final blend;
            the Jaccard term gets ``1 - lambda_value``.
        local_distmat: Optional distance matrix. Added to the computed
            distance, or used on its own when ``only_local`` is true.
            Presumably shaped (all samples, all samples) -- TODO confirm.
        only_local: When true, skip the feature-based distance and use
            ``local_distmat`` as the original distance.

    Returns:
        numpy array of shape (number of queries, number of gallery samples)
        with the re-ranked distances.
    """
    query_num = probFea.size(0)
    all_num = query_num + galFea.size(0)
    if only_local:
        original_dist = local_distmat
    else:
        feat = torch.cat([probFea,galFea])
        # Squared Euclidean distance: |a|^2 + |b|^2 - 2 * a.b
        sq_norms = torch.pow(feat, 2).sum(dim=1, keepdim=True).expand(all_num, all_num)
        distmat = sq_norms + sq_norms.t()
        # Keyword beta/alpha: the positional (beta, alpha, m1, m2) form is no
        # longer accepted by current PyTorch releases.
        distmat.addmm_(feat, feat.t(), beta=1, alpha=-2)
        # detach()/cpu() so tensors on GPU or tracking gradients convert too.
        original_dist = distmat.detach().cpu().numpy()
        del feat
        if local_distmat is not None:
            original_dist = original_dist + local_distmat
    # NOTE: despite the name this is the total sample count (queries + gallery),
    # because original_dist is square over all samples.
    gallery_num = original_dist.shape[0]
    original_dist = np.transpose(original_dist / np.max(original_dist, axis=0))
    V = np.zeros_like(original_dist).astype(np.float16)
    initial_rank = np.argsort(original_dist).astype(np.int32)

    half_k1 = int(np.around(k1 / 2)) + 1
    for i in range(all_num):
        # k-reciprocal neighbors
        forward_k_neigh_index = initial_rank[i, :k1 + 1]
        backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1]
        fi = np.where(backward_k_neigh_index == i)[0]
        k_reciprocal_index = forward_k_neigh_index[fi]
        k_reciprocal_expansion_index = k_reciprocal_index
        for candidate in k_reciprocal_index:
            candidate_forward_k_neigh_index = initial_rank[candidate, :half_k1]
            candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index, :half_k1]
            fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0]
            candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate]
            # Merge the candidate's set only when more than 2/3 of it already
            # overlaps with the current sample's set.
            overlap = len(np.intersect1d(candidate_k_reciprocal_index, k_reciprocal_index))
            if overlap > 2 / 3 * len(candidate_k_reciprocal_index):
                k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index, candidate_k_reciprocal_index)

        k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
        weight = np.exp(-original_dist[i, k_reciprocal_expansion_index])
        V[i, k_reciprocal_expansion_index] = weight / np.sum(weight)
    original_dist = original_dist[:query_num, ]
    if k2 != 1:
        # Replace each row of V by the mean of the rows of its k2 nearest samples.
        V_qe = np.zeros_like(V, dtype=np.float16)
        for i in range(all_num):
            V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0)
        V = V_qe
        del V_qe
    del initial_rank
    # For every column, the rows that have a non-zero weight in it.
    invIndex = [np.where(V[:, i] != 0)[0] for i in range(gallery_num)]

    jaccard_dist = np.zeros_like(original_dist, dtype=np.float16)

    for i in range(query_num):
        temp_min = np.zeros(shape=[1, gallery_num], dtype=np.float16)
        indNonZero = np.where(V[i, :] != 0)[0]
        for col in indNonZero:
            rows = invIndex[col]
            temp_min[0, rows] = temp_min[0, rows] + np.minimum(V[i, col], V[rows, col])
        jaccard_dist[i] = 1 - temp_min / (2 - temp_min)

    final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value
    del original_dist
    del V
    del jaccard_dist
    # Keep only the query rows and the gallery columns.
    final_dist = final_dist[:query_num, query_num:]
    return final_dist

def eval_simplified_with_matches(distmat, q_pids, g_pids):
    """Return, per query, its id followed by the gallery ids sorted by distance.

    Args:
        distmat: 2-D array of distances, one row per query and one column per
            gallery sample.
        q_pids: 1-D array with the id of each query row.
        g_pids: 1-D array with the id of each gallery column.

    Returns:
        2-D array whose first column is ``q_pids`` and whose remaining columns
        are ``g_pids`` reordered by ascending distance for that query.
    """
    ranked_gallery_idx = np.argsort(distmat, axis=1)
    query_column = q_pids[:, np.newaxis]
    ranked_gallery_pids = g_pids[ranked_gallery_idx]
    return np.concatenate((query_column, ranked_gallery_pids), axis=1)

#### Mejorar esto y hacerlo solo con SOLIDER CSV

In [6]:
import torch
import pandas as pd
from scipy.spatial.distance import cdist

features = pd.read_csv('../output/conce_solider_in-out_DB.csv')
# Columns from position 3 onward are used as the feature vector; force float.
for col in features.columns[3:]:
    features[col] = features[col].astype(float)

correct_labels = pd.read_csv('/home/diego/Desktop/MatchSimple.csv')
ids_correct_outs = correct_labels['OUT'].values
ids_correct_ins = correct_labels['IN'].values

# Row masks computed once and reused below.
is_out = features['Direction'] == 'Out'
is_in = features['Direction'] == 'In'

# IDs per direction that do NOT appear in the ground-truth match file.
id_out_list = features[is_out & ~features['ID'].isin(ids_correct_outs)]['ID'].unique()
id_in_list = features[is_in & ~features['ID'].isin(ids_correct_ins)]['ID'].unique()

print(f"Correct OUTs: {len(ids_correct_outs)} Total OUTs: {len(features[is_out]['ID'].unique())} Diff: {len(id_out_list)}")
print(f"Correct INs: {len(ids_correct_ins)} Total INs: {len(features[is_in]['ID'].unique())} Diff: {len(id_in_list)}")

results_list = []
rank = 5  # number of ranked gallery ids kept per query row

in_features = features[is_in]

# Build one query (all rows of an OUT id) and its gallery (all IN rows with a
# smaller ID) per unmatched OUT id, then re-rank.
for id_out in id_out_list:
    # NOTE(review): raises IndexError when id_in_list is empty -- confirm
    # whether that case should skip all OUT ids instead.
    if id_out < id_in_list[0]:
        continue

    query_rows = features[features['ID'] == id_out]
    query = torch.tensor(query_rows.iloc[:, 3:].to_numpy(), dtype=torch.float32)
    q_pids = query_rows['ID'].values

    gallery_rows = in_features[in_features['ID'] < id_out]
    gallery = torch.tensor(gallery_rows.iloc[:, 3:].to_numpy(), dtype=torch.float32)
    g_pids = gallery_rows['ID'].values

    # L2-normalise every feature row.
    query = query / query.norm(dim=1, keepdim=True)
    gallery = gallery / gallery.norm(dim=1, keepdim=True)

    distmat = re_ranking(query, gallery, 4, 2, 0.3)
    matching_gallery_ids = eval_simplified_with_matches(distmat, q_pids, g_pids)
    # Keep the query id plus the first `rank` gallery ids.
    results_list.extend(row.tolist() for row in matching_gallery_ids[:, :rank + 1])

# Column names come from `rank`, not from the last query processed, so this
# also works when the loop produced nothing or the last gallery was small.
column_names = ['query'] + [f'rank{i}' for i in range(1, rank + 1)]
if results_list:
    widest_row = max(len(row) for row in results_list)
    column_names = column_names[:widest_row]

re_ranking_results = pd.DataFrame(results_list, columns=column_names)
re_ranking_results.to_csv('../output/re_ranking.csv', index=False)

Correct OUTs: 114 Total OUTs: 564 Diff: [   14    16   134   135   279   280   298   372   466   665   759   828
  1060  1082  1198  1302  1324  1333  1374  1418  1436  1476  1488  1561
  1578  1679  1737  1761  1793  1854  1868  1870  1965  1993  2013  2032
  2054  2111  2129  2204  2265  2344  2350  2353  2354  2465  2550  2553
  2663  2716  2734  2781  2820  2857  2891  2917  2989  3136  3306  3307
  3311  3357  3371  3419  3436  3438  3440  3474  3480  3483  3486  3503
  3504  3619  3620  3628  3777  3793  3817  3852  3921  3962  3980  3993
  3994  3995  4022  4023  4027  4078  4092  4120  4134  4200  4246  4267
  4274  4275  4286  4292  4333  4345  4346  4358  4370  4427  4428  4467
  4471  4478  4481  4494  4503  4515  4541  4548  4598  4617  4632  4641
  4692  4708  4710  4711  4748  4763  4785  4820  4822  4830  4832  4841
  4843  4845  4879  4880  4899  4907  4923  4957  4960  4961  4962  4972
  4973  4999  5015  5022  5107  5144  5150  5156  5158  5159  5164  5195
  5205  520

### Re ranking HTML

In [None]:
import base64
import os
import pandas as pd
import datetime

# Root folder holding one sub-folder of images per id (joined with str(id) below).
BASE_FOLDER = '/home/diego/Documents/yolov7-tracker/imgs_conce_top4/'
# Divisor used to turn a frame-number difference into seconds.
# Presumably the video frame rate in frames per second -- TODO confirm.
FRAME_RATE = 15

def seconds_to_time(seconds):
    """Format a non-negative duration in seconds as ``HH:MM:SS``.

    Args:
        seconds: Duration in seconds. Any value accepted by ``int()`` works,
            including numpy integer scalars (which ``datetime.timedelta``
            rejects); a fractional part is discarded.

    Returns:
        Zero-padded ``"HH:MM:SS"`` string. Hours are not wrapped at 24, so a
        duration of 25 hours is rendered as ``"25:00:00"``.

    Raises:
        ValueError: If ``seconds`` is negative.
    """
    total = int(seconds)
    if total < 0:
        raise ValueError(f"seconds must be non-negative, got {seconds!r}")
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"

def _image_formatter(folder_id, index_img, query_frame_number):
    """Build the HTML snippet for one image of an id folder.

    Args:
        folder_id: Name of the sub-folder under ``BASE_FOLDER`` to read.
        index_img: 1-based position of the image in the sorted folder listing.
        query_frame_number: Frame number of the query; the image's own frame
            number (third underscore-separated token of its file name) is
            subtracted from it to compute the displayed time.

    Returns:
        An HTML ``<div>`` with the image embedded as base64 and a caption, or
        a plain error string when the index is out of range or the folder or
        file cannot be read.
    """
    folder_path = os.path.join(BASE_FOLDER, str(folder_id))
    # Bound before the try block so the OSError message can always name a
    # path, even when listing the folder itself is what failed.
    img_path = folder_path

    try:
        images_list = sorted(os.listdir(folder_path))  # Ensure consistent order
        # Reject 0 and negatives too: they would otherwise wrap around to the
        # end of the list through negative indexing.
        if not 1 <= index_img <= len(images_list):
            return "Image index out of range"
        img_file = images_list[index_img - 1]  # -1 because list index starts at 0
        img_frame_number = int(img_file.split('_')[2])
        img_path = os.path.join(folder_path, img_file)
        with open(img_path, "rb") as f:
            encoded_string = base64.b64encode(f.read()).decode()
    except FileNotFoundError:
        # Must come before OSError: FileNotFoundError is a subclass of it.
        return "Folder or image not found"
    except OSError as e:
        return f"OSError: {e}, File: {img_path}"

    # Elapsed whole seconds between the image and the query, clamped at 0.
    # int() because a numpy integer may arrive here from a DataFrame row.
    elapsed_seconds = int(max(0, (query_frame_number - img_frame_number)) // FRAME_RATE)
    time = seconds_to_time(elapsed_seconds)
    return f'<div><img width="125" src="data:image/png;base64,{encoded_string}"><div>ID: {img_file.split("_")[1]} - {time} </div></div>'
    
# Loaded under its own name: binding the DataFrame to `re_ranking` would
# overwrite the re_ranking() function defined earlier in this file.
re_ranking_df = pd.read_csv('../output/re_ranking.csv')

# CSV column -> display column.
column_map = {
    'query': 'Query',
    'rank1': 'Rank1',
    'rank2': 'Rank2',
    'rank3': 'Rank3',
    'rank4': 'Rank4',
    'rank5': 'Rank5',
}
df = pd.DataFrame({display: re_ranking_df[source] for source, display in column_map.items()})

# 1-based position of each row within its query group; used later as the
# index of the image to show from every folder on that row.
df['IndexImg'] = re_ranking_df.groupby('query').cumcount() + 1



def get_frame_number(folder_id):
    """Return the frame number of the first image in an id folder.

    The folder ``BASE_FOLDER/<folder_id>`` is listed in sorted order and the
    third underscore-separated token of the first file name is parsed as int.
    """
    folder = os.path.join(BASE_FOLDER, str(folder_id))
    first_image = sorted(os.listdir(folder))[0]  # sorted for a consistent order
    return int(first_image.split('_')[2])
        

# Frame number of each row's query, read from the query id's folder.
df['frame_number_query'] = df['Query'].map(get_frame_number)

# Replace every id cell with its HTML image snippet. 'Query' is converted
# first; the later columns only read their own id, 'IndexImg' and
# 'frame_number_query', so the order does not affect them.
display_columns = ['Query', 'Rank1', 'Rank2', 'Rank3', 'Rank4', 'Rank5']
for rank in display_columns:
    df[rank] = df.apply(
        lambda x: _image_formatter(x[rank], x['IndexImg'], x['frame_number_query']),
        axis=1,
    )

# escape=False keeps the generated <div>/<img> markup as real HTML.
html_df2 = df[display_columns].to_html(escape=False, index=False)

with open('../output/re_rank.html', 'w') as file:
    file.write(html_df2)