### Select 4 best images from all images

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
import shutil

# --- Configuration -----------------------------------------------------------
BASE_FOLDER_NAME = 'results'
# Root folder holding the source images (one sub-folder per group of images).
FOLDER_PATH_IMGS = '/home/diego/Documents/yolov7-tracker/imgs_conce/'
# Number of folds per id; one image is picked from each fold.
K_FOLD = 4
# Root folder that receives the selected images.
DEST_FOLDER_PATH_IMGS = f'/home/diego/Documents/yolov7-tracker/imgs_conce_top{K_FOLD}/'
MODEL_RESULT = os.path.join(BASE_FOLDER_NAME, 'total_model_img_selction_conce_bbox.csv')
# Initial minimum 'model_label_conf' (strictly greater-than) for a frame to be
# a candidate; the selection loop lowers it in 0.05 steps when too few frames
# qualify.
THRESHOLD = 0.9

# --- Load and prepare the model results --------------------------------------
df = pd.read_csv(MODEL_RESULT)

# Round 'model_label_conf' to 2 decimal places so threshold comparisons work
# on a fixed 0.01 grid.
df['model_label_conf'] = df['model_label_conf'].round(2)

# Output columns, filled in for the rows that end up selected.
df['new_k_fold'] = None
df['selected_image'] = False

# Collect the ids that have at least one row labelled 'BAD' and drop every
# row belonging to those ids.
bad_ids = df.loc[df['label_direction'] == 'BAD', 'id'].unique()

# Order by id and frame_number. A non-inplace sort is used on purpose:
# sorting a boolean-indexed slice in place raises SettingWithCopyWarning.
# The original index labels are kept, so they still address rows in `df`.
filtered_df = df[~df['id'].isin(bad_ids)].sort_values(by=['id', 'frame_number'])

# Ensure the destination folder exists (no error if it is already there).
os.makedirs(DEST_FOLDER_PATH_IMGS, exist_ok=True)

# Function to copy a selected image into the destination tree
def copy_images(row, *, src_root=None, dest_root=None):
    """Copy the image described by *row* from the source tree to the destination tree.

    The image is expected at ``<src_root>/<sub>/<img_name>`` where ``<sub>`` is
    the second ``_``-separated token of ``row['img_name']`` (presumably the
    track id -- confirm against the naming scheme). It is copied to
    ``<dest_root>/<sub>/<img_name>``; the source file is left in place.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with an ``'img_name'`` key.
        src_root: Source root folder. Defaults to ``FOLDER_PATH_IMGS``.
        dest_root: Destination root folder. Defaults to ``DEST_FOLDER_PATH_IMGS``.

    Raises:
        IndexError: If ``img_name`` contains no underscore.
        FileNotFoundError: If the source image does not exist.
    """
    if src_root is None:
        src_root = FOLDER_PATH_IMGS
    if dest_root is None:
        dest_root = DEST_FOLDER_PATH_IMGS

    img_name = row['img_name']
    sub_folder = img_name.split('_')[1]

    source_path = os.path.join(src_root, sub_folder, img_name)
    # Build the destination from its components instead of string-replacing
    # the root inside the source path, which breaks if the root text does not
    # match exactly or occurs more than once.
    dest_dir = os.path.join(dest_root, sub_folder)
    os.makedirs(dest_dir, exist_ok=True)
    shutil.copy(source_path, os.path.join(dest_dir, img_name))

# For every remaining id, pick K_FOLD images spread across its frame sequence:
# the candidate frames are split into K_FOLD contiguous folds (KFold without
# shuffling) and one random frame is taken from each fold.
for id_value in filtered_df['id'].unique():
    id_df = filtered_df[filtered_df['id'] == id_value]
    # Only frames the model assigned image label 2 are candidates.
    has_target_label = id_df['model_label_img'] == 2

    # Start every id from the configured THRESHOLD. Using a local copy keeps
    # a relaxation needed for one id from lowering the cutoff for all later
    # ids (which would make the result depend on id order).
    threshold = THRESHOLD
    while True:
        filtered_id_df = id_df[(id_df['model_label_conf'] > threshold) & has_target_label]

        if len(filtered_id_df) >= K_FOLD or threshold <= 0:
            break
        # Re-round after each step so float drift cannot shift the cutoff
        # relative to the confidences, which are rounded to 2 decimals.
        threshold = round(threshold - 0.05, 2)

    # Ids that still have fewer than K_FOLD candidates are skipped entirely.
    if len(filtered_id_df) < K_FOLD:
        continue

    kf = KFold(n_splits=K_FOLD)

    for fold_number, (_, test_index) in enumerate(kf.split(filtered_id_df), start=1):
        # test_index holds positional indices of this fold's rows.
        fold_df = filtered_id_df.iloc[test_index]
        selected_row = fold_df.sample(n=1)
        selected_index = selected_row.index

        # Record fold and selection on the full DataFrame; index labels are
        # shared between df and the filtered frames derived from it.
        df.loc[selected_index, 'new_k_fold'] = fold_number
        df.loc[selected_index, 'selected_image'] = True

        # Copy (not move) the selected image to the destination tree.
        copy_images(selected_row.iloc[0])

# Optionally, save the updated DataFrame to a CSV file
# df.to_csv('logs/updated_model_results_with_kfold.csv', index=False)