In [1]:
import os
# Root directory that is scanned for numerically named image sub-folders.
BASE_IMAGES_PATH = '/home/diego/Documents/yolov7-tracker/imgs_conce'
# Folder that holds the CSV / SQLite files used by the cells below.
BASE_FOLDER_NAME = 'results'
# Bounding-box CSV, located inside BASE_FOLDER_NAME.
CSV_FILE_PATH = os.path.join(BASE_FOLDER_NAME, 'conce_bbox.csv')

### Add image names to the bbox CSV [110]

In [2]:
import os
import pandas as pd
import glob

def find_image_name(row, image_dict):
    """
    Return the image file name that belongs to ``row``, or ``None``.

    The expected token ``img_<id>_<frame_number>`` is built from the row and
    searched for inside every file name stored under the row's id. A hit only
    counts when the token is NOT immediately followed by another digit, so
    frame 12 (``img_1_12``) is never confused with frame 123
    (``img_1_123.png``). Any non-digit suffix (extension, ``_suffix``) is
    still accepted.

    Parameters
    ----------
    row : mapping
        Must provide ``'id'`` and ``'frame_number'``; both are cast with
        ``int``.
    image_dict : dict
        Maps an integer id to a list of image file names.

    Returns
    -------
    str or None
        The first matching file name in list order, or ``None`` when the id
        is unknown or no name matches.
    """
    track_id = int(row['id'])
    token = f"img_{track_id}_{int(row['frame_number'])}"
    token_len = len(token)

    for name in image_dict.get(track_id, []):
        start = name.find(token)
        while start != -1:
            end = start + token_len
            # A trailing digit means the token is only a prefix of a longer
            # frame number, i.e. a different frame.
            if end == len(name) or not name[end].isdigit():
                return name
            start = name.find(token, start + 1)
    return None

def append_image_names(csv_path, base_path):
    """
    Read the CSV at ``csv_path`` and add an ``img_name`` column.

    Every sub-directory of ``base_path`` whose name consists only of digits
    is treated as the folder of one id; the base names of its ``*.png`` files
    are collected per id. Each CSV row is then resolved to a file name with
    ``find_image_name`` (``None`` when nothing matches).

    Note that all numeric folders are indexed, whether or not their id
    occurs in the CSV.

    Returns the DataFrame with the new column; nothing is written to disk.
    """
    bbox_df = pd.read_csv(csv_path, dtype={'id': 'int64', 'frame_number': 'int64'})

    # (id, folder path) for every purely numeric sub-directory.
    id_folders = [
        (int(entry), os.path.join(base_path, entry))
        for entry in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, entry)) and entry.isdigit()
    ]

    # id -> base names of the PNG files found in that id's folder.
    images_by_id = {
        track_id: [os.path.basename(png) for png in glob.glob(os.path.join(folder, "*.png"))]
        for track_id, folder in id_folders
    }

    # Resolve the image name row by row.
    bbox_df['img_name'] = bbox_df.apply(find_image_name, axis=1, args=(images_by_id,))

    return bbox_df


# Add the 'img_name' column, then write the result back over the input CSV.
updated_df = append_image_names(csv_path=CSV_FILE_PATH, base_path=BASE_IMAGES_PATH)
updated_df.to_csv(path_or_buf=CSV_FILE_PATH, index=False)

### Add k-fold assignment and the label_img / label_direction columns [110]

In [3]:
# Imports used by the fold-assignment step
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
import os
# Assign k-fold indices and initialise the label columns
def set_folds(csv_path, k_folds, n_images, random_state=None):
    """
    Assign a fold index to a random sample of images for every id in a CSV.

    The CSV is read from ``csv_path`` and the columns ``k_fold``,
    ``label_img`` and ``label_direction`` are (re)initialised to NaN, so any
    values already stored in them are discarded. Rows that have an
    ``img_name`` are ordered by ``id`` and ``frame_number`` and handled one
    id at a time:

    * an id with fewer than ``k_folds * n_images`` image rows gets
      ``k_fold = 0`` on all of them (``label_img`` stays NaN);
    * otherwise ``KFold(n_splits=k_folds)`` partitions the ordered rows and,
      from the test part of each split, up to ``n_images`` rows are drawn
      without replacement; those rows get ``k_fold = <split number>`` and
      ``label_img = 0``.

    ``label_direction`` is only initialised here, never filled.

    Parameters
    ----------
    csv_path : str
        CSV file with at least ``id``, ``frame_number`` and ``img_name``.
    k_folds : int
        Number of folds passed to ``KFold``.
    n_images : int
        Maximum number of rows sampled per fold.
    random_state : int or None, optional
        Seed for the sampling. With ``None`` (default) the global
        ``np.random`` state is used, exactly as before this parameter
        existed; pass an integer to make the assignment reproducible.

    Returns
    -------
    pandas.DataFrame
        The full table, including rows without an image, with the three
        columns added.

    Raises
    ------
    ValueError
        If the CSV has no ``img_name`` column.
    """
    df = pd.read_csv(csv_path)

    if 'img_name' not in df.columns:
        raise ValueError("img_name column doesn't exist in the dataset.")

    # Start from a clean slate for the columns managed by this step.
    df['k_fold'] = np.nan
    df['label_img'] = np.nan
    df['label_direction'] = np.nan

    # Global numpy state unless the caller asked for a reproducible draw.
    sampler = np.random if random_state is None else np.random.RandomState(random_state)

    # Only rows that actually reference an image take part in the assignment.
    has_image = df['img_name'].notna() & (df['img_name'] != '')
    candidates = df[has_image].sort_values(by=['id', 'frame_number'])

    min_rows = k_folds * n_images
    for _, subset in candidates.groupby('id', sort=True):
        if len(subset) < min_rows:
            # Too few images to sample n_images per fold: keep them all in fold 0.
            df.loc[subset.index, 'k_fold'] = 0
            continue

        splitter = KFold(n_splits=k_folds)
        for fold, (_, test_index) in enumerate(splitter.split(subset)):
            # test_index holds positions within `subset`; sample, then map the
            # positions back to labels of the full frame.
            picked = sampler.choice(test_index, min(n_images, len(test_index)), replace=False)
            picked_labels = subset.index[picked]
            df.loc[picked_labels, 'k_fold'] = fold
            df.loc[picked_labels, 'label_img'] = 0
    return df


# Assign folds (5 folds, up to 3 sampled images each) and overwrite the CSV in place.
df_with_folds = set_folds(csv_path=CSV_FILE_PATH, k_folds=5, n_images=3)
df_with_folds.to_csv(path_or_buf=CSV_FILE_PATH, index=False)


### CSV to SQLite [110]

In [None]:
import pandas as pd
import sqlite3
import os

def convert_csv_to_sqlite(csv_file_path, db_file_path, table_name='bbox_data'):
    """
    Load a CSV file into a table of a SQLite database.

    Parameters
    ----------
    csv_file_path : str
        CSV file to read.
    db_file_path : str
        SQLite database file; ``sqlite3.connect`` creates it if missing.
    table_name : str, optional
        Destination table. An existing table with this name is replaced.

    The connection is closed even when writing the table fails.
    """
    # Read first, so a bad CSV never opens (or creates) the database file.
    df = pd.read_csv(csv_file_path)

    conn = sqlite3.connect(db_file_path)
    try:
        df.to_sql(table_name, conn, if_exists='replace', index=False)
    finally:
        # Release the file handle / lock regardless of the outcome.
        conn.close()

# The paths are defined again here, presumably so this cell can run on its own.
BASE_FOLDER_NAME = 'results'
CSV_FILE_PATH = os.path.join(BASE_FOLDER_NAME, 'conce_bbox.csv')
db_file_path = f'{BASE_FOLDER_NAME}/bbox_data.db'

# Load the bbox CSV into the default 'bbox_data' table of the database.
convert_csv_to_sqlite(csv_file_path=CSV_FILE_PATH, db_file_path=db_file_path)

### SQLite -> CSV [120]

In [2]:
import pandas as pd
import sqlite3
import os

def convert_sqlite_to_csv(db_file_path, csv_file_path, table_name='bbox_data'):
    """
    Dump one table of a SQLite database to a CSV file.

    Parameters
    ----------
    db_file_path : str
        SQLite database file to read.
    csv_file_path : str
        Destination CSV; written without the DataFrame index.
    table_name : str, optional
        Name of a single table. It is quoted as an SQL identifier, so names
        containing spaces, quotes or keywords work; it must be a bare table
        name, not a schema-qualified name or an SQL fragment.

    The connection is closed even when the query fails.
    """
    # Identifiers cannot be bound as parameters; quote the name instead
    # (double any embedded double quote, then wrap in double quotes).
    quoted_table = '"' + table_name.replace('"', '""') + '"'

    conn = sqlite3.connect(db_file_path)
    try:
        df = pd.read_sql_query(f"SELECT * FROM {quoted_table}", conn)
    finally:
        conn.close()

    df.to_csv(csv_file_path, index=False)

# Source database and destination CSV, both inside BASE_FOLDER_NAME.
# Note: CSV_FILE_PATH is rebound here to the exported file, not the bbox CSV.
BASE_FOLDER_NAME = 'results'
db_file_path = f'{BASE_FOLDER_NAME}/bbox_data.db'
CSV_FILE_PATH = os.path.join(BASE_FOLDER_NAME, 'from_sql_bbox.csv')

# Export the default 'bbox_data' table to the CSV.
convert_sqlite_to_csv(db_file_path=db_file_path, csv_file_path=CSV_FILE_PATH)