# 1. Unzip

In [2]:
import os
import zipfile
import pandas as pd

In [None]:
# Locations used by the unzip step
xlsx_path = "O4_models/registros.xlsx"        # Excel workbook to read
zip_folder = "O4_models/annotations_zip"      # directory that holds the .zip archives
output_folder = "O4_models/annotations"       # directory the archives are extracted into

In [9]:
# Read only the two columns needed to build the destination folder names.
df = pd.read_excel(xlsx_path, usecols=['ID', 'Nombre'])

# Map each archive base name (the 'Nombre' value) to its "<ID>_<Nombre>" folder
# name. Names are stripped on both sides of the lookup so that stray spaces in
# the spreadsheet or in a file name do not prevent a match.
name_mapping = {}
for _, row in df.iterrows():
    nombre = str(row.Nombre).strip()
    name_mapping[nombre] = f"{row.ID}_{nombre}"

# Extract every archive whose base name is listed in the spreadsheet.
for filename in os.listdir(zip_folder):
    # Accept '.zip' in any letter case so '.ZIP' files are not silently ignored.
    if not filename.lower().endswith('.zip'):
        continue

    base_name = os.path.splitext(filename)[0].strip()  # Remove .zip extension

    if base_name not in name_mapping:
        print(f"Warning: No matching ID for zip '{filename}'")
        continue

    dest_path = os.path.join(output_folder, name_mapping[base_name])
    zip_path = os.path.join(zip_folder, filename)

    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # Create the folder only once the archive has been opened, so a
            # corrupt archive does not leave an empty folder behind.
            os.makedirs(dest_path, exist_ok=True)
            zip_ref.extractall(dest_path)
    except zipfile.BadZipFile as exc:
        # Report the bad archive and keep processing the remaining ones.
        print(f"Warning: could not extract '{filename}': {exc}")


# 2. Frames 1

In [10]:
import os
import pandas as pd

# Where the frame summary is written
output_xlsx = "O4_models/frames_summary.xlsx"

# Directory that contains the ID_Name folders (replace with your actual path)
root_folder = "O4_models/annotations"

In [12]:
# One record per "<ID>_<Name>" folder: how many frame_*.txt files sit in labels/train.
data = []

for folder_name in os.listdir(root_folder):
    entry_path = os.path.join(root_folder, folder_name)
    if not os.path.isdir(entry_path):
        continue

    # Split on the first underscore only; anything without one is reported and skipped.
    pieces = folder_name.split('_', 1)
    if len(pieces) != 2:
        print(f"Skipping folder (invalid format): {folder_name}")
        continue
    id_part, name_part = pieces

    train_path = os.path.join(entry_path, 'labels', 'train')
    if os.path.isdir(train_path):
        frame_count = sum(
            1
            for label_file in os.listdir(train_path)
            if label_file.startswith('frame_') and label_file.endswith('.txt')
        )
    else:
        # A folder without labels/train is still listed, with zero frames.
        frame_count = 0

    data.append({'ID': id_part, 'Name': name_part, 'Frames': frame_count})

# Write the collected records to the Excel summary.
df = pd.DataFrame(data)
df.to_excel(output_xlsx, index=False)


# 3. Frames 2

In [13]:
import os
import cv2
import pandas as pd

# Inputs for the video frame-count comparison
video_folder = "O4_models/clips"                  # directory with the .mp4 files
xlsx_1 = "O4_models/registros.xlsx"               # workbook that has the 'Video' column
xlsx_2 = "O4_models/frames_summary.xlsx"          # workbook with ID/Name/Frames

# Result of the comparison
output_xlsx = "O4_models/summary_with_video_info.xlsx"

In [14]:


# Load the workbook that lists which video file belongs to each (ID, Nombre).
df_videos = pd.read_excel(xlsx_1, usecols=['ID', 'Nombre', 'Video'])

# (ID, Name) -> video filename. Every part is compared as a stripped string.
video_map = {
    (str(row.ID), str(row.Nombre).strip()): str(row.Video).strip()
    for _, row in df_videos.iterrows()
}

# Load the ID/Name/Frames summary workbook.
df_summary = pd.read_excel(xlsx_2)

# Defaults kept for rows whose video is unmapped, missing or unreadable.
df_summary['Frames_Video'] = 0
df_summary['Match'] = False

# Compare the label count of each row with the frame count reported by OpenCV.
for idx, row in df_summary.iterrows():
    id_str = str(row['ID'])
    name_str = str(row['Name']).strip()

    key = (id_str, name_str)
    video_name = video_map.get(key)

    if not video_name:
        print(f"No video mapping for: {key}")
        continue

    video_path = os.path.join(video_folder, video_name)
    if not os.path.exists(video_path):
        print(f"Video not found: {video_path}")
        continue

    cap = cv2.VideoCapture(video_path)
    try:
        # A capture that failed to open reports 0 frames, which would be
        # recorded as a real count (and as a "match" for folders with 0 labels).
        # Report it instead and keep the defaults for this row.
        if not cap.isOpened():
            print(f"Failed to open video: {video_name}")
            continue
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Always free the capture, including on the early `continue` above.
        cap.release()

    df_summary.at[idx, 'Frames_Video'] = frame_count
    df_summary.at[idx, 'Match'] = (frame_count == row['Frames'])

# Save updated Excel
df_summary.to_excel(output_xlsx, index=False)


# 4. Rename

In [15]:
import os
import shutil

In [16]:
# Source and destination directories for the relabel/rename step
output_folder = "O4_models/test/labels"
input_root = "O4_models/annotations"          # e.g., folder with 50 folders

In [18]:
# Copy every frame_*.txt label file into one flat folder, prefixing the file
# name with the folder's ID and replacing the class column with that ID.
os.makedirs(output_folder, exist_ok=True)

for folder_name in os.listdir(input_root):
    source_dir = os.path.join(input_root, folder_name)
    if not os.path.isdir(source_dir):
        continue

    # The ID is whatever precedes the first underscore of the folder name.
    pieces = folder_name.split('_', 1)
    if len(pieces) != 2:
        print(f"Skipping folder: {folder_name}")
        continue
    id_str = pieces[0]

    labels_path = os.path.join(source_dir, 'labels', 'train')
    if not os.path.isdir(labels_path):
        continue

    for filename in os.listdir(labels_path):
        if not (filename.startswith('frame_') and filename.endswith('.txt')):
            continue

        source_file = os.path.join(labels_path, filename)
        target_file = os.path.join(output_folder, f"{id_str}_{filename}")

        with open(source_file, 'r') as infile:
            lines = infile.readlines()

        new_lines = []
        for line in lines[:5]:  # Only the first 5 lines are considered
            fields = line.strip().split()
            if len(fields) < 5:
                print(f"Skipping line in {filename} (not enough values): {line.strip()}")
                continue
            # Keep the first five values, with the first one replaced by the ID.
            new_lines.append(' '.join([id_str] + fields[1:5]))

        if not new_lines:
            print(f"Warning: no valid lines in {filename}, skipping write.")
            continue

        print(filename, new_lines)
        with open(target_file, 'w') as outfile:
            outfile.write('\n'.join(new_lines) + '\n')

frame_000000.txt ['0 0.778167 0.632546 0.331229 0.394167']
frame_000001.txt ['0 0.778167 0.631019 0.331229 0.397222']
frame_000002.txt ['0 0.778167 0.629491 0.331229 0.400278']
frame_000003.txt ['0 0.777839 0.629491 0.334635 0.400278']
frame_000004.txt ['0 0.777500 0.629491 0.335312 0.400278']
frame_000005.txt ['0 0.777161 0.629491 0.335990 0.400278']
frame_000006.txt ['0 0.776823 0.629491 0.336667 0.400278']
frame_000007.txt ['0 0.775964 0.628588 0.338385 0.402083']
frame_000008.txt ['0 0.775104 0.627685 0.340104 0.403889']
frame_000009.txt ['0 0.774766 0.626782 0.339427 0.405694']
frame_000010.txt ['0 0.774427 0.625880 0.338750 0.407500']
frame_000011.txt ['0 0.773047 0.625880 0.341510 0.407500']
frame_000012.txt ['0 0.772031 0.625880 0.339479 0.407500']
frame_000013.txt ['0 0.769635 0.625880 0.344271 0.407500']
frame_000014.txt ['0 0.768620 0.625880 0.342240 0.407500']
frame_000015.txt ['0 0.767604 0.625880 0.344271 0.407500']
frame_000016.txt ['0 0.765547 0.628935 0.347031 0.413611

# 5. Extract Frames

In [19]:
import os
import cv2
import pandas as pd

# Locations used by the frame-extraction step
xlsx_path = "O4_models/registros.xlsx"        # Excel file with ID, Nombre, Video
video_folder = "O4_models/clips"              # directory with the .mp4 files
output_folder = "O4_models/test/images"       # directory the extracted frames are saved to

In [20]:
# Dump every frame of every listed video as "<ID>_frame_<index>.jpg".
os.makedirs(output_folder, exist_ok=True)

# Read Excel
df = pd.read_excel(xlsx_path, usecols=['ID', 'Nombre', 'Video'])

for _, row in df.iterrows():
    id_str = str(row['ID'])
    video_name = str(row['Video']).strip()
    video_path = os.path.join(video_folder, video_name)

    if not os.path.isfile(video_path):
        print(f"Video not found: {video_path}")
        continue

    cap = cv2.VideoCapture(video_path)
    frame_idx = 0        # index of the frame being read; also used in the file name
    failed_writes = 0    # frames that were read but could not be saved
    try:
        if not cap.isOpened():
            print(f"Failed to open video: {video_name}")
            continue

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_filename = f"{id_str}_frame_{frame_idx:06d}.jpg"
            frame_output_path = os.path.join(output_folder, frame_filename)
            # imwrite signals failure through its return value rather than an
            # exception, so the result has to be checked explicitly.
            if not cv2.imwrite(frame_output_path, frame):
                failed_writes += 1
            # Advance even after a failed write so later file names still
            # carry the position of the frame inside the video.
            frame_idx += 1
    finally:
        # Free the capture on every exit path (normal end, `continue`, error).
        cap.release()

    print(f"Extracted {frame_idx - failed_writes} frames from {video_name}")
    if failed_writes:
        print(f"Warning: {failed_writes} frames from {video_name} could not be written to {output_folder}")


Extracted 150 frames from changan cs35 plus a.mp4
Extracted 131 frames from Changan CS55 Plus c.mp4
Extracted 63 frames from Changan_New_Van.mp4
Extracted 361 frames from Chevrolet_Camaro.mp4
Extracted 151 frames from Chevrolet_Cruze.mp4
Extracted 151 frames from Chevrolet_Onix.mp4
Extracted 126 frames from Chevrolet_Spark.mp4
Extracted 126 frames from Chevrolet_Tracker.mp4
Extracted 176 frames from DFSK_Glory_330.mp4
Extracted 151 frames from DFSK_Glory_500.mp4
Extracted 176 frames from DFSK_Glory_580.mp4
Extracted 226 frames from Hyundai_Accent.mp4
Extracted 101 frames from Hyundai_Creta.mp4
Extracted 156 frames from Hyundai_Elantra.mp4
Extracted 256 frames from Hyundai_i20.mp4
Extracted 101 frames from Hyundai_Santa_Fe.mp4
Extracted 136 frames from Hyundai_Sonata.mp4
Extracted 132 frames from HyundaiTucson_clip.mp4
Extracted 123 frames from HyundaiVeloster_clip.mp4
Extracted 137 frames from JACJS4_clip.mp4
Extracted 142 frames from JACT8_clip.mp4
Extracted 154 frames from KiaNiro_cl