In [None]:
# Extract the dataset archive into /content, where the processing cell looks for "MP_Data".
# -o overwrites existing files without prompting, so re-running the notebook does not
# stall on an interactive "replace?" question; -d makes the destination independent of
# the kernel's current working directory.
# NOTE(review): assumes the archive has a top-level "MP_Data/" directory — confirm.
!unzip -o "/content/MP_Data.zip" -d /content

In [None]:
import os
import numpy as np

def process_folders(root_folder):
    """Process every directory found directly inside ``<root_folder>/MP_Data``.

    Args:
        root_folder: Directory that contains the ``MP_Data`` folder.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``MP_Data`` cannot be listed
            (for example, when it does not exist).
    """
    data_root = os.path.join(root_folder, "MP_Data")
    for entry in os.listdir(data_root):
        candidate = os.path.join(data_root, entry)
        # Plain files sitting next to the directories are ignored.
        if not os.path.isdir(candidate):
            continue
        process_subfolders(candidate)

def process_subfolders(folder_path):
    """Run ``process_files_in_subfolder`` on each directory directly inside ``folder_path``.

    Args:
        folder_path: Directory whose immediate child directories are processed.
            Non-directory entries are skipped.
    """
    # Lazily build the full path of each entry; the directory check happens per item.
    children = (os.path.join(folder_path, name) for name in os.listdir(folder_path))
    for child in children:
        if os.path.isdir(child):
            process_files_in_subfolder(child)
def process_files_in_subfolder(subfolder_path, target_file_count=30):
    """Trim a folder down to a target number of entries, then renumber what is left.

    Args:
        subfolder_path: Folder whose entries are trimmed and renamed in place.
        target_file_count: Number of entries to keep. Defaults to 30, the value
            that used to be hard-coded here, so existing one-argument callers
            behave as before.
    """
    # Order matters: remove first, then renumber the survivors.
    remove_files_to_reach_target(subfolder_path, target_file_count)
    rename_files(subfolder_path)

def euclidean_distance(vec1, vec2):
    """Return the Euclidean (L2) norm of ``vec1 - vec2``.

    Args:
        vec1: Array-like of numbers.
        vec2: Array-like of numbers, broadcast-compatible with ``vec1``.

    Returns:
        The norm of the element-wise difference as a NumPy floating scalar.
    """
    first = np.asarray(vec1)
    second = np.asarray(vec2)

    # Integer subtraction in NumPy wraps around silently (e.g. uint8 0 - 3 == 253)
    # and boolean subtraction raises, so promote those dtypes to float64 before
    # taking the difference. Floating and complex inputs are left untouched.
    if first.dtype.kind in "bui":
        first = first.astype(np.float64)
    if second.dtype.kind in "bui":
        second = second.astype(np.float64)

    return np.linalg.norm(first - second)

def natural_sort_key(s):
    """Build a sort key that orders embedded numbers numerically ("2" before "10").

    Args:
        s: The string to build a key for (e.g. a file name).

    Returns:
        A list alternating lower-cased text chunks and ``int`` values, always
        starting with a (possibly empty) text chunk.
    """
    import re

    # re.split with a capturing group returns text chunks at even indexes and the
    # captured digit runs at odd indexes. Deciding by position rather than by
    # str.isdigit() avoids calling int() on characters such as "²", which
    # isdigit() accepts but int() rejects.
    chunks = re.split(r'(\d+)', s)
    return [int(chunk) if index % 2 else chunk.lower() for index, chunk in enumerate(chunks)]

def find_distance_array(folder_path):
    """Compute the distance between each pair of consecutive files in a folder.

    Entries are ordered with ``natural_sort_key`` and each one is read with
    ``np.load``.

    Args:
        folder_path: Folder whose entries are compared.
            NOTE(review): assumes every entry is a ``.npy`` array file and that
            consecutive arrays have compatible shapes — confirm.

    Returns:
        A list of ``(name, next_name, distance)`` tuples, one per consecutive
        pair, in sorted order. Empty when the folder holds fewer than two entries.
    """
    files = sorted(os.listdir(folder_path), key=natural_sort_key)
    distances = []

    # With fewer than two entries there is no pair to compare and nothing to load.
    if len(files) < 2:
        return distances

    # Carry the previously loaded array forward so each file is read only once
    # instead of once as "next" and again as "current".
    previous_name = files[0]
    previous_vec = np.load(os.path.join(folder_path, previous_name))

    for current_name in files[1:]:
        current_vec = np.load(os.path.join(folder_path, current_name))
        distance = euclidean_distance(previous_vec, current_vec)
        distances.append((previous_name, current_name, distance))
        previous_name, previous_vec = current_name, current_vec

    return distances

def remove_files_to_reach_target(folder_path, target_count):
    """Delete entries from a folder until at most ``target_count`` remain.

    Candidates are taken from the consecutive pairs reported by
    ``find_distance_array``: pairs are ordered by ascending distance and the
    first file of each of the closest pairs is deleted.

    Args:
        folder_path: Folder to trim in place.
        target_count: Number of entries to keep.
    """
    excess = len(os.listdir(folder_path)) - target_count

    # Already at or below the target: report zero (never a negative count) and
    # skip loading every file just to delete nothing.
    if excess <= 0:
        print("Removing 0 files...")
        return

    distances = find_distance_array(folder_path)
    distances.sort(key=lambda pair: pair[2])  # Sort based on distance

    # There is one pair fewer than there are files, so cap the count to avoid
    # indexing past the end of the pair list.
    num_files_to_remove = min(excess, len(distances))
    print(f"Removing {num_files_to_remove} files...")

    for first_name, _next_name, _distance in distances[:num_files_to_remove]:
        os.remove(os.path.join(folder_path, first_name))
        print(f"Removed {first_name}")

def rename_files(folder_path):
    """Rename every entry in a folder to ``0.npy``, ``1.npy``, ... in natural sort order.

    The rename runs in two phases (old name -> staging name -> final name) so that
    a final name can never overwrite an entry that has not been moved yet. A
    direct ``os.rename`` onto an existing file replaces it silently on POSIX
    and fails on Windows.

    Args:
        folder_path: Folder whose entries are renumbered in place.
    """
    files = sorted(os.listdir(folder_path), key=natural_sort_key)

    # Entries that already carry their final name are left alone.
    pending = [
        (old_name, f"{index}.npy")
        for index, old_name in enumerate(files)
        if old_name != f"{index}.npy"
    ]

    # Phase 1: move everything that needs renaming out of the way.
    taken = set(files)
    staged = []
    for old_name, new_name in pending:
        staging_name = f".renaming_{new_name}"
        # Make sure the staging name does not collide with anything in the folder.
        while staging_name in taken:
            staging_name = "_" + staging_name
        taken.add(staging_name)
        os.rename(
            os.path.join(folder_path, old_name),
            os.path.join(folder_path, staging_name),
        )
        staged.append((staging_name, new_name))

    # Phase 2: every target name is now free, so move the entries into place.
    for staging_name, new_name in staged:
        os.rename(
            os.path.join(folder_path, staging_name),
            os.path.join(folder_path, new_name),
        )

# Root directory expected to contain the "MP_Data" folder.
folder_path = "/content"
# Runs the full trim-and-renumber pass. This deletes and renames files on disk in place.
process_folders(folder_path)

In [None]:
# Recursively archive /content/MP_Data into features.zip in the current working directory.
!zip -r features.zip /content/MP_Data