In [1]:
import os
import shutil
import pandas as pd

In [2]:
def interpolate_df(df):
    """Interpolate DataFrame to fill missing frames between min_frame and max_frame."""
    
    df['frameNo'] = df['frameNo'].astype(int)
    
    # Handle duplicate frameNo entries
    if df['frameNo'].duplicated().any():
        print("Warning: Duplicate frameNo found. Dropping duplicates.")
        df = df.drop_duplicates(subset='frameNo', keep='first')
    
    min_frame = df['frameNo'].min()
    max_frame = df['frameNo'].max()
    all_frames = range(min_frame, max_frame + 1)
    
    df.set_index('frameNo', inplace=True)
    df_interpolated = df.reindex(all_frames).interpolate(method='linear')
    
    df_interpolated.reset_index(inplace=True)
    df_interpolated.rename(columns={'index': 'frameNo'}, inplace=True)
    
    return df_interpolated


In [3]:
# Base working directory
base_dir = os.getcwd()

# Folders you want to process
folders_to_process = ['10', '11', '12']

# Subfolders inside each number folder to read data from
input_subfolders = ['normal', 'abnormal']

# Minimum frame threshold (files below this go to discarded)
frame_threshold = 50

# Output folders (global interpolated and discarded)
interpolated_base = os.path.join(base_dir, 'interpolated')
discarded_base = os.path.join(base_dir, 'discarded')


In [4]:
def recreate_dir(path):
    """Deletes the directory if it exists, then creates a new one."""
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)


In [5]:
# Prepare top-level interpolated and discarded folders
recreate_dir(interpolated_base)
recreate_dir(discarded_base)


In [6]:
for folder_name in folders_to_process:
    folder_path = os.path.join(base_dir, folder_name)
    
    if not os.path.isdir(folder_path):
        print(f"Skipping missing folder: {folder_name}")
        continue

    print(f"\nProcessing folder: {folder_name}")

    # Prepare subfolders for this folder in interpolated and discarded
    interpolated_folder = os.path.join(interpolated_base, folder_name)
    discarded_folder = os.path.join(discarded_base, folder_name)

    interpolated_normal = os.path.join(interpolated_folder, 'normal')
    interpolated_abnormal = os.path.join(interpolated_folder, 'abnormal')
    discarded_normal = os.path.join(discarded_folder, 'normal')
    discarded_abnormal = os.path.join(discarded_folder, 'abnormal')

    # Clean (delete & recreate) these folders
    recreate_dir(interpolated_normal)
    recreate_dir(interpolated_abnormal)
    recreate_dir(discarded_normal)
    recreate_dir(discarded_abnormal)

    # Process both normal and abnormal input folders
    for subfolder in input_subfolders:
        input_dir = os.path.join(folder_path, subfolder)

        if not os.path.exists(input_dir):
            print(f"Skipping missing subfolder: {input_dir}")
            continue
        
        print(f"  Processing '{subfolder}' in {folder_name}")

        # List CSV files in input directory
        files = [f for f in os.listdir(input_dir) if f.endswith('.csv')]

        if not files:
            print(f"  No CSV files found in {input_dir}. Skipping.")
            continue

        for file_name in files:
            input_file = os.path.join(input_dir, file_name)

            # Load CSV
            df = pd.read_csv(input_file)
            num_frames = len(df)

            if num_frames < frame_threshold:
                # Move to discarded folder
                discard_subfolder = discarded_normal if subfolder == 'normal' else discarded_abnormal
                discard_file = os.path.join(discard_subfolder, file_name)
                shutil.copy2(input_file, discard_file)
                print(f"    Discarded {file_name} (frames: {num_frames}) -> {discard_file}")
                continue

            # Interpolate and save
            df_interpolated = interpolate_df(df)

            output_dir = interpolated_normal if subfolder == 'normal' else interpolated_abnormal
            output_file = os.path.join(output_dir, file_name)
            df_interpolated.to_csv(output_file, index=False)
            print(f"    Interpolated {file_name} (frames: {num_frames}) -> {output_file}")

print("\nProcessing complete for all folders.")



Processing folder: 10
  Processing 'normal' in 10
    Interpolated 347_.csv (frames: 68) -> /home/codesmith28/Projects/ML_2025_4_Cluster_555/Codes/dataset_interpolated/processed/interpolated/10/normal/347_.csv
    Interpolated 644_.csv (frames: 336) -> /home/codesmith28/Projects/ML_2025_4_Cluster_555/Codes/dataset_interpolated/processed/interpolated/10/normal/644_.csv
    Interpolated 88_127_149_184_327_338_400_474_494_604_667_966_.csv (frames: 554) -> /home/codesmith28/Projects/ML_2025_4_Cluster_555/Codes/dataset_interpolated/processed/interpolated/10/normal/88_127_149_184_327_338_400_474_494_604_667_966_.csv
    Interpolated 938_.csv (frames: 144) -> /home/codesmith28/Projects/ML_2025_4_Cluster_555/Codes/dataset_interpolated/processed/interpolated/10/normal/938_.csv
    Interpolated 51_.csv (frames: 129) -> /home/codesmith28/Projects/ML_2025_4_Cluster_555/Codes/dataset_interpolated/processed/interpolated/10/normal/51_.csv
    Interpolated 9_76_111_314_432_471_733_752_805_842_857_887