# Import Libraries

In [None]:
import pandas as pd
import os
import shutil
import glob

# Define Functions

In [None]:
def process_abnormal_tracks(abnormal_file):
    """Collect the underscore-separated tokens of every '.csv' entry in a list file.

    Each line of ``abnormal_file`` is stripped of surrounding whitespace.
    Lines that end in '.csv' have that suffix removed and are split on '_';
    all resulting tokens (as strings) are gathered into one set. Lines that
    do not end in '.csv' are ignored.

    Returns:
        set[str]: the unique tokens found across all '.csv' lines.
    """
    suffix = '.csv'
    with open(abnormal_file, 'r') as handle:
        entries = [raw.strip() for raw in handle]
    return {
        token
        for entry in entries
        if entry.endswith(suffix)
        for token in entry[:-len(suffix)].split('_')
    }

def interpolate_df(df):
    """Return a new DataFrame with one row per frame from the first to the last frameNo.

    The 'frameNo' column is cast to int and used as the index, the index is
    expanded to every integer between its minimum and maximum (inclusive),
    and the rows introduced for missing frames are filled by linear
    interpolation of the remaining columns.

    The input DataFrame is not modified.

    Args:
        df: DataFrame containing a 'frameNo' column convertible to int.

    Returns:
        DataFrame with 'frameNo' as the first column, followed by the other
        columns of ``df``, covering every frame in the min..max range.
    """
    # assign() builds a new frame, so the caller's DataFrame keeps its
    # 'frameNo' column instead of being cast and re-indexed in place.
    by_frame = df.assign(frameNo=df['frameNo'].astype(int)).set_index('frameNo')

    first_frame = by_frame.index.min()
    last_frame = by_frame.index.max()
    all_frames = range(first_frame, last_frame + 1)

    filled = by_frame.reindex(all_frames).interpolate(method='linear')

    # Name the index explicitly so reset_index() always yields a 'frameNo'
    # column, whether or not reindex() carried the index name over.
    return filled.rename_axis('frameNo').reset_index()

def process_dataset(src_dir, dest_dir, abnormal_file, threshold, *, fps=30.0):
    """Sort the CSV files of src_dir into normal/abnormal/discarded folders.

    For every ``*.csv`` directly inside ``src_dir`` the duration is computed
    as ``(max frameNo - min frameNo) / fps``:

    * Files shorter than ``threshold`` seconds (or containing no rows) are
      moved unchanged to ``dest_dir/discarded``.
    * All other files are interpolated with ``interpolate_df`` and written
      to ``dest_dir/abnormal`` or ``dest_dir/normal``; the original file in
      ``src_dir`` is then deleted.

    Args:
        src_dir: directory holding the input CSV files.
        dest_dir: directory under which the three subdirectories are created.
        abnormal_file: text file passed to ``process_abnormal_tracks``.
        threshold: minimum duration in seconds for a file to be kept.
        fps: frame rate used to convert a frame span into seconds.

    Raises:
        ValueError: if ``fps`` is not positive.
    """
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps!r}")

    # Create destination subdirectories
    for subdir in ('normal', 'abnormal', 'discarded'):
        os.makedirs(os.path.join(dest_dir, subdir), exist_ok=True)

    # Get set of abnormal numbers
    abnormal_set = process_abnormal_tracks(abnormal_file)

    # glob.glob returns a list, so moving/removing files inside the loop
    # does not disturb the iteration.
    for csv_file in glob.glob(os.path.join(src_dir, '*.csv')):
        df = pd.read_csv(csv_file)
        filename = os.path.basename(csv_file)

        if df.empty:
            # A header-only file has no frames: min()/max() would be NaN,
            # which compares False against the threshold and would then
            # crash during interpolation. Treat it as zero-length instead.
            time_duration = 0.0
        else:
            frame_span = df['frameNo'].max() - df['frameNo'].min()
            time_duration = frame_span / fps

        if df.empty or time_duration < threshold:
            # Too short to be useful: move the untouched original aside.
            dest_path = os.path.join(dest_dir, 'discarded', filename)
            shutil.move(csv_file, dest_path)
            print(f"Moved {filename} to discarded (duration: {time_duration:.3f}s)")
            continue

        # Interpolate data and save to normal or abnormal
        df_interpolated = interpolate_df(df)

        # Strip only the extension, mirroring how process_abnormal_tracks
        # strips the suffix of each listed name.
        stem = os.path.splitext(filename)[0]
        # NOTE(review): matching is per underscore-separated token, so a file
        # is flagged when ANY of its tokens appears in ANY abnormal entry --
        # confirm this is the intended matching rule.
        is_abnormal = any(num in abnormal_set for num in stem.split('_'))
        dest_subdir = 'abnormal' if is_abnormal else 'normal'

        dest_path = os.path.join(dest_dir, dest_subdir, filename)
        df_interpolated.to_csv(dest_path, index=False)
        # Remove the original only after the interpolated copy is written.
        os.remove(csv_file)
        print(f"Saved interpolated {filename} to {dest_subdir} (duration: {time_duration:.3f}s)")

# Define Paths and Threshold

In [None]:
# One entry per recording: where its CSVs live ('src'), where the sorted
# output goes ('dest'), and the text file listing its abnormal tracks.
datasets = [
    dict(
        src='../sptio-temporal-dataset/10',
        dest='./10',
        abnormal='../sptio-temporal-dataset/AbnormalTracks_10.txt',
    ),
    dict(
        src='../sptio-temporal-dataset/11',
        dest='./11',
        abnormal='../sptio-temporal-dataset/AbnormalTracks_11.txt',
    ),
    dict(
        src='../sptio-temporal-dataset/12',
        dest='./12',
        abnormal='../sptio-temporal-dataset/AbnormalTracks_12.txt',
    ),
]

# Minimum duration, in seconds, shared by all datasets
threshold = 1.0

# Process All Datasets

In [None]:
# Run the segregation step once per configured dataset, announcing each
# destination before it is processed.
for dataset in datasets:
    print(f"\nProcessing dataset: {dataset['dest']}")
    process_dataset(
        src_dir=dataset['src'],
        dest_dir=dataset['dest'],
        abnormal_file=dataset['abnormal'],
        threshold=threshold,
    )