# Imports

In [1]:
import pandas as pd
import os
import shutil
import glob

# Define interpolate_df function

In [2]:
def interpolate_df(df):
    """Interpolate DataFrame to fill missing frames between min_frame and max_frame."""
    df['frameNo'] = df['frameNo'].astype(int)
    
    # Check for and handle duplicate 'frameNo' entries
    if df['frameNo'].duplicated().any():
        print("Warning: Duplicate frameNo found in the DataFrame. Dropping duplicates.")
        df = df.drop_duplicates(subset='frameNo', keep='first')
    
    min_frame = df['frameNo'].min()
    max_frame = df['frameNo'].max()
    all_frames = range(min_frame, max_frame + 1)
    df.set_index('frameNo', inplace=True)
    df_interpolated = df.reindex(all_frames).interpolate(method='linear')
    df_interpolated.reset_index(inplace=True)
    df_interpolated.rename(columns={'index': 'frameNo'}, inplace=True)
    return df_interpolated

# Define process_abnormal_tracks function

In [3]:
def process_abnormal_tracks(abnormal_file):
    """Process abnormal tracks file to extract numbers as a set for lookup."""
    with open(abnormal_file, 'r') as f:
        lines = f.readlines()
    all_abnormal_nums = []
    for line in lines:
        line = line.strip()
        if line.endswith('.csv'):
            nums = line[:-4].split('_')  # Remove '.csv' and split by '_'
            all_abnormal_nums.extend(nums)
    abnormal_set = set(all_abnormal_nums)  # Set for fast lookup
    return abnormal_set

# Define process_dataset function

In [4]:
def process_dataset(src_dir, dest_dir, abnormal_file):
    """Process CSV files in src_dir, interpolate them, and segregate into normal/abnormal folders."""
    
    # Clear the destination directory if it already exists
    if os.path.exists(dest_dir):
        shutil.rmtree(dest_dir)
        print(f"Deleted existing directory: {dest_dir}")
    
    # Create destination subdirectories
    os.makedirs(os.path.join(dest_dir, 'normal'), exist_ok=True)
    os.makedirs(os.path.join(dest_dir, 'abnormal'), exist_ok=True)
    print(f"Created directories: {dest_dir}/normal, {dest_dir}/abnormal")
    
    # Get abnormal numbers
    abnormal_set = process_abnormal_tracks(abnormal_file)
    print(f"Loaded {len(abnormal_set)} abnormal numbers from {abnormal_file}")
    
    # Check for CSV files in src_dir
    csv_files = glob.glob(os.path.join(src_dir, '*.csv'))
    print(f"Found {len(csv_files)} CSV files in {src_dir}")
    
    # Process each CSV file
    for csv_file in csv_files:
        filename = os.path.basename(csv_file)
        df = pd.read_csv(csv_file)
        
        # Interpolate the DataFrame
        df_interpolated = interpolate_df(df)
        
        # Extract numbers from filename
        numbers = filename.replace('.csv', '').split('_')
        is_abnormal = any(num in abnormal_set for num in numbers)
        
        # Determine destination based on abnormality
        dest_subdir = 'abnormal' if is_abnormal else 'normal'
        dest_path = os.path.join(dest_dir, dest_subdir, filename)
        df_interpolated.to_csv(dest_path, index=False)
        print(f"Saved interpolated {filename} to {dest_subdir}")

# Define datasets and process each dataset

In [5]:
datasets = [
    {
        'src': './sptio-temporal-dataset/10/',
        'dest': './processed/10/',
        'abnormal': './sptio-temporal-dataset/AbnormalTracks_10.txt'
    },
    {
        'src': './sptio-temporal-dataset/11/',
        'dest': './processed/11/',
        'abnormal': './sptio-temporal-dataset/AbnormalTracks_11.txt'
    },
    {
        'src': './sptio-temporal-dataset/12/',
        'dest': './processed/12/',
        'abnormal': './sptio-temporal-dataset/AbnormalTracks_12.txt'
    }
]

for dataset in datasets:
    print(f"\nProcessing dataset: {dataset['dest']}")
    process_dataset(dataset['src'], dataset['dest'], dataset['abnormal'])


Processing dataset: ./processed/10/
Created directories: ./processed/10//normal, ./processed/10//abnormal
Loaded 246 abnormal numbers from ./sptio-temporal-dataset/AbnormalTracks_10.txt
Found 159 CSV files in ./sptio-temporal-dataset/10/
Saved interpolated 347_.csv to abnormal
Saved interpolated 748_.csv to abnormal
Saved interpolated 452_.csv to abnormal
Saved interpolated 514_549_568_621_704_718_799_820_836_868_878_889_897_899_921_932_946_954_959_.csv to abnormal
Saved interpolated 143_.csv to abnormal
Saved interpolated 214_.csv to abnormal
Saved interpolated 3_.csv to abnormal
Saved interpolated 7_37_72_128_.csv to abnormal
Saved interpolated 209_.csv to abnormal
Saved interpolated 644_.csv to abnormal
Saved interpolated 88_127_149_184_327_338_400_474_494_604_667_966_.csv to abnormal
Saved interpolated 798_817_.csv to abnormal
Saved interpolated 63_.csv to abnormal
Saved interpolated 589_.csv to abnormal
Saved interpolated 968_.csv to abnormal
Saved interpolated 938_.csv to abnorm