# Instructions on how to use these two scripts

### Script 1: Sequence Extraction from Image Files
Specify the directory: Replace the placeholder in the line directory = r"Image\Directory\..." with the path to your directory where the image files are located.
Run the script: After specifying the directory, you can run the script. It will create a CSV file named “output.csv” in the current working directory (the folder the script is run from). This file contains the start number, stop number, and length of each sequence of consecutive numbers.

### Script 2: Behavior Binary Array Creation
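Load your CSV file: Replace the placeholder in the line "df = pd.read_csv(r"...\output.csv")" with the path to your CSV file. This should be the “output.csv” file created by the first script. Script 2 also reads a `Behavior` column, which Script 1 does not write, so add a `Behavior` column (one label per row) to the CSV before running Script 2.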
Specify the total number of frames: Replace the number in the line total_frames = 12613 with the total number of frames in your video.
Specify the frame rate and time step: Replace the numbers in the lines frame_rate = 30 and time_step = 0.1 with the frame rate of your video and the desired time step, respectively. The frame rate should be in frames per second, and the time step should be in seconds.
Specify the directory for saving CSV files: Replace the placeholder in the line "df.to_csv(f'Director\\\for\\saving\\csv\\{behavior}_binary_array.csv', index=True)" with the directory where you want to save the CSV files.
Run the script: After specifying the CSV file, total number of frames, frame rate, time step, and directory for saving CSV files, you can run the script. It will create a separate CSV file for each unique behavior in the data. Each file contains a time column and a behavior column.

# Part 1: Sequence Extraction from Image Files

In [None]:
import os
import csv
import re

# Specify the directory where your files are
directory = r"Image\Directory\..."

# This regular expression will match the number at the end of each filename
regex = re.compile(r"(\d+)\.png$")


def find_sequences(filenames):
    """Group the trailing frame numbers of *filenames* into consecutive runs.

    Only names ending in ``<digits>.png`` are considered; everything else is
    skipped. Names are ordered by (text before the number, numeric value of
    the number) rather than as plain strings, so unpadded names such as
    ``f2.png`` and ``f10.png`` are visited in numeric order. A plain string
    sort would put ``f10.png`` before ``f2.png`` and split real runs apart.

    Args:
        filenames: Iterable of file names (not full paths).

    Returns:
        A list of ``(start, stop, length)`` tuples, one per run of consecutive
        numbers, where ``stop`` is inclusive and ``length == stop - start + 1``.
        The list is empty when no name matches.
    """
    keyed = []
    for name in filenames:
        match = regex.search(name)
        if match:
            # Prefix first, so differently-named series stay grouped together.
            keyed.append((name[:match.start(1)], int(match.group(1))))
    keyed.sort()

    sequences = []
    start = None
    prev = None
    for _, current in keyed:
        if start is None:
            start = current
        elif current != prev + 1:
            # Gap (or repeat) found: close the running sequence, open a new one.
            sequences.append((start, prev, prev - start + 1))
            start = current
        prev = current

    # Don't forget to add the last sequence
    if start is not None:
        sequences.append((start, prev, prev - start + 1))
    return sequences


# The guard is true both when run as a script and inside a notebook cell.
if __name__ == "__main__":
    # Get all filenames in the directory, keeping only the .png files
    filenames = [f for f in os.listdir(directory) if f.endswith(".png")]

    sequences = find_sequences(filenames)

    # Now we write our sequences to a CSV file (relative to the working directory)
    with open("output.csv", "w", newline="") as f:
        writer = csv.writer(f)
        # Write the header row
        writer.writerow(["Start", "Stop", "Length"])
        # Write the sequences
        writer.writerows(sequences)


# Part 2: Behavior Binary Array Creation

In [None]:
import numpy as np
import pandas as pd

# Define the frame rate of your video
frame_rate = 30  # frames per second
time_step = 0.1  # seconds

# Total number of frames in your video
total_frames = 12613


def build_binary_arrays(df, total_frames, frame_rate, time_step):
    """Build one 0/1 array per behavior, with one element per time step.

    Args:
        df: DataFrame with ``Start`` and ``Stop`` columns (frame numbers) and
            a ``Behavior`` column (label of each interval).
        total_frames: Total number of frames in the video.
        frame_rate: Video frame rate in frames per second.
        time_step: Width of one array element in seconds.

    Returns:
        A dict mapping each unique behavior to a float array of length
        ``int(total_frames / (frame_rate * time_step))`` holding 1 in the
        time steps covered by that behavior and 0 elsewhere.
    """
    # Number of video frames that fall into a single time step.
    frames_per_step = frame_rate * time_step
    n_steps = int(total_frames / frames_per_step)

    # The binary arrays are initially filled with zeros
    binary_arrays = {
        behavior: np.zeros(n_steps) for behavior in df['Behavior'].unique()
    }

    for behavior, start_frame, stop_frame in zip(
        df['Behavior'], df['Start'], df['Stop']
    ):
        # Convert frame numbers to time-step indices. Dividing by frame_rate
        # alone would give seconds, which is not the unit of the array index.
        start = int(start_frame / frames_per_step)
        stop = int(stop_frame / frames_per_step)
        # Half-open slice: the step containing the Stop frame is not marked,
        # and indices past the end of the array are clipped by NumPy.
        # NOTE(review): intervals shorter than one time step mark nothing;
        # confirm whether Stop should be treated as inclusive.
        binary_arrays[behavior][start:stop] = 1
    return binary_arrays


# The guard is true both when run as a script and inside a notebook cell.
if __name__ == "__main__":
    # Load your CSV file
    df = pd.read_csv(r"...\output.csv")

    # Fail early with a clear message if the CSV lacks a required column.
    missing = sorted({'Start', 'Stop', 'Behavior'} - set(df.columns))
    if missing:
        raise KeyError(f"Input CSV is missing required column(s): {missing}")

    # Check if the maximum frame number exceeds the total number of frames in the video
    if df['Stop'].max() > total_frames:
        print("Warning: The maximum frame number in the data exceeds the total number of frames in the video.")

    binary_arrays = build_binary_arrays(df, total_frames, frame_rate, time_step)

    # Save these binary arrays as CSV files
    for behavior, binary_array in binary_arrays.items():
        # Create a DataFrame with time and behavior columns
        df = pd.DataFrame({
            'Time': pd.to_datetime(np.arange(len(binary_array)) / (1/time_step), unit='s').time,
            'Behavior': binary_array
        })
        # Save the DataFrame as a CSV file.
        # The separator before "for" must be written as "\\f"; the sequence
        # "\\\f" would put a form-feed character into the path.
        df.to_csv(f'Director\\for\\saving\\csv\\{behavior}_binary_array.csv', index=True)
