In [None]:
# Import necessary libraries
import pandas as pd
from IPython.display import display  # For nicer DataFrame output in Jupyter

# Step 1: Load the source CSV into a DataFrame
# Point `csv_path` at the real file if it is not named 'your_file.csv'.
csv_path = 'your_file.csv'
print("Reading the CSV file...")
df = pd.read_csv(csv_path)
print("Original DataFrame loaded successfully:")
display(df)

# Step 2: Cast the 'frameNo' column to integers
# Frame numbers are used below as range bounds and index labels, so they
# must be integral before any of that arithmetic happens.
frame_numbers = df['frameNo'].astype(int)
df['frameNo'] = frame_numbers
print("Ensured 'frameNo' is integer type.")

# Step 3: Find the first and last frame numbers present in the data
# min_frame is the smallest frameNo and max_frame the largest; together they
# bound the range in which gaps are searched for.
frame_column = df['frameNo']
min_frame = frame_column.min()
max_frame = frame_column.max()
print(f"Minimum frame (min_frame): {min_frame}")
print(f"Maximum frame (max_frame): {max_frame}")

# Step 4: Build the complete list of frame numbers
# Every integer from min_frame to max_frame (inclusive) is a frame that should
# exist; the ones absent from the CSV are the missing frames.
# `all_frames` is an empty-column DataFrame whose index is that full range.
all_frames = pd.DataFrame(index=pd.RangeIndex(min_frame, max_frame + 1))
print(f"Created a range of all possible frames from {min_frame} to {max_frame}.")
print(f"Total number of frames (including existing and missing): {len(all_frames)}")

# Step 5: Set 'frameNo' as the index of the original DataFrame
# This allows us to align the existing data with the full range of frames.
# verify_integrity=True makes duplicate frame numbers fail right here, with a
# ValueError that names the duplicated values, instead of failing later inside
# reindex() with a generic "duplicate labels" error.
df.set_index('frameNo', inplace=True, verify_integrity=True)
print("Set 'frameNo' as the index of the original DataFrame:")
display(df)

# Step 6: Expand the DataFrame so it has one row per frame in the full range
# Rows for frames that were not in the CSV are created with NaN in every column.
# A plain range (not a pandas Index) is used as the target so the existing
# index name is carried over to the result.
full_frame_range = range(min_frame, max_frame + 1)
df_all = df.reindex(index=full_frame_range)
print("DataFrame expanded to include all frames (missing frames have NaN values):")
display(df_all)

# Step 7: Count and identify the missing frames
# A frame is missing when it lies in the full min..max range but has no row in
# the original data. The two indexes are compared directly rather than looking
# for NaN in a data column: a frame that exists in the CSV with an empty 'left'
# cell is not a missing frame, and the check should not depend on a particular
# column being present.
# (df is indexed by 'frameNo' at this point; df_all spans the full range.)
missing_frames = df_all.index.difference(df.index).tolist()
print(f"Number of missing frames: {len(missing_frames)}")
print(f"Missing frame numbers: {missing_frames}")

# Step 8: Fill the NaN rows by linear interpolation
# The original request spoke of "linear regression", but filling gaps between
# known points is an interpolation problem: each missing value is placed on the
# straight line joining the nearest known values before and after it.
# axis=0 interpolates down each column, i.e. across consecutive frames.
df_interpolated = df_all.interpolate(method='linear', axis=0)
print("Performed linear interpolation to populate missing frames:")
display(df_interpolated)

# Step 9: Turn the frame-number index back into an ordinary 'frameNo' column
# reset_index() moves the index into a column; the rename covers the case where
# that column comes out with the default name 'index'.
df_interpolated = df_interpolated.reset_index().rename(columns={'index': 'frameNo'})
print("Reset index so 'frameNo' is a column again:")
display(df_interpolated)

# Step 10: Spot-check the interpolated values for a couple of frames
# Frames 4 and 41 are used as examples (just after 3 and just before 42).
sample_frames = (4, 41)
print("Verifying interpolation for sample frames:")
for frame in sample_frames:
    print(f"\nData for frame {frame}:")
    display(df_interpolated.loc[df_interpolated['frameNo'] == frame])

# Step 11: Optional - Save the interpolated data to a new CSV file
# Uncomment the line below if you want to save the output
# df_interpolated.to_csv('interpolated_data.csv', index=False)
# print("Interpolated data saved to 'interpolated_data.csv'")

# Additional Notes:
# - The bounding box coordinates (left, top, w, h) may result in float values after interpolation.
# - If integers are required, round and cast them (round() alone keeps the float dtype) by adding:
# df_interpolated[['left', 'top', 'w', 'h']] = df_interpolated[['left', 'top', 'w', 'h']].round().astype(int)
# - This script is verbose and ready for further preprocessing steps (e.g., filtering, normalization).