# Data Preprocessing

This notebook handles the data preprocessing for the Tennis Vision application. We locate the raw video files, resize every frame to a fixed resolution, and save the resulting frame arrays so they are ready for training.

In [None]:
import os
import cv2
import numpy as np
import pandas as pd

# Define paths (relative, so they resolve against the notebook's working directory)
# Directory that is scanned for the source .mp4/.avi video files.
raw_data_path = '../data/raw/'
# Directory where the per-video '<name>_frames.npy' arrays are written.
processed_data_path = '../data/processed/'

# Function to load raw video files
def load_raw_videos(path):
    """Return the paths of the video files found directly inside ``path``.

    A file counts as a video when its name ends in ``.mp4`` or ``.avi``. The
    comparison ignores case, so ``clip.MP4`` is picked up as well.
    Sub-directories are not searched, and directory entries are skipped even
    if their name happens to end in a video extension.

    Args:
        path: Directory to scan.

    Returns:
        list[str]: ``path`` joined with each matching file name, sorted by
        file name so repeated runs process the videos in the same order.

    Raises:
        OSError: If ``path`` cannot be listed (for example, it does not
            exist); propagated unchanged from ``os.listdir``.
    """
    video_extensions = ('.mp4', '.avi')
    videos = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(path)):
        if not filename.lower().endswith(video_extensions):
            continue
        video_path = os.path.join(path, filename)
        # Skip directories that merely look like videos by name.
        if os.path.isfile(video_path):
            videos.append(video_path)
    return videos

# Build the list of raw video paths once; the preprocessing loop further down iterates over it.
raw_videos = load_raw_videos(raw_data_path)
# Report how many video files were found in the raw-data directory.
print(f'Loaded {len(raw_videos)} videos from {raw_data_path}')

In [None]:
# Function to preprocess video frames
def preprocess_video(video_path, size=(640, 480)):
    """Read every frame of a video and resize it to a fixed size.

    Args:
        video_path: Path of the video file, handed to ``cv2.VideoCapture``.
        size: Target size passed to ``cv2.resize`` as its ``dsize`` argument,
            i.e. ``(width, height)``. Defaults to ``(640, 480)``.

    Returns:
        list: The resized frames in the order they were read. The list is
        empty when the capture cannot be opened or yields no frames. All
        frames of the video are held in memory at the same time.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream, or the next frame could not be read.
                break
            # Resize frame to the requested fixed size
            frames.append(cv2.resize(frame, size))
    finally:
        # Release the capture even if reading or resizing raised.
        cap.release()
    return frames

# Preprocess all videos and save processed frames
# np.save does not create missing directories, so make sure the target exists.
os.makedirs(processed_data_path, exist_ok=True)

for video in raw_videos:
    frames = preprocess_video(video)
    # splitext strips only the final extension, so a dotted name such as
    # 'match.2021.mp4' keeps its full stem instead of collapsing to 'match'.
    video_name = os.path.splitext(os.path.basename(video))[0]
    if not frames:
        # Nothing could be read from this file; do not write an empty array.
        print(f'Skipped {video_name}: no frames could be read')
        continue
    # Save processed frames (np.save converts the list of frames to one numpy array)
    np.save(os.path.join(processed_data_path, f'{video_name}_frames.npy'), frames)
    print(f'Processed and saved frames for {video_name}')

## Summary

In this notebook, we loaded raw video data, preprocessed the frames by resizing them, and saved the processed frames for further use in model training.