# Data preparation

## 1. Prepare raw data

Download raw data.

In [None]:
!mkdir datasets
!wget https://raw.githubusercontent.com/ifzhang/FairMOT/master/videos/MOT16-03.mp4 -O datasets/MOT16-03.mp4

Split the downloaded video into short clips (200 frames per clip by default).

In [None]:
import os
import cv2

def split_video(video_path, clip_dir="./datasets/clips", frame_num=200):
    """Split a video into consecutive clips of at most ``frame_num`` frames.

    Clips are written to ``clip_dir`` as ``sample_0.mp4``, ``sample_1.mp4``, ...
    using the ``mp4v`` codec and the source video's frame size and frame rate.
    The path of every clip is printed when the clip is started.

    Args:
        video_path: Path of the video file to split.
        clip_dir: Directory that receives the clips; created if missing.
        frame_num: Maximum number of frames per clip (must be positive).

    Returns:
        None.

    Raises:
        ValueError: If ``frame_num`` is not a positive number.
    """
    if frame_num <= 0:
        raise ValueError(f"frame_num must be positive, got {frame_num}")

    os.makedirs(clip_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Nothing can be read: report it and stop instead of falling through
        # to a release() call on a writer that was never created.
        print("Error opening video stream or file")
        cap.release()
        return

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the frame rate as reported; truncating to int would turn e.g. 29.97 into 29.
    fps = cap.get(cv2.CAP_PROP_FPS)

    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')

    out = None  # writer of the clip currently being filled
    frame_cnt = 0
    clip_cnt = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Every frame_num frames, close the current clip and start the next one.
            if frame_cnt % frame_num == 0:
                if out is not None:
                    out.release()

                clip_path = f"{clip_dir}/sample_{clip_cnt}.mp4"
                out = cv2.VideoWriter(clip_path, fourcc, fps, (frame_width, frame_height))
                print(f"save clip: {clip_path}")
                clip_cnt += 1

            out.write(frame)
            frame_cnt += 1
    finally:
        # Release both handles even if reading or writing raised; `out` stays
        # None when the video contained no frames.
        if out is not None:
            out.release()
        cap.release()

# Output directory for the clips; the S3 upload cell further down reads it as well.
clip_dir = "./datasets/clips"
split_video(video_path='./datasets/MOT16-03.mp4', clip_dir=clip_dir)

In [None]:
# S3 destination for the sample clips.
# NOTE(review): the default bucket name appears to embed a specific AWS account ID,
# so it will likely not be writable from other accounts. Set the BYTETRACK_S3_BUCKET
# environment variable (or edit the default) to point at a bucket you own.
bucket_name = os.environ.get("BYTETRACK_S3_BUCKET", "sagemaker-us-east-1-822507008821")
prefix = "sm-bytetrack"
sample_data_s3uri = f"s3://{bucket_name}/{prefix}/sample-data"

In [None]:
!aws s3 cp --recursive $clip_dir $sample_data_s3uri

## 2. Label raw data

- Step-1: Create a private team
- Step-2: Add a worker to the private team you created
- Step-3: Create a labeling job
- Step-4: Label data

Once you have finished a labeling task, you will find the following annotation directory under the S3 output path you defined.

<div align="center">
    <img width=300 src="img/gt_structure.png">
    <figcaption>Ground Truth Structure</figcaption>
</div>

Under the manifest directory, an `out` folder should have been created once all files are labeled.
<div align="center">
    <img width=300 src="img/gt_manifest_structure.png">
    <figcaption>Manifest in Ground Truth Structure</figcaption>
</div>

You will see a file `output.manifest` like this:
<div align="center">
    <img width=600 src="img/out_manifest.png">
    <figcaption>output.manifest</figcaption>
</div>

Refer to [Use Amazon SageMaker Ground Truth to Label Data](https://docs.aws.amazon.com/sagemaker/latest/dg/sms.html) for a guide to labeling data. You can label either video files or frame files.