# Object Detection using YOLOv7

##  Installing dependencies

In [None]:
import torch
!git clone https://github.com/WongKinYiu/yolov7
%cd yolov7
!pip install -r requirements.txt
# !pip install roboflow #roboflow에서 dataset 불러올 경우 설치
!apt-get -y install libgl1-mesa-glx
!apt-get -y install libglib2.0-0
!wget "https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt"

## Start Training
- Note for resuming training from checkpoint

By default, the checkpoints for the epoch are stored in folder, yolov7/runs/train, give the relative path to last epoch checkpoints

In [None]:
%cd /workspace/yolov7/
!python train.py --batch 64 --cfg cfg/training/yolov7.yaml --epochs 100 --data /workspace/bowling/data/YOLO/data.yaml --weights 'yolov7.pt' --device 0

In [None]:
# On image path:
%cd /workspace/yolov7/
!python detect.py --weights runs/train/exp/weights/best.pt --conf 0.25 --img-size 640 --source /workspace/bowling/data/YOLO/val/images/

## **Evaluation**
- Note the checkpoints from training will be stored by default in runs/train/exp. Take the path of the latest checkpoint

We can evaluate the performance of our custom training using the provided evaluation script.
After doing below cell, check the inference on folder of test images

In [None]:
from IPython.display import Image
display(Image("/workspace/yolo7/runs/train/exp/F1_curve.png", width=400, height=400))
display(Image("/workspace/yolo7/runs/train/exp/PR_curve.png", width=400, height=400))
display(Image("/workspace/yolo7/runs/train/exp/confusion_matrix.png", width=500, height=500))
# Run evaluation
!python detect.py --weights /workspace/yolov7/runs/train/exp/weights/best.pt --conf 0.1 --source /workspace/yolov7/bowling-1/test/images

# Inference on Video
In here, upload video from Local System(maybe Server System..?)

In [7]:
video_path = '/workspace/yolo7/sample_video.mp4'
# please prepare the video to test
# Initializing video object
video = cv2.VideoCapture(video_path)


#Video information
fps = video.get(cv2.CAP_PROP_FPS)
w = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
nframes = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

# Initialzing object for writing video output
output = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'DIVX'),fps , (w,h))
torch.cuda.empty_cache()
# Initializing model and setting it for inference
with torch.no_grad():
  weights, imgsz = opt['weights'], opt['img-size']
  set_logging()
  device = select_device(opt['device'])
  half = device.type != 'cpu'
  model = attempt_load(weights, map_location=device)  # load FP32 model
  stride = int(model.stride.max())  # model stride
  imgsz = check_img_size(imgsz, s=stride)  # check img_size
  if half:
    model.half()

  names = model.module.names if hasattr(model, 'module') else model.names
  colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
  if device.type != 'cpu':
    model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))

  classes = None
  if opt['classes']:
    classes = []
    for class_name in opt['classes']:
      classes.append(opt['classes'].index(class_name))

  for j in range(nframes):

      ret, img0 = video.read()
      if ret:
        img = letterbox(img0, imgsz, stride=stride)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
          img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment= False)[0]


        pred = non_max_suppression(pred, opt['conf-thres'], opt['iou-thres'], classes= classes, agnostic= False)
        t2 = time_synchronized()
        for i, det in enumerate(pred):
          s = ''
          s += '%gx%g ' % img.shape[2:]  # print string
          gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]
          if len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

            for c in det[:, -1].unique():
              n = (det[:, -1] == c).sum()  # detections per class
              s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

            for *xyxy, conf, cls in reversed(det):

              label = f'{names[int(cls)]} {conf:.2f}'
              plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)

        print(f"{j+1}/{nframes} frames processed")
        output.write(img0)
      else:
        break


output.release()
video.release()

NameError: name 'cv2' is not defined

In [None]:
from IPython.display import HTML
from base64 import b64encode
import os

# Input video path
save_path = '/bowling/yolov7/output.mp4'

# Compressed video path
compressed_path = "/bowling/result_compressed.mp4"

os.system(f"ffmpeg -i {save_path} -vcodec libx264 {compressed_path}")

# Show video
mp4 = open(compressed_path,'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

### 일단 작성은 다 했는데, 모델 학습시켜서 직접 코드 돌려봐야지 오류나는 부분 잡을 수 있을 것 같다.
아니 그리고 패키지 설치 왜안되는건데 ㅡㅡ
