# From a whole video to multiple pictures, all from the positive classes

In [2]:
import sys
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Mount Google Drive into the Colab runtime.
from google.colab import drive

# Triggers the interactive authorization prompt.
drive.mount("/content/drive")

Mounted at /content/drive


In [4]:
YOLOV7_FOLDER = '/content/drive/My Drive/Bundesliga/YOLO7'
VIDEOS_FOLDER = '/yolov7/runs/detect'
CLIPS_FOLDER = YOLOV7_FOLDER + VIDEOS_FOLDER
os.chdir(CLIPS_FOLDER)
! pwd

/content/drive/My Drive/Bundesliga/YOLO7/yolov7/runs/detect


### Working with only the first video for the moment

In [5]:
os.chdir(CLIPS_FOLDER + '/clips')
! pwd

/content/drive/My Drive/Bundesliga/YOLO7/yolov7/runs/detect/clips


In [65]:
# Split the clip into one JPEG per frame, written to the current directory as frame<N>.jpg
# (N counts from 0).
vidcap = cv2.VideoCapture('08fd33_0.mp4')
if not vidcap.isOpened():
  # Without this check an unreadable/missing file silently produces zero frames.
  raise IOError("Could not open video '08fd33_0.mp4'")

count = 0
try:
  success, image = vidcap.read()
  while success:
    cv2.imwrite("frame%d.jpg" % count, image) # save frame as jpg file
    success, image = vidcap.read()
    count += 1
finally:
  vidcap.release() # free the decoder / file handle even if a write fails

In [6]:
from google.colab.patches import cv2_imshow

### Working with high resolution images for YOLO

In [6]:
# Detection run to work on; swap the two lines to switch to the other run.
EXP_FOLDER = "/exp9"
# EXP_FOLDER = "/exp10"

In [7]:
os.chdir(CLIPS_FOLDER + EXP_FOLDER) # changing this all the time: /exp9 for 08fd33_0.mp4, exp10 for 0a2d9b_0.mp4
! pwd

/content/drive/My Drive/Bundesliga/YOLO7/yolov7/runs/detect/exp9


In [8]:
# ! mkdir OriginalFrames

In [9]:
# Base name of the clip being processed; the un-annotated copy carries an '_original' suffix.
video = "08fd33_0"
video_original = video + "_original"

# Sub-folders (relative to the run folder) for extracted original frames and for label files.
FOLDER_ORIGINAL = "OriginalFrames/"
FOLDER_LABELS = "labels/"

Creating frames from the annotated video (with YOLO detections), not from the original one:

In [22]:
# Split the annotated video into one JPEG per frame: frame<N>.jpg in the current directory
# (N counts from 0).
vidcap = cv2.VideoCapture(f'{video}.mp4')
if not vidcap.isOpened():
  # Without this check an unreadable/missing file silently produces zero frames.
  raise IOError(f"Could not open video '{video}.mp4'")

count = 0
try:
  success, image = vidcap.read()
  while success:
    cv2.imwrite(f'frame{count}.jpg', image) # save frame as JPEG file
    success, image = vidcap.read()
    count += 1
finally:
  vidcap.release() # free the decoder / file handle even if a write fails

Creating frames - original ones (without YOLO detections) to crop:

In [29]:
# Split the un-annotated video into one JPEG per frame, saved as
# <FOLDER_ORIGINAL>frame<N>_original.jpg (N counts from 0).

# cv2.imwrite does not create missing directories, so make sure the target folder exists.
os.makedirs(FOLDER_ORIGINAL, exist_ok=True)

vidcap = cv2.VideoCapture(f'{video_original}.mp4')
if not vidcap.isOpened():
  # Without this check an unreadable/missing file silently produces zero frames.
  raise IOError(f"Could not open video '{video_original}.mp4'")

count = 0
try:
  success, image = vidcap.read()
  while success:
    cv2.imwrite(f'{FOLDER_ORIGINAL}frame{count}_original.jpg', image) # save frame as JPEG file
    success, image = vidcap.read()
    count += 1
finally:
  vidcap.release() # free the decoder / file handle even if a write fails

Cropping original frames (without YOLO detections):

In [10]:
# Column positions inside one whitespace-separated label row.
CLASS_INDEX = 0
X_INDEX = 1
Y_INDEX = 2

# Class ids are compared as strings because label rows are parsed as text.
PERSON_CLASS = "0"
BALL_CLASS = "32"

# Frame size in pixels used to scale the normalised (0 to 1) label coordinates.
# NOTE(review): the box sizes in the sample label output (e.g. 0.00520833 ~ 10/1920,
# 0.0185185 ~ 20/1080) suggest the frames may be 1920x1080 - confirm these values.
X_SIZE = 1928
Y_SIZE = 1024

CROP_SIZE = 224 # pixels, size of crop = (CROP_SIZExCROP_SIZE)
radius = CROP_SIZE // 2 # half the crop side, added/subtracted around the crop centre

In [11]:
from os import listdir
from os.path import isfile, join

# Collect the extracted original frames ('frame<N>_original.jpg') in frame order.
# - Cropped images are later written into this same folder, so anything that is not an
#   original frame is filtered out (otherwise a re-run of this cell would list the crops too).
# - listdir returns entries in arbitrary order, while later cells pair frames with label
#   files by position, so the list is sorted by the numeric frame index.
files = [
  file for file in listdir(FOLDER_ORIGINAL)
  if isfile(join(FOLDER_ORIGINAL, file))
  and file.startswith('frame')
  and file.endswith('_original.jpg')
  and file[len('frame'):-len('_original.jpg')].isdigit()
]
files.sort(key=lambda name: int(name[len('frame'):-len('_original.jpg')]))

In [12]:
# Strip the extension (.jpg) from every listed file name.
filenames = list(map(lambda name: os.path.splitext(name)[0], files))

In [13]:
# Sanity check: print the current working directory that the relative paths resolve against.
! pwd

/content/drive/MyDrive/Bundesliga/YOLO7/yolov7/runs/detect/exp9


### Test code to get closest pair of persons from frame (in case a ball is not detected)

In [75]:
# Load the label file of the first frame and split it into one list of string fields per row.
FIRST_FILENAME_INDEX = 1
label_txt = f'{video}_{FIRST_FILENAME_INDEX}.txt'
with open(f'{FOLDER_LABELS}{label_txt}') as f:
  reader = f.read()
# splitlines() plus a blank-line filter replaces split('\n')[:-1], which silently dropped the
# last detection whenever the file did not end with a newline.
objects = [line.split(' ') for line in reader.splitlines() if line.strip()]

In [77]:
# All label rows as a 2-D array of strings, then only the rows whose class is 'person'.
arr = np.array(objects)
is_person = arr[:, CLASS_INDEX] == PERSON_CLASS
persons = arr[is_person]

In [79]:
# Compare how many rows are in the full label array versus the person-only subset.
arr.shape, persons.shape

((32, 6), (29, 6))

In [80]:
# Show the parsed label rows (class id first, then x and y at X_INDEX / Y_INDEX).
arr

array([['0', '0.915625', '0.156481', '0.00520833', '0.0185185',
        '0.254883'],
       ['9', '0.90625', '0.308796', '0.00729167', '0.0175926',
        '0.276367'],
       ['0', '0.640365', '0.443519', '0.00885417', '0.0314815',
        '0.281982'],
       ['0', '0.446354', '0.961574', '0.028125', '0.0601852', '0.297119'],
       ['0', '0.455729', '0.37963', '0.00520833', '0.0259259',
        '0.324951'],
       ['32', '0.149479', '0.518981', '0.00416667', '0.00648148',
        '0.391113'],
       ['17', '0.85651', '0.67037', '0.0140625', '0.0425926', '0.392334'],
       ['0', '0.86224', '0.647222', '0.0109375', '0.0481481', '0.444336'],
       ['0', '0.63776', '0.459259', '0.00885417', '0.037037', '0.459717'],
       ['0', '0.452865', '0.410185', '0.00885417', '0.0296296',
        '0.467529'],
       ['0', '0.625', '0.38287', '0.00729167', '0.0268519', '0.511719'],
       ['0', '0.636719', '0.686574', '0.0119792', '0.0564815',
        '0.569336'],
       ['0', '0.31849', '0.330556

In [81]:
# (x, y) location, normalised to the 0 to 1 range, of every person in the frame.
person_xy_text = persons[:, X_INDEX:Y_INDEX + 1]
xy_persons = person_xy_text.astype(float)
xy_persons[:3]

array([[0.915625, 0.156481],
       [0.640365, 0.443519],
       [0.446354, 0.961574]])

In [82]:
# (x, y) location of every detected object in the frame, whatever its class.
all_xy_text = arr[:, X_INDEX:Y_INDEX + 1]
XY_LOCATIONS = all_xy_text.astype(float)
XY_LOCATIONS[:3]

array([[0.915625, 0.156481],
       [0.90625 , 0.308796],
       [0.640365, 0.443519]])

In [47]:
# from scipy.spatial import KDTree

In [48]:
from scipy.spatial.distance import cdist

In [83]:
# Closest pair among all detections in the frame.
points = XY_LOCATIONS

# Pairwise distance matrix of the point set against itself.
dists = cdist(points, points)

# Zero entries (a point compared with itself) would always win the argmin below,
# so they are overwritten with the largest distance in the matrix.
zero_mask = dists == 0
dists[zero_mask] = dists.max()

# (row, column) of the smallest remaining distance = indices of the two closest points
np.unravel_index(np.argmin(dists), dists.shape)

(2, 8)

In [88]:
# Closest pair again, this time restricted to the person detections.
dists = cdist(xy_persons, xy_persons)

# Replace zero distances (each person against itself) by the matrix maximum so that
# they cannot be selected as the minimum.
dists = np.where(dists == 0, dists.max(), dists)

# Convert the flat argmin position back into a (row, column) pair of person indices.
flat_index = np.argmin(dists)
closest_args = np.unravel_index(flat_index, dists.shape)
arg1, arg2 = closest_args
arg1, arg2

(1, 5)

In [95]:
# get position of both players
two_people = xy_persons[[arg1, arg2]]
two_people

array([[0.640365, 0.443519],
       [0.63776 , 0.459259]])

In [99]:
# get frame center (average position of closest persons)
frame_center = np.mean(two_people, axis=0)
frame_center

array([0.6390625, 0.451389 ])

### Main code to get the cropped frames

In [64]:
# For every original frame, read its label file and save a crop of up to
# CROP_SIZE x CROP_SIZE pixels centred on each detected ball as
# <FOLDER_ORIGINAL><frame>_cropped.jpg.
for frame in filenames: # frame of video (filenames contains frames of a single video)
  # The directory listing has no guaranteed order, so the label index is taken from the
  # frame number inside the name ('frame<N>_original') instead of the list position.
  is_original_frame = frame.startswith('frame') and frame.endswith('_original')
  frame_number = frame[len('frame'):-len('_original')] if is_original_frame else ''
  if not frame_number.isdigit():
    continue # not an extracted original frame (e.g. a '*_cropped' file from an earlier run)

  # Frames were saved counting from 0 while labels are numbered from 1.
  label_txt = f'{video}_{int(frame_number) + 1}.txt'
  label_path = f'{FOLDER_LABELS}{label_txt}'
  if not os.path.isfile(label_path):
    continue # presumably no detections were written for this frame - nothing to crop
  with open(label_path) as f:
    reader = f.read()
  identified_objects = [line.split(' ') for line in reader.splitlines() if line.strip()]

  img = None # loaded lazily, only when the frame actually contains a ball
  # Iterate over THIS frame's detections (the previous code looped over the stale
  # `objects` list left behind by the test cell).
  for object_ in identified_objects:
    if object_[CLASS_INDEX] != BALL_CLASS:
      continue

    if img is None:
      # The frames listed in `filenames` live in FOLDER_ORIGINAL, not in the working directory.
      image_path = os.path.join(FOLDER_ORIGINAL, frame + '.jpg')
      img = cv2.imread(image_path)
      if img is None: # cv2.imread returns None instead of raising
        raise FileNotFoundError(f'Could not read frame image: {image_path}')
      # Scale by the real image size rather than hard-coded dimensions.
      height, width = img.shape[:2]

    x = int(float(object_[X_INDEX]) * width)
    y = int(float(object_[Y_INDEX]) * height)
    # Clamp the window to the image; crops near a border are smaller than CROP_SIZE.
    ymin = max(y - radius, 0)
    ymax = min(y + radius, height)
    xmin = max(x - radius, 0)
    xmax = min(x + radius, width)

    cropped_image = img[ymin:ymax, xmin:xmax] # We change from (x, y) to (y, x) to work with cv2_imshow
    cv2.imwrite(f'{FOLDER_ORIGINAL}{frame}_cropped.jpg', cropped_image)

Repeating the cell above, this time with interpolation:

In [1]:
# Switch for the person-based fallback branch used when a frame has no ball detection.
USE_PERSONS = True

In [None]:
# For every original frame, save a crop of up to CROP_SIZE x CROP_SIZE pixels centred on each
# detected ball as <FOLDER_ORIGINAL><frame>_cropped.jpg. Frames without a ball reach the
# (still unimplemented) person-based fallback when USE_PERSONS is set.
for frame in filenames: # frame of video (filenames contains frames of a single video)
  # The directory listing has no guaranteed order, so the label index is taken from the
  # frame number inside the name ('frame<N>_original') instead of the list position.
  is_original_frame = frame.startswith('frame') and frame.endswith('_original')
  frame_number = frame[len('frame'):-len('_original')] if is_original_frame else ''
  if not frame_number.isdigit():
    continue # not an extracted original frame (e.g. a '*_cropped' file from an earlier run)

  # Frames were saved counting from 0 while labels are numbered from 1.
  label_txt = f'{video}_{int(frame_number) + 1}.txt'
  label_path = f'{FOLDER_LABELS}{label_txt}'
  if os.path.isfile(label_path):
    with open(label_path) as f:
      reader = f.read()
    identified_objects = [line.split(' ') for line in reader.splitlines() if line.strip()]
  else:
    # presumably no detections were written for this frame; treat it as "no ball"
    identified_objects = []
  ball = False

  img = None # loaded lazily, only when the frame actually contains a ball
  for object_ in identified_objects:
    if object_[CLASS_INDEX] != BALL_CLASS:
      continue
    ball = True

    if img is None:
      # The frames listed in `filenames` live in FOLDER_ORIGINAL, not in the working directory.
      image_path = os.path.join(FOLDER_ORIGINAL, frame + '.jpg')
      img = cv2.imread(image_path)
      if img is None: # cv2.imread returns None instead of raising
        raise FileNotFoundError(f'Could not read frame image: {image_path}')
      # Scale by the real image size rather than hard-coded dimensions.
      height, width = img.shape[:2]

    x = int(float(object_[X_INDEX]) * width)
    y = int(float(object_[Y_INDEX]) * height)
    # Clamp the window to the image; crops near a border are smaller than CROP_SIZE.
    ymin = max(y - radius, 0)
    ymax = min(y + radius, height)
    xmin = max(x - radius, 0)
    xmax = min(x + radius, width)

    cropped_image = img[ymin:ymax, xmin:xmax] # We change from (x, y) to (y, x) to work with cv2_imshow
    cv2.imwrite(f'{FOLDER_ORIGINAL}{frame}_cropped.jpg', cropped_image)

  if not ball and USE_PERSONS:
    # TODO: Maybe create a counter += 1, and when the ball is not detected in 15? consecutive
    # frames, generate the crop location from the closest pair of persons instead.
    pass