In [1]:
import os
import shutil
import re
import pafy
import cv2
import numpy as np
import warnings

from tqdm.auto import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# YouTube videos to download and mine for frames, in processing order.
urls = [
    "https://www.youtube.com/watch?v=1rWw2LkYzAQ&t=1s",
    "https://www.youtube.com/watch?v=nKW8LPLl99E",
    "https://www.youtube.com/watch?v=kn5uevla61U",
    "https://www.youtube.com/watch?v=OmKbGOARXao",
]
# The last entry doubles as the single-video test case.
test_url = urls[-1]

In [11]:
def getURL(url, path='', overwrite=False, resolution=(1280, 720)):
    """Download the stream of a YouTube video that has the requested resolution.

    Parameters
    ----------
    url : str
        Address of the video, handed to ``pafy.new``.
    path : str, optional
        Directory the file is written to; created when it does not exist.
        The default ``''`` yields a bare filename relative to the working
        directory.
    overwrite : bool, optional
        When the target file already exists: ``True`` deletes it and downloads
        again, ``False`` (default) keeps it and only emits a warning.
    resolution : tuple of int, optional
        ``(width, height)`` the selected stream must have. Defaults to
        ``(1280, 720)``.

    Returns
    -------
    dict
        ``{"video": <pafy video>, "stream": <selected stream>,
        "filename": <target path>}``. The dict is returned whether or not a
        download actually took place.

    Raises
    ------
    IndexError
        If the video offers no stream with exactly the requested resolution.
    """
    video = pafy.new(url)

    # Both width AND height have to match. Comparing the dimension arrays
    # element-wise and taking the first hit would also accept a stream that
    # only shares one of the two values (e.g. 1280x640).
    wanted = tuple(resolution)
    stream = next(
        (s for s in video.streams if tuple(s.dimensions) == wanted), None
    )
    if stream is None:
        raise IndexError(
            "No {}x{} stream available for {}".format(wanted[0], wanted[1], url)
        )
    print("URL:{}\n\tResolution:{}".format(url, stream.dimensions))

    # Build the filename from the title, replacing everything that is not an
    # ASCII letter or digit so the name is valid on any filesystem.
    title = re.sub(r"[^A-Za-z0-9]", "_", stream.title)
    if path:
        os.makedirs(path, exist_ok=True)
    filename = os.path.join(path, "{}.{}".format(title, stream.extension))
    ret = {"video": video, "stream": stream, "filename": filename}
    print("Writing to %s" % filename)

    if os.path.isfile(filename):
        if not overwrite:
            warnings.warn("File already exists")
            return ret
        try:
            os.remove(filename)
        except PermissionError:
            # The file is locked or read-only: keep the existing copy.
            warnings.warn("Cannot overwrite video. Permission Denied")
            return ret

    stream.download(filepath=filename, quiet=False)
    return ret

def loadFile(filename):
    """Return a ``cv2.VideoCapture`` opened on ``filename``."""
    return cv2.VideoCapture(filename)

def processFrame(frame, threshold=0.4):
    """Keep a frame only if enough of it is green.

    The frame is converted with ``cv2.COLOR_BGR2HSV`` and every pixel whose
    HSV value lies inside the green box below is counted.

    Parameters
    ----------
    frame : array
        Image handed to ``cv2.cvtColor`` (treated as BGR by the conversion).
    threshold : float, optional
        Fraction of green pixels that must be exceeded for the frame to pass.

    Returns
    -------
    tuple
        ``(True, frame)`` when the green fraction is above ``threshold``,
        otherwise ``(False, None)``.
    """
    # Lower / upper corner of the HSV box that counts as green.
    green_lo = np.array([40, 50, 50])
    green_hi = np.array([80, 255, 255])

    hsv_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    in_range = cv2.inRange(hsv_frame, green_lo, green_hi)

    # inRange marks matching pixels with a non-zero value; the mean of the
    # boolean mask is therefore the fraction of green pixels.
    green_fraction = (in_range > 0).mean()

    return (True, frame) if green_fraction > threshold else (False, None)
    
def processURL(url, outName):
    """Download a video and keep one frame every 10 seconds if it is green enough.

    Frames accepted by ``processFrame`` (threshold 0.6) are appended to an
    MJPG video and also saved as individual PNG files.

    Parameters
    ----------
    url : str
        Video address, passed on to ``getURL``.
    outName : str
        Output stem, optionally with an extension. ``"dir/name"`` writes the
        video to ``dir/name.avi`` and the PNG frames to ``dir/name/``;
        ``"dir/name.ext"`` writes the video to ``dir/name.ext`` instead.

    Returns
    -------
    dict
        The dictionary returned by ``getURL`` (keys ``"video"``, ``"stream"``,
        ``"filename"``), also when the video was skipped because its frame
        directory already exists.

    Raises
    ------
    IOError
        If the downloaded file cannot be opened by OpenCV.
    """
    # Get the stream
    streamObj = getURL(url, overwrite=False)

    # os.path.splitext keeps the leading dot on the extension, so the default
    # needs one too and the two parts are joined without an extra ".".
    filename, file_ext = os.path.splitext(outName)
    if not file_ext:
        file_ext = ".avi"
    videoName = filename + file_ext

    # An existing frame directory marks this video as already processed.
    # Check before any writer is created: opening a VideoWriter on the old
    # output file would overwrite it.
    if os.path.isdir(filename):
        return streamObj

    cap = loadFile(streamObj['filename'])
    writer = None
    try:
        if not cap.isOpened():
            raise IOError("Cannot open video file %s" % streamObj['filename'])

        # Video properties
        frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frameWidth = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frameHeight = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Frame rate derived from the reported duration; fall back to the
        # container's own value (or 1) so the sampling step is never zero.
        duration = streamObj['video'].length
        frameRate = int(frameCount / duration) if duration else 0
        if frameRate < 1:
            fps = cap.get(cv2.CAP_PROP_FPS)
            frameRate = int(fps) if fps >= 1 else 1

        # Create the frame directory (including missing parents) before the
        # writer is opened, because the video file lives in its parent.
        os.makedirs(filename)

        # Write to file
        codec = 'MJPG'
        writer = cv2.VideoWriter(videoName, cv2.VideoWriter_fourcc(*codec),
                                 frameRate, (frameWidth, frameHeight), True)

        # Get one frame every 10 seconds
        everyFrame = frameRate * 10
        for i in tqdm(np.arange(1, frameCount, everyFrame),
                      desc=streamObj['stream'].title.split(" ")[0]):
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                break
            res, frame = processFrame(frame, 0.6)
            if res:
                writer.write(frame)
                plt.imsave(f"{filename}/{i}.png", cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the native handles even when reading or writing fails.
        cap.release()
        if writer is not None:
            writer.release()

    return streamObj
    

In [12]:
# The output root must exist before processing starts: os.mkdir inside
# processURL creates only the last path component.
os.makedirs("training_2", exist_ok=True)

for i, url in enumerate(urls):
    outName = f"training_2/train_{i}"
    print("Processing {}: {}".format(i, url))
    # NOTE(review): a later cell indexes streamObj like a dict; confirm that
    # processURL actually returns the stream dictionary.
    streamObj = processURL(url, outName)

Processing 0: https://www.youtube.com/watch?v=1rWw2LkYzAQ&t=1s
URL:https://www.youtube.com/watch?v=1rWw2LkYzAQ&t=1s
	Resolution:(1280, 720)
Writing to Real_Madrid_CF_vs_FC_Barcelona__2_3__Full_Match_23_04_17_HD.mp4




Processing 1: https://www.youtube.com/watch?v=nKW8LPLl99E
URL:https://www.youtube.com/watch?v=nKW8LPLl99E
	Resolution:(1280, 720)
Writing to Chelsea_vs_Manchester_City___Full_Match_HD____Final_Cup_18_19.mp4
  1,428,694,962 Bytes [100.00%] received. Rate: [2578 KB/s].  ETA: [0 secs]     

HBox(children=(IntProgress(value=0, description='Chelsea', max=961, style=ProgressStyle(description_width='ini…


Processing 2: https://www.youtube.com/watch?v=kn5uevla61U
URL:https://www.youtube.com/watch?v=kn5uevla61U
	Resolution:(1280, 720)
Writing to Brazil_vs_Germany___FULL_match___Men_s_Football_Final_Rio_2016___Throwback_Thursday.mp4
  1,393,914,930 Bytes [100.00%] received. Rate: [2496 KB/s].  ETA: [0 secs]     

HBox(children=(IntProgress(value=0, description='Brazil', max=1053, style=ProgressStyle(description_width='ini…


Processing 3: https://www.youtube.com/watch?v=OmKbGOARXao
URL:https://www.youtube.com/watch?v=OmKbGOARXao
	Resolution:(1280, 720)
Writing to First_Time_in_Full_Length__Lewandowski_s_9_Minute_Miracle.mp4
  94,087,366 Bytes [100.00%] received. Rate: [2935 KB/s].  ETA: [0 secs]    

HBox(children=(IntProgress(value=0, description='First', max=67, style=ProgressStyle(description_width='initia…




In [None]:
# Scratch inspection: dimensions of the second-to-last stream of the video
# held in streamObj.
# NOTE(review): streamObj is whatever the last processURL call in the loop
# cell returned; confirm it is the stream dictionary (not None) before
# relying on this cell. The cell has no execution count, so it was never run.
streamObj['video'].streams[-2].dimensions

In [2]:
# Scratch check of what enumerate yields: (index, item) pairs counted from 0.
list(enumerate([1, 2, 3]))

[(0, 1), (1, 2), (2, 3)]