In [1]:
import os
import shutil

import albumentations as A
import cv2
import numpy as np
import pandas as pd
from PIL import Image
from pytube import YouTube
# Frame dimensions in pixels. NOTE(review): not referenced anywhere in the
# visible notebook -- presumably the intended resize target; confirm or remove.
WIDTH = 640
HEIGHT = 360
# Running counter that getFrames appends to every saved frame filename.
image_id = 0
# Running counter that augmentImages appends to every augmented filename.
num = 0
# Folder where the validation/test split CSV files are written and read back.
csv_dir = "C:\\Users\\luoal\\Documents\\PenaltyProphet\\Data\\csv"

# Augmentations being applied to data

In [2]:
# Augment data in order to add variance to the model.
#
# NOTE: HorizontalFlip is deliberately not part of this pipeline. The class
# labels encode a horizontal direction (Left / Right / Center) and
# augmentImages writes each augmented frame back into the folder of its source
# class, so mirroring a frame left-to-right would silently turn a "Left" sample
# into a "Right"-looking one while keeping the "Left" label.
TRANSFORM = A.Compose([
    # Flip and Rotate (neither swaps the left/right side of the frame)
    A.VerticalFlip(p=0.5),
    A.Rotate(p=0.6, limit=22),

    # Add random holes to image
    A.CoarseDropout(max_holes=10, min_holes=1, fill_value=255, max_height=40, max_width=20, p=0.5),

    # Weather Augmentations: half of the time exactly one of these is applied.
    # Inside OneOf the children's p values are normalized and act as relative
    # weights, so rain/snow/fog are each twice as likely as flare/shadow.
    A.OneOf([
        A.RandomRain(p=.25),
        A.RandomSnow(p=.25),
        A.RandomFog(p=.25),
        A.RandomSunFlare(p=.125),
        A.RandomShadow(p=.125),
    ], p=0.5),

    # Color Augmentations: a quarter of the time one of these (equal weights).
    A.OneOf([
        A.RandomBrightnessContrast(p=0.5),
        A.RandomGamma(p=0.5),
        A.RGBShift(p=0.5),
        A.HueSaturationValue(p=0.5),
    ], p=0.25),
])

# Helper Functions

In [3]:
# Download video from YouTube using url
def downloadVideo(
    url,
    video_name,
    output_path="C:\\Users\\luoal\\Documents\\PenaltyProphet\\Data\\penalty_vids",
):
    """Download a progressive MP4 stream of a YouTube video.

    Args:
        url: Full YouTube watch URL.
        video_name: File name (without extension) to save the video under.
        output_path: Folder the ``<video_name>.mp4`` file is written to.

    Returns:
        None. This is best effort: any failure is printed together with the
        offending URL instead of being raised, so one bad link does not stop a
        batch of downloads.
    """
    try:
        # Build the YouTube object inside the try block so that a malformed or
        # unavailable URL is reported the same way as a failed download.
        stream = (
            YouTube(url)
            .streams.filter(progressive=True, file_extension="mp4")
            .last()
        )
        if stream is None:
            # .last() yields None when the filter matched no stream at all.
            print(f"No progressive mp4 stream available for {url}")
            return
        stream.download(output_path=output_path, filename=f"{video_name}.mp4")
    except Exception as e:
        print(f"Failed to download {url}: {e}")


# Grab every frame from a downloaded video and save each one as a JPEG
def getFrames(
    video,
    start_directory="C:\\Users\\luoal\\Documents\\PenaltyProphet\\Data\\penalty_vids",
    end_directory="C:\\Users\\luoal\\Documents\\PenaltyProphet\\Data\\frame_trash",
):
    """Dump all frames of ``<start_directory>/<video>.mp4`` into ``end_directory``.

    Frames are written as ``<video>_<image_id>.jpg`` where ``image_id`` is the
    notebook-wide counter, so frame names stay unique across videos.

    Args:
        video: Video name WITHOUT the ``.mp4`` extension.
        start_directory: Folder that contains the downloaded video.
        end_directory: Folder the frames are written to (created if missing).

    Returns:
        None.
    """
    global image_id
    video_path = os.path.join(start_directory, video + ".mp4")
    # cv2.imwrite does not raise when the target folder is missing, it just
    # writes nothing -- make sure the folder exists up front.
    os.makedirs(end_directory, exist_ok=True)

    vidcap = cv2.VideoCapture(video_path)
    if not vidcap.isOpened():
        vidcap.release()
        print(f"Could not open video: {video_path}")
        return

    try:
        while vidcap.isOpened():
            success, image = vidcap.read()
            if not success:
                break  # no more frames
            # `video` carries no extension, so it is used as-is for the name
            # (slicing off 4 characters would truncate the video name).
            cv2.imwrite(
                os.path.join(end_directory, f"{video}_{image_id}.jpg"), image
            )  # save frame as JPEG file
            image_id += 1
    finally:
        # Always free the capture handle, even if writing a frame fails.
        vidcap.release()
    print("Done Here!")
        

# Delete all augmented images from every class folder of a training directory
def delAugmented(
    directory="C:\\Users\\luoal\\Documents\\PenaltyProphet\\Data\\kicker_pens_simple\\train",
):
    """Remove every file whose name contains ``_aug_`` from the class folders.

    Args:
        directory: Training folder whose immediate sub-folders (one per class)
            are scanned.

    Returns:
        None.
    """
    for class_name in os.listdir(directory):
        class_dir = os.path.join(directory, class_name)
        # Skip stray files that sit next to the class folders (e.g. OS thumbnail
        # caches); listing them as a directory would raise.
        if not os.path.isdir(class_dir):
            continue
        for image in os.listdir(class_dir):
            if "_aug_" in image:
                os.remove(os.path.join(class_dir, image))


# Empty the frame trash folder
def delTrash(
    directory="C:\\Users\\luoal\\Documents\\PenaltyProphet\\Data\\frame_trash",
):
    """Delete every entry found directly inside ``directory``."""
    doomed_paths = [os.path.join(directory, name) for name in os.listdir(directory)]
    for path in doomed_paths:
        os.remove(path)


# Write one augmented copy of an image next to the original
def augmentImages(directory, image):
    """Create ``<name>_aug_<num>.jpg`` in ``directory`` from ``image``.

    Files that are themselves augmentations (``_aug_`` in the name) are
    skipped so augmentations are never stacked on top of each other.

    Args:
        directory: Folder that holds the image; the output is written here too.
        image: File name of the source image inside ``directory``.

    Returns:
        None. The notebook-wide ``num`` counter is advanced once per written
        file so output names never collide.
    """
    global num
    # splitext copes with extensions of any length (".jpg", ".jpeg", ...).
    image_name, _ = os.path.splitext(image)
    if "_aug_" in image_name:
        return

    source_path = os.path.join(directory, image)
    pixels = cv2.imread(source_path, cv2.IMREAD_UNCHANGED)
    if pixels is None:
        # cv2.imread signals an unreadable / non-image file by returning None
        # rather than raising; skip it instead of crashing inside TRANSFORM.
        print(f"Skipping unreadable image: {source_path}")
        return

    cv2.imwrite(
        os.path.join(directory, f"{image_name}_aug_{num}.jpg"),
        TRANSFORM(image=pixels)["image"],
    )
    num += 1

# Download the penalties into frames

In [None]:
# (url, file name) pairs of every source video. Each video is listed once:
# a second download of the same url/name would only redo the same work.
PENALTY_VIDEOS = [
    # Professional match footage
    ("https://www.youtube.com/watch?v=EfM_28vo6R0", "facup_shootout"),
    ("https://www.youtube.com/watch?v=inNPgOOP30c", "carabao_cup"),
    ("https://www.youtube.com/watch?v=jxP73Zqvq0I", "World_Cup_2022"),
    ("https://www.youtube.com/watch?v=Zpx7iSNDAg0", "Euro_2020"),
    ("https://www.youtube.com/watch?v=Pwm9XTNGGMo", "Copa_America"),
    ("https://www.youtube.com/watch?v=KoiXYX7tui4", "ArgentinavNetherlands"),
    ("https://www.youtube.com/watch?v=vluo9tQcTDU", "SpainvMorocco"),
    ("https://www.youtube.com/watch?v=DXpiG_-YTxk", "UruguayvGhana"),
    ("https://www.youtube.com/watch?v=JCLI_U3imR4", "ManUvMid"),
    ("https://www.youtube.com/watch?v=VLhgWCI96q8", "NethervCosta"),
    ("https://www.youtube.com/watch?v=WBHaJa5rZiI", "WorstPens"),
    ("https://www.youtube.com/watch?v=X_NSB3CnHoo", "VillvManU"),
    ("https://www.youtube.com/watch?v=3bqN9o4QtBU", "Copa2016"),
    ("https://www.youtube.com/watch?v=r9ynUq_rcNY", "FaCup"),
    # Football youtubers
    ("https://www.youtube.com/watch?v=TEKzuzuHXuU", "Freekickerz"),
    ("https://www.youtube.com/watch?v=kFJUYMdPARY", "TheoBaker"),
    ("https://www.youtube.com/watch?v=9nO4495R2Vc", "LeftvRight"),
    ("https://www.youtube.com/watch?v=5bQUqTsC0JA", "Miniminter"),
    ("https://www.youtube.com/watch?v=klJYCR-6FRw", "ChrisMD"),
    ("https://www.youtube.com/watch?v=phQ48JVIXJg", "ProDirect"),
]

for video_url, video_name in PENALTY_VIDEOS:
    downloadVideo(video_url, video_name)

# Sadly, I had to go in and edit the penalty kicks and goalie kicks manually
- Right
- Left
- Center

# Kicker Penalties (70% train, 15% validation, 15% testing)

In [None]:
# Move validation data back to train
validation = pd.read_csv(os.path.join(csv_dir, "validation_kicker.csv"))
for train_path in validation["ImageDirectory"].values:
    # Swap only the "\train\" path component; a bare replace("train", ...) would
    # also rewrite any other "train" substring in the folder or file name.
    val_path = train_path.replace("\\train\\", "\\val\\", 1)
    # Files that were already moved back are skipped so the cell can be re-run.
    if os.path.exists(val_path):
        os.replace(val_path, train_path)

In [None]:
# Move test data back to train
# The test split is recorded in test_kicker.csv (validation_kicker.csv lists
# the validation images, which do not live in the test folder).
test = pd.read_csv(os.path.join(csv_dir, "test_kicker.csv"))
for train_path in test["ImageDirectory"].values:
    # Swap only the "\train\" path component; a bare replace("train", ...) would
    # also rewrite any other "train" substring in the folder or file name.
    test_path = train_path.replace("\\train\\", "\\test\\", 1)
    # Files that were already moved back are skipped so the cell can be re-run.
    if os.path.exists(test_path):
        os.replace(test_path, train_path)

In [4]:
delAugmented() #Deletes all augmented images

# Index every remaining training image as [full path, class label]; the label
# is the name of the class folder the image sits in.
train_dir = "C:\\Users\\luoal\\Documents\\PenaltyProphet\\Data\\kicker_pens_simple\\train"
train_data = [
    [f"{train_dir}\\{file}\\{image}", file]
    for file in os.listdir(train_dir)
    for image in os.listdir(f"{train_dir}\\{file}")
]

directoryDF = pd.DataFrame(train_data,columns=['ImageDirectory','ImageLabel'])
directoryDF

Unnamed: 0,ImageDirectory,ImageLabel
0,C:\Users\David Salazar\Desktop\Python\Pendicti...,Center
1,C:\Users\David Salazar\Desktop\Python\Pendicti...,Center
2,C:\Users\David Salazar\Desktop\Python\Pendicti...,Center
3,C:\Users\David Salazar\Desktop\Python\Pendicti...,Center
4,C:\Users\David Salazar\Desktop\Python\Pendicti...,Center
...,...,...
2147,C:\Users\David Salazar\Desktop\Python\Pendicti...,Right
2148,C:\Users\David Salazar\Desktop\Python\Pendicti...,Right
2149,C:\Users\David Salazar\Desktop\Python\Pendicti...,Right
2150,C:\Users\David Salazar\Desktop\Python\Pendicti...,Right


In [6]:
# Train = 70%, Validate = 15%, Test = 15%
# Shuffle with a fixed seed (same one the goalie split uses) so the split is
# reproducible, then cut with iloc: np.split on a DataFrame goes through the
# deprecated DataFrame.swapaxes.
kicker_shuffled = directoryDF.sample(frac=1, random_state=4)
kicker_train_end = int(.7*len(kicker_shuffled))
kicker_val_end = int(.85*len(kicker_shuffled))
train_kicker = kicker_shuffled.iloc[:kicker_train_end]
validate_kicker = kicker_shuffled.iloc[kicker_train_end:kicker_val_end]
test_kicker = kicker_shuffled.iloc[kicker_val_end:]
validate_kicker.to_csv(os.path.join(csv_dir, "validation_kicker.csv"),index=False) # Save so we know what we are validating on
test_kicker.to_csv(os.path.join(csv_dir, "test_kicker.csv"),index=False) # Save so we know what we are testing on
print(f"Training data length: {len(train_kicker)}\nValidation data length: {len(validate_kicker)}\nTesting data length: {len(test_kicker)}")

Training data length: 1506
Validation data length: 323
Testing data length: 323


In [7]:
val_dir = "C:\\Users\\luoal\\Documents\\PenaltyProphet\\Data\\kicker_pens_simple\\val"
test_dir = "C:\\Users\\luoal\\Documents\\PenaltyProphet\\Data\\kicker_pens_simple\\test"

# Move validation images to the validation directory and test images to the
# test directory, keeping each image inside its class sub-folder.
for split_frame, split_dir in ((validate_kicker, val_dir), (test_kicker, test_dir)):
    for directory in split_frame['ImageDirectory'].values:
        # Paths look like ...\<class>\<image>: take the last two components.
        _, classification, image_name = directory.rsplit('\\', 2)
        destination = os.path.join(split_dir, classification)
        # os.replace fails if the class folder does not exist yet.
        os.makedirs(destination, exist_ok=True)
        os.replace(directory, os.path.join(destination, image_name))

In [8]:
# Augment images in training data
TARGET_IMAGES_PER_CLASS = 1500  # every class folder is topped up to this many files

for file in os.listdir(train_dir):
    class_dir = os.path.join(train_dir, file)
    if not os.path.isdir(class_dir):
        continue  # ignore stray files sitting next to the class folders
    while len(os.listdir(class_dir)) < TARGET_IMAGES_PER_CLASS:
        count_before = len(os.listdir(class_dir))
        for image in os.listdir(class_dir):
            augmentImages(class_dir, image)
            if len(os.listdir(class_dir)) >= TARGET_IMAGES_PER_CLASS:
                break
        if len(os.listdir(class_dir)) <= count_before:
            # A full pass added nothing (empty folder, or no source image could
            # be augmented): stop instead of looping forever.
            print(f"No images could be augmented in {class_dir}; skipping")
            break

# Goalie Penalties (70% train, 15% validation, 15% testing)

In [None]:
# Move validation and test data back to train
for split_csv, split_folder in (("validation_goalie.csv", "val"), ("test_goalie.csv", "test")):
    split_frame = pd.read_csv(os.path.join(csv_dir, split_csv))
    for train_path in split_frame["ImageDirectory"].values:
        # Swap only the "\train\" path component; a bare replace("train", ...)
        # would also rewrite any other "train" substring in the path.
        moved_path = train_path.replace("\\train\\", f"\\{split_folder}\\", 1)
        # Files that were already moved back are skipped so the cell can be re-run.
        if os.path.exists(moved_path):
            os.replace(moved_path, train_path)

In [9]:
train_dir = "C:\\Users\\luoal\\Documents\\PenaltyProphet\\Data\\goalie_pens\\train"
delAugmented(directory=train_dir) #Deletes all augmented images

# Index every remaining training image as [full path, class label]; the label
# is the name of the class folder the image sits in.
train_data = [
    [f"{train_dir}\\{file}\\{image}", file]
    for file in os.listdir(train_dir)
    for image in os.listdir(f"{train_dir}\\{file}")
]

directoryDF = pd.DataFrame(train_data,columns=['ImageDirectory','ImageLabel'])
directoryDF

Unnamed: 0,ImageDirectory,ImageLabel
0,C:\Users\David Salazar\Desktop\Python\Pendicti...,Center_Goalie
1,C:\Users\David Salazar\Desktop\Python\Pendicti...,Center_Goalie
2,C:\Users\David Salazar\Desktop\Python\Pendicti...,Center_Goalie
3,C:\Users\David Salazar\Desktop\Python\Pendicti...,Center_Goalie
4,C:\Users\David Salazar\Desktop\Python\Pendicti...,Center_Goalie
...,...,...
1792,C:\Users\David Salazar\Desktop\Python\Pendicti...,Right_Goalie
1793,C:\Users\David Salazar\Desktop\Python\Pendicti...,Right_Goalie
1794,C:\Users\David Salazar\Desktop\Python\Pendicti...,Right_Goalie
1795,C:\Users\David Salazar\Desktop\Python\Pendicti...,Right_Goalie


In [10]:
# Train = 70%, Validate = 15%, Test = 15%
# Shuffle with a fixed seed, then cut with iloc: np.split on a DataFrame goes
# through the deprecated DataFrame.swapaxes.
goalie_shuffled = directoryDF.sample(frac=1,random_state=4)
goalie_train_end = int(.7*len(goalie_shuffled))
goalie_val_end = int(.85*len(goalie_shuffled))
train_goalie = goalie_shuffled.iloc[:goalie_train_end]
validate_goalie = goalie_shuffled.iloc[goalie_train_end:goalie_val_end]
test_goalie = goalie_shuffled.iloc[goalie_val_end:]
validate_goalie.to_csv(os.path.join(csv_dir, "validation_goalie.csv"),index=False) # Save so we know what we are validating on
test_goalie.to_csv(os.path.join(csv_dir, "test_goalie.csv"),index=False) # Save so we know what we are testing on
print(f"Training data length: {len(train_goalie)}\nValidation data length: {len(validate_goalie)}\nTesting data length: {len(test_goalie)}")

Training data length: 1257
Validation data length: 270
Testing data length: 270


In [None]:
# Move validation images to validation directory
val_dir = "C:\\Users\\luoal\\Documents\\PenaltyProphet\\Data\\goalie_pens\\val"
for directory in validate_goalie['ImageDirectory'].values:
    # Paths look like ...\<class>\<image>: take the last two components.
    _, classification, image_name = directory.rsplit('\\', 2)
    destination = os.path.join(val_dir, classification)
    # os.replace fails if the class folder does not exist yet.
    os.makedirs(destination, exist_ok=True)
    os.replace(directory, os.path.join(destination, image_name))

In [None]:
# Move test images to test directory
test_dir = "C:\\Users\\luoal\\Documents\\PenaltyProphet\\Data\\goalie_pens\\test"
for directory in test_goalie['ImageDirectory'].values:
    # Paths look like ...\<class>\<image>: take the last two components.
    _, classification, image_name = directory.rsplit('\\', 2)
    destination = os.path.join(test_dir, classification)
    # os.replace fails if the class folder does not exist yet.
    os.makedirs(destination, exist_ok=True)
    os.replace(directory, os.path.join(destination, image_name))

In [None]:
# Augment images in training data
TARGET_IMAGES_PER_CLASS = 1500  # every class folder is topped up to this many files

for file in os.listdir(train_dir):
    class_dir = os.path.join(train_dir, file)
    if not os.path.isdir(class_dir):
        continue  # ignore stray files sitting next to the class folders
    while len(os.listdir(class_dir)) < TARGET_IMAGES_PER_CLASS:
        count_before = len(os.listdir(class_dir))
        for image in os.listdir(class_dir):
            augmentImages(class_dir, image)
            if len(os.listdir(class_dir)) >= TARGET_IMAGES_PER_CLASS:
                break
        if len(os.listdir(class_dir)) <= count_before:
            # A full pass added nothing (empty folder, or no source image could
            # be augmented): stop instead of looping forever.
            print(f"No images could be augmented in {class_dir}; skipping")
            break