In [1]:
# Importing Essential Libraries
import os
import cv2
import glob
import random
from tqdm import tqdm

In [2]:
# Required Parameters
dataset = "UCF11_updated_mpg/"          # Dataset path
dataset2 = "dataset/"                   # Dataset2 path
train_path = "training_set/"            # Training path
test_path = "testing_set/"              # Testing path
no_of_frames = 1650                     # Total number of frames to be extracted
categories = os.listdir(dataset)        # Name of each class/category

In [3]:
# Creating dataset directory
try:
    os.mkdir(dataset2)
    print("Folder {} created...".format(dataset2))
except:
    print("A folder {} already exists...".format(dataset2))

Folder dataset/ created...


In [4]:
# Creating training_set directory
try:
    os.mkdir(train_path)
    print("Folder {} created...".format(train_path))
except:
    print("A folder {} already exists...".format(train_path))

Folder training_set/ created...


In [5]:
# Creating testing_set directory
try:
    os.mkdir(test_path)
    print("Folder {} created...".format(test_path))
except:
    print("A folder {} already exists...".format(test_path))

Folder testing_set/ created...


In [6]:
# Creating same directories for dataset2/ that are already present in the dataset directory
for category in categories:
    try:
        os.mkdir(dataset2 + category)
        print("Folder {} created...".format(category))
    except:
        print("A folder already exists, named {}...".format(category, dataset))

Folder swing created...
Folder soccer_juggling created...
Folder biking created...
Folder diving created...
Folder golf_swing created...
Folder horse_riding created...
Folder basketball created...
Folder volleyball_spiking created...
Folder trampoline_jumping created...
Folder tennis_swing created...
Folder walking created...


In [7]:
# Creating same directories for training_set/ that are already present in the dataset directory
for category in categories:
    try:
        os.mkdir(train_path + category)
        print("Folder {} created...".format(category))
    except:
        print("A folder already exists, named {}...".format(category, train_path))

Folder swing created...
Folder soccer_juggling created...
Folder biking created...
Folder diving created...
Folder golf_swing created...
Folder horse_riding created...
Folder basketball created...
Folder volleyball_spiking created...
Folder trampoline_jumping created...
Folder tennis_swing created...
Folder walking created...


In [8]:
# Creating same directories for testing_set/ that are already present in the dataset directory
for category in categories:
    try:
        os.mkdir(test_path  + category)
        print("Folder {} created...".format(category))
    except:
        print("A folder already exists, named {}...".format(category, test_path))

Folder swing created...
Folder soccer_juggling created...
Folder biking created...
Folder diving created...
Folder golf_swing created...
Folder horse_riding created...
Folder basketball created...
Folder volleyball_spiking created...
Folder trampoline_jumping created...
Folder tennis_swing created...
Folder walking created...


In [9]:
# Combining multiple videos into single video file
for category in tqdm(categories):
    videofiles = [dataset + category + "/" + n for n in os.listdir(dataset + category) if n[-4:]==".mpg"]
    video_index = 0
    cap = cv2.VideoCapture(videofiles[0])    
    fourcc = cv2.VideoWriter_fourcc(*"XVID")
    # fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    out = cv2.VideoWriter("{}/{}/{}.mpg".format(dataset2, category, category), fourcc, 25, (320, 240))
    while(cap.isOpened()):
        ret, frame = cap.read()
        if frame is None:
            video_index += 1
            if video_index >= len(videofiles):
                break
            else:
                cap = cv2.VideoCapture(videofiles[ video_index ])
                ret, frame = cap.read()
                out.write(frame)
        else:
            out.write(frame)
    cap.release()
    out.release()
    cv2.destroyAllWindows()

100%|██████████| 11/11 [03:09<00:00, 17.21s/it]


In [10]:
# Saving total no. of frames of each classes/categories into an array
total_frames = []
for category in tqdm(categories):
    cap = cv2.VideoCapture(dataset2 + category + "/" + category + ".mpg")
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    total_frames.append(length)
    cap.release()
    cv2.destroyAllWindows()
print(total_frames)

100%|██████████| 11/11 [00:00<00:00, 397.79it/s]

[25130, 42458, 30554, 26591, 22528, 34641, 18244, 12076, 21290, 25016, 25591]





In [11]:
# Extracting 1650 images from each category
for category in tqdm(categories):
    a = glob.glob(dataset2 + category + '/*.mpg')
    for i in range(len(a)):
        count = 0
        cap = cv2.VideoCapture(a[0])
        while(cap.isOpened()):
            frame_id = cap.get(1)
            ret, frame = cap.read()
            if ret != True:
                print("Exiting...")
                break
            if frame_id % int(total_frames[categories.index(category)] / no_of_frames) == 0.0:
                if count >= no_of_frames:
                    break
                file_name = train_path + category + '/frame_{}.jpg'.format(count); count += 1;
                cv2.imwrite(file_name, frame)
        cap.release()
        cv2.destroyAllWindows()

100%|██████████| 11/11 [01:03<00:00,  5.75s/it]


In [12]:
# # Extracting one frame per five frame from the Videos
# for category in tqdm(categories):
#     count = 0    
#     a = glob.glob(dataset + '/' + category + '/*.avi')
#     for i in range(len(a)):
#         cap = cv2.VideoCapture(a[i])
#         frameRate = cap.get(5)
#         while(cap.isOpened()):
#             frameId = cap.get(1)
#             ret, frame = cap.read()
#             if (ret != True):
#                 break
#             if (frameId % math.floor(frameRate) == 0):
#                 cv2.imwrite(train_path + '/' + category + '/{}_{}.jpg'.format(category, count), frame)
#                 count += 1
#         cap.release()

In [13]:
# # Extracting every frame from the Videos
# for category in tqdm(categories):
#     count = 0    
#     a = glob.glob(dataset + category + '/*.avi')
#     for i in range(len(a)):
#         cap = cv2.VideoCapture(a[i])
#         # frameRate = cap.get(5)
#         while(cap.isOpened()):
#             # frameId = cap.get(1)
#             ret, frame = cap.read()
#             if (ret != True):
#                 break
#             # if (frameId % math.floor(frameRate) == 0):
#             else:
#                 cv2.imwrite(train_path + category + '/{}_{}.jpg'.format(category, count), frame)
#                 count += 1
#         cap.release()

In [14]:
# Moving 150 random images from training_set into testing_set
for category in tqdm(categories):
    sub_file = [file for file in glob.glob(train_path + category + "/*")]
    test_files = random.sample(sub_file, 150)
    for test_file in test_files:
        img = cv2.imread(test_file)
        os.remove(test_file)
        cv2.imwrite(test_path + category + '/' + test_file.split("/")[-1] , img)

100%|██████████| 11/11 [00:03<00:00,  2.91it/s]


In [15]:
# Counting number of images in each folder of training and testing set
for category in categories:
    print(len(os.listdir(train_path + category)), "in training &",
          len(os.listdir(test_path + category)), "in testing", ":", category)

1500 in training & 150 in testing : swing
1500 in training & 150 in testing : soccer_juggling
1500 in training & 150 in testing : biking
1500 in training & 150 in testing : diving
1500 in training & 150 in testing : golf_swing
1500 in training & 150 in testing : horse_riding
1500 in training & 150 in testing : basketball
1500 in training & 150 in testing : volleyball_spiking
1500 in training & 150 in testing : trampoline_jumping
1500 in training & 150 in testing : tennis_swing
1500 in training & 150 in testing : walking
