-
Notifications
You must be signed in to change notification settings - Fork 478
/
create_dataset.py
139 lines (110 loc) · 5.08 KB
/
create_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import argparse
import os
import os.path
from shutil import rmtree, move
import random
# For parsing commandline arguments
parser = argparse.ArgumentParser()
parser.add_argument("--ffmpeg_dir", type=str, required=True, help='path to ffmpeg.exe')
parser.add_argument("--dataset", type=str, default="custom", help='specify if using "adobe240fps" or custom video dataset')
parser.add_argument("--videos_folder", type=str, required=True, help='path to the folder containing videos')
parser.add_argument("--dataset_folder", type=str, required=True, help='path to the output dataset folder')
parser.add_argument("--img_width", type=int, default=640, help="output image width")
parser.add_argument("--img_height", type=int, default=360, help="output image height")
parser.add_argument("--train_test_split", type=tuple, default=(90, 10), help="train test split for custom dataset")
args = parser.parse_args()
def extract_frames(videos, inDir, outDir):
"""
Converts all the videos passed in `videos` list to images.
Parameters
----------
videos : list
name of all video files.
inDir : string
path to input directory containing videos in `videos` list.
outDir : string
path to directory to output the extracted images.
Returns
-------
None
"""
for video in videos:
os.mkdir(os.path.join(outDir, os.path.splitext(video)[0]))
retn = os.system('{} -i {} -vf scale={}:{} -vsync 0 -qscale:v 2 {}/%04d.jpg'.format(os.path.join(args.ffmpeg_dir, "ffmpeg"), os.path.join(inDir, video), args.img_width, args.img_height, os.path.join(outDir, os.path.splitext(video)[0])))
if retn:
print("Error converting file:{}. Exiting.".format(video))
def create_clips(root, destination):
"""
Distributes the images extracted by `extract_frames()` in
clips containing 12 frames each.
Parameters
----------
root : string
path containing extracted image folders.
destination : string
path to output clips.
Returns
-------
None
"""
folderCounter = -1
files = os.listdir(root)
# Iterate over each folder containing extracted video frames.
for file in files:
images = os.listdir(os.path.join(root, file))
for imageCounter, image in enumerate(images):
# Bunch images in groups of 12 frames
if (imageCounter % 12 == 0):
if (imageCounter + 11 >= len(images)):
break
folderCounter += 1
os.mkdir("{}/{}".format(destination, folderCounter))
move("{}/{}/{}".format(root, file, image), "{}/{}/{}".format(destination, folderCounter, image))
rmtree(os.path.join(root, file))
def main():
# Create dataset folder if it doesn't exist already.
if not os.path.isdir(args.dataset_folder):
os.mkdir(args.dataset_folder)
extractPath = os.path.join(args.dataset_folder, "extracted")
trainPath = os.path.join(args.dataset_folder, "train")
testPath = os.path.join(args.dataset_folder, "test")
validationPath = os.path.join(args.dataset_folder, "validation")
os.mkdir(extractPath)
os.mkdir(trainPath)
os.mkdir(testPath)
os.mkdir(validationPath)
if(args.dataset == "adobe240fps"):
f = open("adobe240fps/test_list.txt", "r")
videos = f.read().split('\n')
extract_frames(videos, args.videos_folder, extractPath)
create_clips(extractPath, testPath)
f = open("adobe240fps/train_list.txt", "r")
videos = f.read().split('\n')
extract_frames(videos, args.videos_folder, extractPath)
create_clips(extractPath, trainPath)
# Select 100 clips at random from test set for validation set.
testClips = os.listdir(testPath)
indices = random.sample(range(len(testClips)), 100)
for index in indices:
move("{}/{}".format(testPath, index), "{}/{}".format(validationPath, index))
else: # custom dataset
# Extract video names
videos = os.listdir(args.videos_folder)
# Create random train-test split.
testIndices = random.sample(range(len(videos)), int((args.train_test_split[1] * len(videos)) / 100))
trainIndices = [x for x in range((len(videos))) if x not in testIndices]
# Create list of video names
testVideoNames = [videos[index] for index in testIndices]
trainVideoNames = [videos[index] for index in trainIndices]
# Create train-test dataset
extract_frames(testVideoNames, args.videos_folder, extractPath)
create_clips(extractPath, testPath)
extract_frames(trainVideoNames, args.videos_folder, extractPath)
create_clips(extractPath, trainPath)
# Select clips at random from test set for validation set.
testClips = os.listdir(testPath)
indices = random.sample(range(len(testClips)), min(100, int(len(testClips) / 5)))
for index in indices:
move("{}/{}".format(testPath, index), "{}/{}".format(validationPath, index))
rmtree(extractPath)
main()