In [1]:
import os
import csv
import sys
import numpy as np

from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
from tensorflow.keras.preprocessing import image

In [2]:
# Side length, in pixels, that every image is resized to before it enters the network.
width, height = 200, 200
# Number of images per batch yielded by the directory iterator.
batch_size = 128
# Root directory of the ImageNet training split read by flow_from_directory.
route = "/home/ubuntu/imagenet/ILSVRC/Data/CLS-LOC/train"

In [3]:
# Use the same VGG19 preprocessing that get_image_feature applies, so batches
# drawn from this iterator are normalised the way the ImageNet weights expect
# (a plain 1/255 rescale is not what VGG19 was trained with).
image_generator = image.ImageDataGenerator(preprocessing_function=preprocess_input)

# shuffle=False makes batches come out in the same order as
# directory_iterator.filenames / directory_iterator.classes, so anything
# predicted from the iterator can be matched back to its file and label.
directory_iterator = image_generator.flow_from_directory(
    directory=route,
    target_size=(width, height),
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=False
)

Found 1281167 images belonging to 1000 classes.


In [4]:
# VGG19 without its fully-connected top, loaded with the ImageNet weights and
# built for 3-channel inputs of the configured width/height.
image_shape = (width, height, 3)
model = VGG19(
    input_shape=image_shape,
    weights="imagenet",
    include_top=False
)

In [5]:
def get_image_feature(image_path, size=None):
  """Return the flattened VGG19 output for a single image file.

  Args:
    image_path: Path of the image to load.
    size: Value forwarded to `image.load_img` as `target_size`. When omitted,
      the notebook-wide `(width, height)` is used. It has to agree with the
      input shape `model` was built with, otherwise `model.predict` will
      reject the array.

  Returns:
    A 1-D numpy array holding the model output for the image.
  """
  # Previously `size` was accepted but ignored; honour it, falling back to the
  # global resolution so existing callers behave exactly as before.
  if size is None:
    size = (width, height)

  img = image.load_img(image_path, target_size=size)
  x = image.img_to_array(img)
  x = np.expand_dims(x, axis=0)  # add the batch axis: the model takes a batch of one
  x = preprocess_input(x)

  # verbose=0 keeps per-image calls silent on Keras versions whose default
  # would otherwise print a progress bar for every single prediction.
  return model.predict(x, verbose=0).flatten()

In [6]:
def get_features_size():
  """Return how many values one image's feature vector contains.

  The first file listed by `directory_iterator` is run through
  `get_image_feature` and the size of the resulting array is reported.
  """
  first_file = directory_iterator.filenames[0]
  sample_vector = get_image_feature(os.path.join(route, first_file), (width, height))

  return np.array(sample_vector).size

In [8]:
# Export one CSV row per training image: the flattened VGG19 features followed
# by the image's class index as the last column.
with open("vgg_features.csv", 'w') as f:
  writer = csv.writer(f)

  number_of_instances = len(directory_iterator.filenames)
  number_of_features = get_features_size()

  # Header row: feature column indices 0..n-1, then -1 for the label column.
  headers = list(range(number_of_features))
  headers.append(-1)
  writer.writerow(headers)

  count = 0  # stays defined (as 0) even if the directory holds no images
  samples = zip(directory_iterator.filenames, directory_iterator.classes)
  for count, (filename, label) in enumerate(samples, 1):
    file_path = os.path.join(route, filename)
    image_features = get_image_feature(file_path, (width, height))
    # np.append yields a single array, so the label is stored with the
    # features' dtype in the final column.
    row = np.append(image_features, [label])
    writer.writerow(row)

    if count % 1000 == 0:
      # Floor division prints a whole-number percentage on every Python
      # version; true division would emit long floats under Python 3.
      sys.stdout.write(str((count * 100) // number_of_instances) + "%\n")

0%
0%
0%
0%
0%
0%
0%
0%
0%
0%
0%
0%
1%
1%
1%
1%
1%
1%
1%
1%
1%
1%
1%
1%
1%
2%
2%
2%
2%
2%
2%
2%
2%
2%
2%
2%
2%
2%
3%
3%
3%
3%
3%
3%
3%
3%
3%
3%
3%
3%
3%
4%
4%
4%
4%
4%
4%
4%
4%
4%
4%
4%
4%
4%
5%
5%
5%
5%
5%
5%
5%
5%
5%
5%
5%
5%
6%
6%
6%
6%
6%
6%
6%
6%
6%
6%
6%
6%
6%
7%
7%
7%
7%
7%
7%
7%
7%
7%
7%
7%
7%
7%
8%
8%
8%
8%
8%
8%
8%
8%
8%
8%
8%
8%
8%
9%
9%
9%
9%
9%
9%
9%
9%
9%
9%
9%
9%
9%
10%
10%
10%
10%
10%
10%
10%
10%
10%
10%
10%
10%
11%
11%
11%
11%
11%
11%
11%
11%
11%
11%
11%
11%
11%
12%
12%
12%
12%
12%
12%
12%
12%
12%
12%
12%
12%
12%
13%
13%
13%
13%
13%
13%
13%
13%
13%
13%
13%
13%
13%
14%
14%
14%
14%
14%
14%
14%
14%
14%
14%
14%
14%
14%
15%
15%
15%
15%
15%
15%
15%
15%
15%
15%
15%
15%
16%
16%
16%
16%
16%
16%
16%
16%
16%
16%
16%
16%
16%
17%
17%
17%
17%
17%
17%
17%
17%
17%
17%
17%
17%
17%
18%
18%
18%
18%
18%
18%
18%
18%
18%
18%
18%
18%
18%
19%
19%
19%
19%
19%
19%
19%
19%
19%
19%
19%
19%
19%
20%
20%
20%
20%
20%
20%
20%
20%
20%
20%
20%
20%
20%
21%
21%
21%
21%
21%
21%
21%
21%
21%
21%
21%
21%
22%


In [None]:
# Model.predict accepts the directory iterator directly; predict_generator is
# deprecated and no longer exists in recent Keras releases.
# NOTE(review): rows follow the iterator's batch order, which only lines up
# with directory_iterator.filenames/.classes when the iterator does not shuffle.
__features__ = model.predict(directory_iterator)