<a href="https://colab.research.google.com/github/bird0401/image_search/blob/main/no_metric_learning/2_data_feature.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import shutil
import os 

import pathlib
import random

import numpy as np
import PIL.Image

In [2]:
# main dir
if os.path.isdir("/content/mnt") : shutil.rmtree("/content/mnt")
shutil.copytree("/content/drive/MyDrive/mnt","/content/mnt")

'/content/mnt'

In [3]:
# unzip
if os.path.isdir("/content/mnt/media/img_celeba") : shutil.rmtree("/content/mnt/media/img_celeba")
shutil.unpack_archive("mnt/media/img_celeba.zip", "mnt/media/")

In [4]:
!find mnt/media/img_celeba -type f -name "*.jpg" | sort > mnt/media/objects.txt

In [5]:
!wc -l mnt/media/objects.txt

202599 mnt/media/objects.txt


In [6]:
!head -n 5 mnt/media/objects.txt

mnt/media/img_celeba/000001.jpg
mnt/media/img_celeba/000002.jpg
mnt/media/img_celeba/000003.jpg
mnt/media/img_celeba/000004.jpg
mnt/media/img_celeba/000005.jpg


# Exploit feature

In [7]:
MEDIA_DIR = pathlib.Path("/content/mnt/media")
FEATURE_DIR = pathlib.Path("/content/mnt/feature")
ONNX_MODEL_PATH = pathlib.Path("/content/mnt/model/mobilenet_v3_large_100_224_feature_vector_v5.onnx")
image_list_path = MEDIA_DIR / "objects.txt"
feature_base_dir = FEATURE_DIR / ONNX_MODEL_PATH.stem

In [8]:
# to create path
def make_nested_id_path(dir, id, ext=""):
    return dir / id[0:2] / id[2:4] / (id + ext)
make_nested_id_path(feature_base_dir, "1234567", ".npy")

PosixPath('/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/12/34/1234567.npy')

In [9]:
with image_list_path.open("r") as file:
    image_paths = [pathlib.Path(path.rstrip()) for path in file.readlines()]
print(len(image_paths))

202599


In [10]:
!pip install onnxruntime-gpu==1.7.0
import onnxruntime

Collecting onnxruntime-gpu==1.7.0
  Downloading onnxruntime_gpu-1.7.0-cp37-cp37m-manylinux2014_x86_64.whl (29.9 MB)
[K     |████████████████████████████████| 29.9 MB 1.2 MB/s 
Installing collected packages: onnxruntime-gpu
Successfully installed onnxruntime-gpu-1.7.0


In [11]:
# pre-process each image and save
# takes about 40 minutes

random.shuffle(image_paths)

onnx_session = onnxruntime.InferenceSession(str(ONNX_MODEL_PATH))
input_name = onnx_session.get_inputs()[0].name
output_name = onnx_session.get_outputs()[0].name 

for image_path in image_paths:
    print(image_path)
    object_id = image_path.name
    feature_path = make_nested_id_path(feature_base_dir, object_id, ".npy")
    print(feature_path)
    if feature_path.exists():
        print("skip")
        continue

    image = PIL.Image.open(image_path)
    image = image.convert("RGB")
    image = image.resize((224, 224))
    image = np.array(image, dtype=np.float32)
    image = image / 255

    feature = onnx_session.run([output_name], {input_name: np.expand_dims(image, 0)})[0][0]
    feature_path.parent.mkdir(parents=True, exist_ok=True)
    np.save(feature_path, feature)

[1;30;43mストリーミング出力は最後の 5000 行に切り捨てられました。[0m
mnt/media/img_celeba/128453.jpg
/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/12/84/128453.jpg.npy
mnt/media/img_celeba/055037.jpg
/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/05/50/055037.jpg.npy
mnt/media/img_celeba/102496.jpg
/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/10/24/102496.jpg.npy
mnt/media/img_celeba/038198.jpg
/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/03/81/038198.jpg.npy
mnt/media/img_celeba/101880.jpg
/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/10/18/101880.jpg.npy
mnt/media/img_celeba/196212.jpg
/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/19/62/196212.jpg.npy
mnt/media/img_celeba/080820.jpg
/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/08/08/080820.jpg.npy
mnt/media/img_celeba/014989.jpg
/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/01/49/014989.jpg.npy
mn

In [12]:
feature_paths = sorted(feature_base_dir.glob("*/*/*.npy"))
print(len(feature_paths))

202599


In [13]:
feature_paths[:5]

[PosixPath('/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/00/00/000001.jpg.npy'),
 PosixPath('/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/00/00/000002.jpg.npy'),
 PosixPath('/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/00/00/000003.jpg.npy'),
 PosixPath('/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/00/00/000004.jpg.npy'),
 PosixPath('/content/mnt/feature/mobilenet_v3_large_100_224_feature_vector_v5/00/00/000005.jpg.npy')]

In [14]:
# copy to drive
# target_dir="/content/drive/MyDrive/mnt/feature"

# if os.path.isdir(target_dir) : shutil.rmtree(target_dir)
# shutil.copytree(FEATURE_DIR, target_dir)

# Split for easier use

In [None]:
for i in range(21):
  size = 10000
  object_ids = []
  features = []

  for feature_path in feature_paths[i*size : i*size+size]:
      print(feature_path)
      object_id = feature_path.stem
      feature = np.load(feature_path)
      object_ids.append(object_id)
      features.append(feature)

  object_ids = np.array(object_ids)
  features = np.array(features)

  # save
  object_ids_path = feature_base_dir / "{:04d}.object_ids.npy".format(i)
  features_path = feature_base_dir / "{:04d}.features.npy".format(i)

  np.save(object_ids_path, object_ids)
  np.save(features_path, features)

In [None]:
# remove no need folders
for i in range(21):
  if os.path.isdir(feature_base_dir/str(i).zfill(2)) : shutil.rmtree(feature_base_dir/str(i).zfill(2))

In [17]:
# copy to drive
target_dir="/content/drive/MyDrive/mnt/feature/"

if os.path.isdir(target_dir) : shutil.rmtree(target_dir)
shutil.copytree(FEATURE_DIR, target_dir)

'/content/drive/MyDrive/mnt/feature/'