In [1]:
import numpy as np
import os
from scipy.misc import imread, imresize
import matplotlib.pyplot as plt
%matplotlib inline
# Capture the notebook's working directory once; the data paths used by the
# following cells are built by prefixing this value.
cwd = os.getcwd()
print("current directory is [%s]" % cwd)

current directory is [/Users/noah/Documents/study/lecture/edwith]


In [2]:
# Source folders, one per class. The position of a folder in this list is the
# class index used when the one-hot labels are built.
paths = [
    "./data/benz/",
    "./data/audi/",
    "./data/infinity/",
]

# Human-readable class names (presumably index-aligned with `paths` -- verify
# whenever either list is edited).
categories = ['benz', 'audi', 'infinity']

# Preprocessing / output settings
imgsize = [64,64]            # target size handed to the resize call
use_gray = 0                 # truthy -> convert each image to grayscale first
data_name = "custom_data"    # base name of the .npz archive written under ./data/

# Echo the configuration so the user can check where images are expected and
# where the dataset will be written.
print ("your images should be at")
for i, path in enumerate(paths):
    print ("  [%d/%d] %s" % (i, len(paths), path))
print ("data will be saved to \n [%s]" % (cwd + '/data/' + data_name + '.npz'))

your images should be at
  [0/3] ./data/benz/
  [1/3] ./data/audi/
  [2/3] ./data/infinity/
data will be saved to 
 [/Users/noah/Documents/study/lecture/edwith/data/custom_data.npz]


In [3]:
# rgb2gray
def rgb2gray(rgb):
    """Collapse the channel axis of a colour image into a single gray value.

    Parameters
    ----------
    rgb : array-like with a ``shape`` attribute
        Image data. A 3-D array is treated as ``(..., channels)``; only the
        first three channels are used, so any further channel is ignored.
        Assumes the channel order is R, G, B -- confirm against the image
        reader in use.

    Returns
    -------
    numpy.ndarray
        For 3-D input, the weighted sum ``0.299*c0 + 0.587*c1 + 0.114*c2``
        taken over the last axis (result has one dimension fewer).
        Input with any other number of dimensions is returned unchanged.
    """
    # Use `==` here: `is` tests object identity, which only happened to work
    # for small cached ints and triggers a SyntaxWarning on Python >= 3.8.
    if len(rgb.shape) == 3:
        return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])
    return rgb

In [8]:
# load images
# NOTE: `imread` and `imresize` are the scipy.misc helpers imported at the top
# of the notebook. SciPy deprecated both in 1.0.0 and announced their removal
# for 1.2.0; its deprecation notice names `imageio.imread` and
# `skimage.transform.resize` as the replacements.
nclass = len(paths)
valid_exts = [".jpg", ".gif", ".png", ".tga", ".jpeg"]
imgcnt = 0
onehot = np.eye(nclass, nclass)    # row i is the one-hot label for class i
imgvecs = []      # one flattened (1, n_features) row per accepted image
labelvecs = []    # the matching (1, nclass) one-hot rows

for i, relpath in enumerate(paths):    # i: class index, relpath: relative folder
    path = cwd + "/" + relpath    # turn the relative folder into an absolute path
    # os.listdir returns names in arbitrary order; sort so the row order of the
    # dataset is the same on every run.
    flist = sorted(os.listdir(path))
    for f in flist:    # f: a single file name
        if os.path.splitext(f)[1].lower() not in valid_exts:    # skip non-image extensions
            continue
        fullpath = os.path.join(path, f)    # folder + file name
        currimg = imread(fullpath)    # read one image

        # convert to gray
        if use_gray:    # a truthy use_gray selects the grayscale version
            grayimg = rgb2gray(currimg)
        else:
            grayimg = currimg

        # resize
        graysmall = imresize(grayimg, imgsize)/255.    # common size, values divided by 255
        grayvec = np.reshape(graysmall, (1,-1))    # flatten to a single row

        # save
        # slicing with [i:i+1] keeps the label 2-D (1 x nclass) instead of 1-D
        curr_label = onehot[i:i+1, :]
        imgvecs.append(grayvec)
        labelvecs.append(curr_label)
        imgcnt += 1
print ("total %d images" % imgcnt)

# Fail loudly when nothing was loaded; otherwise `totalimg` / `totallabel`
# would be undefined (or stale from an earlier run) in the following cells.
if imgcnt == 0:
    raise RuntimeError("no images with extensions %s found under %s"
                       % (valid_exts, paths))

# Stack once at the end: concatenating inside the loop copies the whole
# dataset again for every image. All rows must have the same length, i.e. every
# image must end up with the same number of channels after resizing.
totalimg = np.concatenate(imgvecs, axis=0)
totallabel = np.concatenate(labelvecs, axis=0)

total 14 images


`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  del sys.path[0]
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.


In [29]:
def print_shape(string, x):
    """Print the ``shape`` attribute of ``x``, labelled with ``string``.

    Parameters
    ----------
    string : label shown inside the first pair of brackets.
    x : any object exposing a ``shape`` attribute (e.g. a numpy array).
    """
    message = "shape of [%s] is [%s]" % (string, x.shape)
    print(message)
    
# Draw a random ordering of all image indices (no repeats), then give the
# first 4/5 of it to the training set and the remainder to the test set.
randidx = np.random.choice(np.arange(imgcnt), imgcnt, replace=False)
ntrain = int(4*imgcnt/5)
trainidx, testidx = randidx[:ntrain], randidx[ntrain:]

# Select the matching rows of images and one-hot labels for each subset.
trainimg, trainlabel = totalimg[trainidx, :], totallabel[trainidx, :]
testimg, testlabel = totalimg[testidx, :], totallabel[testidx, :]

# Report the array shapes so the split can be checked at a glance.
for tag, arr in [
    ("totalimg", totalimg),
    ("totallabel", totallabel),
    ("trainimg", trainimg),
    ("trainlabel", trainlabel),
    ("testimg", testimg),
    ("testlabel", testlabel),
]:
    print_shape(tag, arr)

shape of [totalimg] is [(14, 12288)]
shape of [totallabel] is [(14, 3)]
shape of [trainimg] is [(11, 12288)]
shape of [trainlabel] is [(11, 3)]
shape of [testimg] is [(3, 12288)]
shape of [testlabel] is [(3, 3)]


In [31]:
# Write the train/test arrays plus the preprocessing settings and class names
# into a single .npz archive under <cwd>/data/.
savepath = cwd + "/data/" + data_name + ".npz"
archive = dict(
    trainimg=trainimg,
    trainlabel=trainlabel,
    testimg=testimg,
    testlabel=testlabel,
    imgsize=imgsize,
    use_gray=use_gray,
    categories=categories,
)
np.savez(savepath, **archive)
print ("data saved")

data saved


In [32]:
# Read the archive back from <cwd>/data/ and list the arrays it contains.
cwd = os.getcwd()
loadpath = cwd + "/data/" + data_name + ".npz"
# NOTE(review): the object returned by np.load for an .npz stays open here;
# consider closing it once no later cell needs `l` -- confirm before changing.
l = np.load(loadpath)
print (l.files)

# Pull the individual arrays out of the archive by key.
(trainimg_loaded,
 trainlabel_loaded,
 testimg_loaded,
 testlabel_loaded,
 categories_loaded) = (
    l[key] for key in ('trainimg', 'trainlabel', 'testimg', 'testlabel', 'categories')
)

# The first axis of each image array indexes the samples.
print ("[%d] training images" % len(trainimg_loaded))
print ("[%d] test images" % len(testimg_loaded))

['trainimg', 'trainlabel', 'testimg', 'testlabel', 'imgsize', 'use_gray', 'categories']
[11] training images
[3] test images


['trainimg',
 'trainlabel',
 'testimg',
 'testlabel',
 'imgsize',
 'use_gray',
 'categories']