# Image processing using OpenCV
by Jenny Zeng

This notebook extracts the color-histogram bins of each image and uses them as the image's features.

In [1]:
import cv2
import numpy as np
import glob
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import mltools as ml

In [2]:
# --- notebook configuration -------------------------------------------------
# Histogram resolution: how many bins are computed for every colour channel.
NUM_BINS = 8
# One matplotlib colour code per channel, listed in OpenCV's native B, G, R order.
COLOR = ('b', 'g', 'r')
# (glob pattern, class id) pairs; each pattern selects the jpg files of one class.
IMG_DIR_PATH = [
    ('/notebooks/Minecraft-AI/mc-data/fence_rgb_2/*.jpg', 0),
    ('/notebooks/Minecraft-AI/mc-data/pig_rgb_2/*.jpg', 1),
]

# Destination of the generated feature/label table.
FILE_SAVE_PATH = '/notebooks/Minecraft-AI/mc-data/fence_pig.txt'
# Class name -> numeric class id.
labels = {"no_pig": 0, "pig": 1}
# Alternative mapping kept for the biome data set:
# labels ={"mesa":0, "forest":1, "desert":2, 'jungle':3,"eh":4}

## Analyze a single image

In [None]:
# Load one sample image and display it with the axis ticks hidden.
IMG_PATH = '/notebooks/Minecraft-AI/mc-data/mesa/mesa_rgb/mesa_10.jpg'
# cv2.imread yields BGR pixels; convert to RGB so matplotlib shows true colours.
img = cv2.cvtColor(cv2.imread(IMG_PATH), cv2.COLOR_BGR2RGB)
plt.imshow(img)
plt.xticks([])  # hide tick values on the X axis
plt.yticks([])  # hide tick values on the Y axis
plt.show()

## Histogram of the image



In [None]:
# Per-channel histogram of the image: column i of `hist` receives the NUM_BINS
# bin counts of channel i of `img`.
hist = np.zeros((NUM_BINS, 3))
# `img` was converted from BGR to RGB for display, so its channels run in the
# reverse of COLOR's order. Reverse COLOR so each curve is drawn in the colour
# of the channel it actually describes.
for i, col in enumerate(COLOR[::-1]):
    histr = cv2.calcHist([img], [i], None, [NUM_BINS], [0, 256])
    plt.plot(histr, color=col)
    # calcHist returns a (NUM_BINS, 1) column; keep it as column i.
    hist[:, i] = histr[:, 0]
plt.show()

histr is an array with shape (NUM_BINS,1), that is 8 bins and 1 channel if NUM_BINS=8

In [None]:
# Shape of the histogram computed in the last loop iteration (one channel).
histr.shape

hist is an array with shape (NUM_BINS,3), hist is what we want to store as the features of an image

In [None]:
# Shape of the combined per-channel histogram table.
hist.shape

Flatten hist into a 1-D array

In [None]:
# Row-major flatten: the values of bin 0 for every channel come first, then bin 1, ...
hist = hist.flatten()
# Display the flattened feature vector.
hist

### Helper function to convert an image into one data row
with 24 features and 1 label
(a 1-D array of length 25)

In [3]:
def convertImage(img_path, label):
    """Turn one image file into a feature row followed by its label.

    The image is read with OpenCV, converted from BGR to RGB, and a
    NUM_BINS-bin histogram over the value range [0, 256) is computed for
    each channel. The (NUM_BINS, n_channels) table is flattened row-major,
    so the output lists bin 0 of every channel, then bin 1, and so on.

    Args:
        img_path: path of the image file to read.
        label: class id appended as the last element of the row.

    Returns:
        1-D numpy array of NUM_BINS * len(COLOR) histogram counts followed
        by `label`.

    Raises:
        IOError: if the image cannot be read.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising; fail here with the offending path rather than
        # with an opaque error inside cvtColor.
        raise IOError('could not read image: %s' % img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    n_channels = len(COLOR)
    hist = np.zeros((NUM_BINS, n_channels))
    for channel in range(n_channels):
        histr = cv2.calcHist([img], [channel], None, [NUM_BINS], [0, 256])
        # calcHist returns a (NUM_BINS, 1) column; store it as this channel's column.
        hist[:, channel] = histr[:, 0]

    return np.append(hist.flatten(), label)

In [None]:
# convertImage returns a single 1-D array (features followed by the label),
# so bind it to one name; unpacking it into two names raises ValueError.
hist = convertImage(IMG_PATH, 0)
print(hist)


In [None]:
# Inspect the shape of `hist`.
hist.shape

In [None]:
# Display the pixel array currently held in `img`.
img

## save images to a txt file

### Helper function for extracting data from a single directory of jpg images
Each subdirectory holds the images of one class.

In [4]:
def extractFromSubdir(subdir, label):
    """Build the data table for every image matched by a glob pattern.

    Args:
        subdir: glob pattern selecting the image files of one class
            (e.g. '/path/to/class/*.jpg').
        label: class id attached to every row.

    Returns:
        numpy array with one convertImage row per matched file, in sorted
        path order. If nothing matches, the result is an empty 1-D array.
    """
    # glob.glob returns matches in arbitrary, filesystem-dependent order;
    # sort them so the row order is reproducible between runs and machines.
    img_files = sorted(glob.glob(subdir))
    return np.array([convertImage(img_path, label) for img_path in img_files])

In [None]:
# Example: build the data table for one directory of images.
subdir = '/notebooks/Minecraft-AI/mc-data/mesa/mesa_rgb/*.jpg'
# The active `labels` dict has no "mesa" key, so labels["mesa"] raised KeyError;
# fall back to 0, the id "mesa" has in the commented-out biome mapping.
data = extractFromSubdir(subdir, labels.get("mesa", 0))

From the example subdir we get 209 images, each with 24 features and 1 label.

In [None]:
# Shape of the table built from the example directory.
data.shape

### save all images in subdir of the IMG_DIR_PATH to a txt file

In [None]:
# Show the glob pattern currently held in `subdir`.
subdir


In [5]:
# Start the table with the first (pattern, label) entry of IMG_DIR_PATH.
subdir, label = IMG_DIR_PATH[0]
data = extractFromSubdir(subdir, label) # assumes IMG_DIR_PATH has at least one entry

In [6]:
# Append the rows of every remaining (pattern, label) entry below the existing rows.
for subdir, label in (IMG_DIR_PATH[1:]):
    data = np.vstack((data, extractFromSubdir(subdir, label)))

In [None]:
# Shuffle the rows in place; the table was stacked one class after another.
np.random.shuffle(data)

In [7]:
# Write the table as ';'-separated text, every value (label included) in
# scientific notation with 4 decimal places.
np.savetxt(FILE_SAVE_PATH, data, delimiter=';',fmt='%.4e')

In [None]:
# Shape of the table that was assembled from IMG_DIR_PATH.
data.shape

## example for loading data from txt

In [8]:
# Read the ';'-separated table back from FILE_SAVE_PATH into an array.
data = np.loadtxt(FILE_SAVE_PATH, delimiter=';')

In [9]:
# Call form of print: valid on both Python 2 and Python 3, same output.
print(data.shape)

(3733, 25)


In [None]:
# Shuffle the loaded rows in place before splitting them.
np.random.shuffle(data)

In [None]:
# Every column except the last is a feature; the last column is the class id,
# which is cast to 64-bit integers.
X, Y = data[:, :-1], data[:, -1].astype(np.int64)

In [None]:
# Call form of print: valid on both Python 2 and Python 3, same output.
print(X.shape)
print(Y.shape)

In [None]:
# NOTE(review): presumably splits the rows 80% train / 20% test — confirm
# against mltools.splitData.
Xtr, Xte, Ytr, Yte = ml.splitData(X,Y,train_fraction=0.8)

In [None]:
# Call form of print: valid on both Python 2 and Python 3, same output.
print(Xtr.shape)
print(Xte.shape)
print(Ytr.shape)
print(Yte.shape)