#### The code below was originally tested on Python 3.6.7, NumPy 1.15.4, OpenCV 3.4.2 (the cell output below was produced with Python 3.6.8, NumPy 1.16.0, OpenCV 4.0.0)

In [1]:
# Print the interpreter and library versions in use, so results can be reproduced.
import sys
print(sys.version)  # Python interpreter version and build information
import numpy as np
print(np.version.version)  # NumPy version string
import cv2
print(cv2.__version__)  # OpenCV version string

3.6.8 |Anaconda, Inc.| (default, Dec 29 2018, 19:04:46) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]
1.16.0
4.0.0


#### For more information, look up 'Haar features', 'Viola-Jones algorithm', as well as OpenCV documentation.

#### Step 1: Collect 'negative' or 'background' images, where the target object is not present.

These are generally listed in a bg.txt file that contains the path to each image, one per line.

example:<br>
    neg/1.jpg<br>
    neg/2.jpg

#### Step 2: Collect 'positive' images, where the target object is present

These are generally listed in a pos.txt file with one line per image: the path to the image, followed by how many objects it contains and where each one is located in the image (x, y, width, height).

Example:<br>
    pos/1.jpg 1 0 0 50 50<br>
    pos/2.jpg 1 0 0 50 50

Generally, negative images should be larger than positive images, e.g. 100x100 for negatives and 50x50 for positives.

#### Step 3: Create a positive vector file by stitching together all positive images, using an OpenCV command.

#### Step 4: Train cascade using an OpenCV command.

In [2]:
import urllib.request
import numpy as np
import cv2
import os

## Collect negative & positive images

Replace the image collection link below and update pic_num.
pic_num is the starting filename number for the scraped images.
I repeated this process several times with different links to collect 2100 negative samples. Note: if you forget to change pic_num, the new downloads will overwrite the images you already have in the directory.

In [None]:
def store_raw_images(neg_images_link='http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=n00015388',
                     pic_num=1615, out_dir='neg', size=(100, 100)):
    """Download the images listed at a URL and store them as small grayscale JPEGs.

    The page at ``neg_images_link`` is expected to contain one image URL per
    line. Each image is downloaded, converted to grayscale, resized to ``size``
    and written to ``<out_dir>/<number>.jpg``. Numbering starts at ``pic_num``
    and only advances when an image was stored successfully.

    Args:
        neg_images_link: URL of the text page listing the image URLs.
        pic_num: number used for the first stored file. An existing file with
            the same number is overwritten.
        out_dir: folder the images are written to; created if missing.
        size: (width, height) passed to ``cv2.resize``.

    Returns:
        The next unused file number, i.e. the value to pass as ``pic_num`` on
        the next call so that no stored image is overwritten.

    Raises:
        Whatever ``urllib.request.urlopen`` raises if the listing page itself
        cannot be fetched. Failures for individual images are printed and
        skipped.
    """
    neg_image_urls = urllib.request.urlopen(neg_images_link).read().decode()

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # splitlines() + strip() copes with '\r\n' line endings and blank lines,
    # which would otherwise be handed to urlretrieve as bogus URLs.
    for line in neg_image_urls.splitlines():
        url = line.strip()
        if not url:
            continue

        filepath = out_dir + '/' + str(pic_num) + '.jpg'
        # Download next to the target first, so that a failed or non-image
        # download never leaves a broken file under the final name.
        download_path = filepath + '.download'
        try:
            urllib.request.urlretrieve(url, download_path)
            img = cv2.imread(download_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals an undecodable file by returning None
                raise ValueError('not a readable image: ' + url)
            if not cv2.imwrite(filepath, cv2.resize(img, size)):
                raise IOError('could not write ' + filepath)
            pic_num += 1

        except Exception as e:
            # best effort: report the problem and carry on with the next URL
            print(str(e))

        finally:
            if os.path.exists(download_path):
                os.remove(download_path)

    return pic_num
            
# Run the scraper: performs network requests and writes the images into the 'neg' folder.
store_raw_images()
    

#### Resize and turn positive images to grayscale (if needed)

In [None]:
# Convert every image in the 'pos' folder to a 50x50 grayscale image, overwriting the original file.
for img_name in os.listdir('pos'):
    img_path = 'pos' + '/' + img_name
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread returns None for files it cannot decode (e.g. hidden or
        # non-image files); report which file it was and leave it untouched.
        print('skipped ' + img_path + ': not a readable image')
        continue
    try:
        resized_img = cv2.resize(img, (50, 50))
        cv2.imwrite(img_path, resized_img)
    except Exception as e:
        # keep going with the remaining files, but say which one failed
        print(img_path + ': ' + str(e))

## Remove unwanted images (if applicable)

In [None]:
def exclude(folders=('neg',), exclude_dir='exclude'):
    """Delete images that are exact pixel-for-pixel copies of a template image.

    Every image in each folder of ``folders`` is compared with every image in
    ``exclude_dir``. An image with the same shape and identical pixel values as
    any template is removed from disk and its path is printed.

    Args:
        folders: folders to clean. Only 'neg' is scanned by default; pass
            ``('neg', 'pos')`` to clean both.
        exclude_dir: folder holding the template images to exclude.

    Returns:
        None.
    """
    # Load each template once instead of re-reading it for every candidate image.
    templates = []
    for excluded_img_name in os.listdir(exclude_dir):
        exclude_img_path = exclude_dir + '/' + str(excluded_img_name)
        excluded_img = cv2.imread(exclude_img_path)
        if excluded_img is None:
            # cv2.imread returns None for files it cannot decode
            print('could not read ' + exclude_img_path)
            continue
        templates.append(excluded_img)

    for folder_path in folders:

        for img_name in os.listdir(folder_path):
            # the image to be checked
            image_path = str(folder_path) + '/' + str(img_name)

            try:
                image = cv2.imread(image_path)
                if image is None:
                    print('could not read ' + image_path)
                    continue

                # array_equal is False when shapes differ, True only for identical pixels.
                # any() stops at the first match, so a removed file is never compared
                # (or removed) a second time.
                if any(np.array_equal(image, template) for template in templates):
                    os.remove(image_path)
                    print('removed ' + image_path)

            except Exception as e:
                print(str(e))

# Remove from the 'neg' folder every image that is identical to one in the 'exclude' folder.
exclude()

In [None]:
def create_pos_n_neg(folders=('neg',)):
    """Write the description files listing the negative and/or positive images.

    For the 'neg' folder, ``bg.txt`` receives one image path per line.
    For the 'pos' folder, ``info.dat`` receives one line per image in the form
    ``<path> 1 0 0 50 50`` (one object at x=0, y=0, width=50, height=50).
    Folder names other than 'neg' and 'pos' are ignored.

    Each description file is rewritten from scratch, so running this again
    does not duplicate entries. Images are listed in sorted filename order.

    Args:
        folders: folders to describe. Only 'neg' is processed by default;
            pass ``('neg', 'pos')`` to also write ``info.dat``.

    Returns:
        None.
    """
    # folder name -> (description file, text written after each image path)
    targets = {
        'neg': ('bg.txt', '\n'),
        # the leading space separates the path from the object count
        'pos': ('info.dat', ' 1 0 0 50 50\n'),
    }

    for folder_path in folders:
        if folder_path not in targets:
            continue
        out_name, suffix = targets[folder_path]

        # List the folder before opening the output, so a missing folder
        # does not truncate an existing description file.
        img_names = sorted(os.listdir(folder_path))

        with open(out_name, 'w') as f:
            for img_name in img_names:
                f.write(folder_path + '/' + img_name + suffix)
                    
# Record the path of every image in the 'neg' folder in bg.txt.
create_pos_n_neg()

## Overlay positive images with background (negative) images (if using only one positive sample)

#### Create the following folders

In [3]:
# 'data' is the folder for the cascade to be created; 'info' is the positive images directory.
for folder_name in ('data', 'info'):
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

#### Run the following command in terminal to create positive images, by combining with the negative background images.

opencv_createsamples -img sample1.jpg -bg bg.txt -info info/info.lst -pngoutput info -maxxangle 0.6 -maxyangle -0.6 -maxzangle 0.6 -num 2100

The parameters are as the following:
- -img : the positive image file
- -bg : the background text file (negative images)
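- -info : the output file that describes where the positive image is located within each generated image
- -pngoutput : directory to generate the images
- -maxxangle : maximum rotation angle around the x-axis, in radians
- -maxyangle : maximum rotation angle around the y-axis, in radians
- -maxzangle : maximum rotation angle around the z-axis, in radians
- -num : number of images to generate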

In [4]:
# Generate positive samples by overlaying sample1.jpg on the background images.
# The pieces are joined with single spaces into one shell command; the exit status is the cell output.
os.system(" ".join([
    "opencv_createsamples",
    "-img sample1.jpg",
    "-bg bg.txt",
    "-info info/info.lst",
    "-pngoutput info",
    "-maxxangle 0.6",
    "-maxyangle -0.6",
    "-maxzangle 0.6",
    "-num 2100",
]))

0

## Create the vector file by running the following command in terminal

opencv_createsamples -info info/info.lst -num 2100 -w 20 -h 20 -vec positives.vec

The parameters are as the following:
- -info where the info list is located
- -num number of vector items
- -w width
- -h height
- -vec output vector file

The width and height define the size of the detection area; the larger they are, the slower training will be. 20 is a commonly used size.

In [5]:
# Pack the samples listed in info/info.lst into positives.vec.
# The pieces are joined with single spaces into one shell command; the exit status is the cell output.
os.system(" ".join([
    "opencv_createsamples",
    "-info info/info.lst",
    "-num 2100",
    "-w 20",
    "-h 20",
    "-vec positives.vec",
]))

0

## Training the model

#### Run the following command in terminal

opencv_traincascade -data data -vec positives.vec -bg bg.txt -numPos 1800 -numNeg 900 -numStages 10 -w 20 -h 20

The parameters are as the following:
    
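- -data : folder where the trained cascade is saved
- -vec : vector filename
- -bg : background (negative) text file of image paths
- -numPos : the number of positive samples used to train each stage; it should be no more than 0.9 × the number of positive samples
- -numNeg : the number of negative samples used to train each stage; typically half of the number of negative samples
- -numStages : the number of cascade stages to train (10 here)
- -w : sample width in pixels (20 here); must match the width used to create the vector file
- -h : sample height in pixels (20 here); must match the height used to create the vector file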

In [6]:
# Train the cascade from positives.vec and the backgrounds in bg.txt, writing into the 'data' folder.
# The pieces are joined with single spaces into one shell command; the exit status is the cell output.
os.system(" ".join([
    "opencv_traincascade",
    "-data data",
    "-vec positives.vec",
    "-bg bg.txt",
    "-numPos 1800",
    "-numNeg 900",
    "-numStages 10",
    "-w 20",
    "-h 20",
]))

0

Alternatively, wrap the command with 'nohup' and '&' so that it runs in the background and can be stopped and resumed later if needed:

nohup opencv_traincascade -data data -vec positives.vec -bg bg.txt -numPos 900 -numNeg 500 -numStages 10 -w 20 -h 20 &

## Run

In [None]:
import numpy as np
import cv2
# Live detection: read webcam frames, run the cascade on each one and draw a box around every hit.
# NOTE(review): this loads a frontal-face cascade file; to detect the object trained above,
# point it at the trained cascade instead (presumably 'data/cascade.xml' -- confirm the file name).
minion_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

cap = cv2.VideoCapture(0)

try:
    while True:
        ret, img = cap.read()
        if not ret:
            # No frame was delivered (camera missing, busy or disconnected);
            # img is unusable here, so stop instead of crashing in cvtColor.
            print('could not read a frame from the camera')
            break

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        minions = minion_cascade.detectMultiScale(gray)   # The parameters here need to be tuned

        for (x, y, w, h) in minions:
            cv2.rectangle(img, (x, y), (x + w, y + h), (255, 255, 0), 2)

        cv2.imshow('img', img)
        k = cv2.waitKey(30) & 0xff
        if k == 27:  # 27 is the Esc key
            break
finally:
    # Always free the camera and close the window, even if a frame fails to process.
    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)


Notes:
There seems to be a bug when running OpenCV in Jupyter Notebook: cv2.imshow() causes the program to hang. The last line, 'cv2.waitKey(1)', somehow bypasses it and allows cv2.imshow() to work.
Even so, you still need to restart the kernel after stopping the run.

#### Restart kernel

In [6]:
import IPython
# Ask the running kernel to shut down with the restart flag set to True.
IPython.Application.instance().kernel.do_shutdown(True)


{'status': 'ok', 'restart': True}