# Computer Vision

In [None]:
# Captura por webcam y región de interés

# https://opencv.org/

In [None]:
import numpy as np
import cv2

In [None]:
# captura webcam

def captura():
    """Show the default webcam feed and optionally save one frame.

    Opens capture device 0 and displays each frame in a window titled
    'Captura'. Pressing SPACE writes the current frame to 'captura.png',
    prints a confirmation and stops; pressing ESC stops without saving.
    The loop also stops as soon as a frame cannot be read.

    The camera is released and every OpenCV window is closed on exit,
    including when an exception interrupts the loop.
    """
    cam = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cam.read()

            # Check the read result BEFORE displaying: a failed read must not
            # reach cv2.imshow, which raises on an invalid frame.
            if not ret:
                break

            cv2.imshow('Captura', frame)

            key = cv2.waitKey(1) % 256

            if key == 27:  # ESC key
                break

            if key == 32:  # SPACE key
                img_name = 'captura.png'
                cv2.imwrite(img_name, frame)
                print('Captura guardada')
                break
    finally:
        cam.release()
        cv2.destroyAllWindows()

In [None]:
def contraste(umbral=170): # converts to pure black and white
    """Binarize 'captura.png' and save the result as 'b&w.png'.

    The capture is converted to grayscale and inverted (255 - value); every
    pixel whose inverted value is strictly greater than ``umbral`` becomes
    255 and every other pixel becomes 0. The result is shown in a window
    named 'img', written to 'b&w.png', and the function then blocks until a
    key is pressed.

    Parameters
    ----------
    umbral : int, optional
        Threshold applied to the inverted grayscale image (default 170).

    Raises
    ------
    FileNotFoundError
        If 'captura.png' cannot be read.
    """
    image = cv2.imread('captura.png')
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError("No se pudo leer 'captura.png'")

    im = 255 - cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Vectorized threshold; uint8 keeps the image in the 8-bit format that
    # imshow/imwrite expect for 0..255 data.
    img = np.where(im > umbral, 255, 0).astype(np.uint8)

    cv2.imshow('img', img)
    cv2.imwrite('b&w.png', img)
    cv2.waitKey(0)

In [None]:
def contorno(): # regions of interest
    """Extract regions of interest from 'b&w.png' and save each one as a PNG.

    The image is converted to grayscale, inverted, adaptively thresholded
    and morphologically closed with a 30x10 rectangular kernel so nearby
    strokes merge. External contours are then sorted by the key
    ``y + x * image_width`` of their bounding box and visited in that order.

    For every contour whose bounding box is at least 10 px in both width and
    height, the region is written to '<idx>.png', a rectangle is drawn on
    the working image, and the image is shown until a key is pressed.
    ``idx`` counts every contour visited, including the ones skipped for
    being too small, so file numbers may have gaps.

    Returns
    -------
    int
        Number of contours visited (the last value of ``idx``).

    Raises
    ------
    FileNotFoundError
        If 'b&w.png' cannot be read.
    """
    umbral_fino = 10

    image = cv2.imread('b&w.png')
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError("No se pudo leer 'b&w.png'")

    im = 255 - cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(im, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 10))
    threshed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, rect_kernel)

    # findContours returns (contours, hierarchy) in OpenCV 4 but
    # (image, contours, hierarchy) in OpenCV 3; the contours are always the
    # second-to-last element.
    contornos = cv2.findContours(threshed, cv2.RETR_EXTERNAL,
                                 cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Compute each bounding box once and sort the boxes themselves.
    ancho = image.shape[1]
    cajas = sorted((cv2.boundingRect(c) for c in contornos),
                   key=lambda caja: caja[1] + caja[0] * ancho)

    idx = 0
    for x, y, w, h in cajas:
        idx += 1

        if h < umbral_fino or w < umbral_fino:
            continue

        # Save the region before the rectangle is drawn over its border.
        roi = im[y:y + h, x:x + w]
        cv2.imwrite(str(idx) + '.png', roi)
        cv2.rectangle(im, (x, y), (x + w, y + h), (200, 0, 0), 2)

        cv2.imshow('Imagen B-N', im)
        cv2.waitKey(0)

    return idx
                    
        

In [None]:
# Full pipeline: capture a webcam frame, binarize it, then extract the
# regions of interest and print the value contorno() returns.
captura()
contraste()
print (contorno())

In [None]:
# Run only the webcam capture step (writes 'captura.png' when SPACE is pressed).
captura()

In [None]:
# Run only the region-of-interest step; it reads 'b&w.png' from disk.
contorno()

In [None]:
# Drawing: a vertical line and a circle on top of the captured frame

img = cv2.imread('captura.png')
if img is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError("No se pudo leer 'captura.png'")

H = img.shape[0]
W = img.shape[1]

# White vertical line through the horizontal center, red circle centered on the image.
cv2.line(img, (W//2, 0), (W//2, H), (255, 255, 255), 15)
cv2.circle(img, (W//2, H//2), 512, (0,0,255), 10)

# resizeWindow only works on windows created without WINDOW_AUTOSIZE, and
# imshow on its own creates an autosize window, so create it explicitly.
cv2.namedWindow('pinturas', cv2.WINDOW_NORMAL)
cv2.imshow('pinturas', img)
cv2.resizeWindow('pinturas', 600, 600)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# Blend images: load the capture and the first saved region of interest

img1 = cv2.imread('captura.png', cv2.IMREAD_COLOR)
img2 = cv2.imread('1.png', cv2.IMREAD_COLOR)

# cv2.imread returns None on failure; fail here with a clear message instead
# of an AttributeError on `.shape` further down.
if img1 is None:
    raise FileNotFoundError("No se pudo leer 'captura.png'")
if img2 is None:
    raise FileNotFoundError("No se pudo leer '1.png'")

# cv2.resize expects (width, height), i.e. the first two shape entries reversed.
img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0]), interpolation=cv2.INTER_AREA)

In [None]:
# Saturated per-pixel sum of both images, shown in a resizable window.
# resizeWindow only works on windows created without WINDOW_AUTOSIZE, and
# imshow on its own creates an autosize window, so create it explicitly.
cv2.namedWindow('adicion', cv2.WINDOW_NORMAL)
cv2.imshow('adicion', cv2.add(img1, img2))
cv2.resizeWindow('adicion', 600, 600)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Reconocimiento facial

In [None]:
import os
import matplotlib.pyplot as plt
import glob
import pandas as pd

# images[alumno] = {'images': [grayscale arrays], 'name': [file paths]}
images = {}

# Expected layout: ./fotos_alumnos/<alumno>/<foto>.JPG
for e in glob.glob('./fotos_alumnos/**/*.JPG'):
    # Take the names from the path with os.path instead of splitting on '/',
    # so the cell also works where glob returns backslash-separated paths.
    alumno = os.path.basename(os.path.dirname(e))
    foto = os.path.basename(e)

    img = cv2.imread(e)
    if img is None:
        # cv2.imread returns None for unreadable files; skip them so that
        # 'images' and 'name' stay aligned.
        print('Skipped unreadable image "{}"'.format(e))
        continue

    print ('Read"{}" -> {}'.format(alumno, foto))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    entry = images.setdefault(alumno, {'images': [], 'name': []})
    entry['images'].append(gray)
    entry['name'].append(e)
        

In [None]:
# Student names found on disk, in the order they were first read.
list(images.keys())

In [None]:
# Haar cascade for frontal faces, loaded from the working directory.
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')

# CascadeClassifier does not raise when the file is missing or invalid: it
# just stays empty and fails later inside detectMultiScale. Fail here instead.
if face_cascade.empty():
    raise IOError("No se pudo cargar 'haarcascade_frontalface_alt.xml'")

In [None]:
# Preview the first detected face in up to four photos of one student (2x2 grid).
imgs = images['Reynaldo']['images']

plt.figure(figsize=(10,10))
for i, e in enumerate(imgs[:4]):
    img = e.copy()  # draw on a copy so the stored image stays untouched
    faces = face_cascade.detectMultiScale(img, 1.1, 4)

    plt.subplot(2, 2, i+1)

    if len(faces) == 0:
        # detectMultiScale can return no detections; indexing faces[0]
        # would raise, so leave this panel empty with a note instead.
        plt.title('No face detected')
        plt.axis('off')
        continue

    # Only the first detection is used.
    (x, y, w, h) = faces[0]
    cv2.rectangle(img, (x,y), (x+w, y+h), (255,0,0), 4)
    crop = img[y:y+h, x:x+w]
    plt.imshow(crop, cmap='gray')

In [None]:
# Crop the first detected face of every photo, keep it under
# images[alumno]['crops'] and save it to <cwd>/caras/<alumno>-<i>.JPG.

# cv2.imwrite does not create missing directories (it just fails), so make
# sure the output folder exists.
os.makedirs(os.path.join(os.getcwd(), 'caras'), exist_ok=True)

for alumno, data in images.items():
    # Build the list from scratch so re-running the cell does not append
    # duplicates to a previous run's crops.
    crops = []

    for i, e in enumerate(data['images']):
        img = e.copy()
        faces = face_cascade.detectMultiScale(img, 1.1, 4)

        if len(faces) > 0:
            (x, y, w, h) = faces[0]
            crop = img[y:y+h, x:x+w]
        else:
            # No detection: faces[0] would raise. Fall back to the whole
            # image so 'crops' stays aligned one-to-one with 'name'.
            print('No face detected in {}; using the full image'.format(data['name'][i]))
            crop = img

        crops.append(crop)

        name = '{}/caras/{}-{}.JPG'.format(os.getcwd(), alumno, i)

        print (name)

        cv2.imwrite(name, crop)

    data['crops'] = crops

In [None]:
def features(img, n_f=40):
    """Return low-frequency log-magnitude FFT coefficients as a flat vector.

    Computes log10 of the magnitude of the 2-D FFT of ``img``, keeps the
    top-left ``w x w`` corner where ``w`` is half the number of rows, and
    from that corner keeps at most the first ``n_f`` rows and columns.
    The kept rows are concatenated one after another into a 1-D array.

    Parameters
    ----------
    img : array-like
        2-D image.
    n_f : int, optional
        Maximum number of rows and columns to keep (default 40).

    Returns
    -------
    numpy.ndarray
        1-D feature vector.
    """
    magnitude = np.abs(np.fft.fft2(img))
    log_spectrum = np.log10(magnitude)

    # Half the row count bounds both axes of the retained corner.
    half = log_spectrum.shape[0] // 2
    corner = log_spectrum[:half, :half]
    block = corner[:n_f, :n_f]

    # Join the rows end to end.
    return np.concatenate(tuple(block))

In [None]:
# One record per (crop, file path) pair of every student: the student's name,
# the photo path, and the feature vector computed from the crop.
datacols = [
    {'alumno': alumno, 'foto': name, 'features': features(crop)}
    for alumno, data in images.items()
    for crop, name in zip(data['crops'], data['name'])
]

In [None]:
# Tabular view of the records: columns 'alumno', 'foto' and 'features'.
df=pd.DataFrame(datacols)
df.head()

In [None]:
# Hold out the first row of each student as the query set; every row whose
# photo path is not in the query set becomes the reference set.
X_test = (
    df.groupby('alumno')
      .nth(0)
      .reset_index()
)
X_train = df.loc[~df['foto'].isin(X_test['foto'])].reset_index()

In [None]:
# Nearest-neighbour prediction: each held-out photo gets the name of the
# reference photo whose feature vector is closest in Euclidean distance.
preds = []

train_features = X_train['features']

for _, fila in X_test.iterrows():
    X_miss = fila['features']
    X_miss_name = fila['alumno']

    # Distance to every reference vector of the same shape (vectors of a
    # different shape cannot be subtracted and are ignored). NaN distances
    # are dropped so they can never be selected as the minimum.
    dist = pd.Series(
        {idx: float(np.linalg.norm(feat - X_miss))
         for idx, feat in train_features.items()
         if feat.shape == X_miss.shape},
        dtype=float,
    ).dropna()

    if dist.empty:
        # No comparable reference vector: record "no prediction" rather than
        # failing on an empty lookup.
        p = None
    else:
        # idxmin returns an index label, so look the row up by label.
        p = X_train.loc[dist.idxmin(), 'alumno']

    preds.append([X_miss_name, p])

In [None]:
# Side-by-side table of the true student name and the predicted one.
pd.DataFrame(preds, columns=['real', 'prediccion'])

# Detección de objetos

In [None]:
import cv2
# Minimum confidence a detection must exceed to be drawn on the frame.
THRESHOLD=0.8

In [None]:
# Lookup table from the numeric class id reported by the detector to a
# human-readable label. The ids are not contiguous: 12, 26, 29, 30, 45, 66,
# 68, 69, 71 and 83 have no entry, so a plain class_names[id] lookup raises
# KeyError for those values.
class_names = {0: 'background',
              1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane', 6: 'bus',
              7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light', 11: 'fire hydrant',
              13: 'stop sign', 14: 'parking meter', 15: 'bench', 
              16: 'bird', 17: 'cat',
              18: 'dog', 19: 'horse', 20: 'sheep', 21: 'cow', 22: 'elephant', 23: 'bear',
              24: 'zebra', 25: 'giraffe', 27: 'backpack', 28: 'umbrella', 31: 'handbag',
              32: 'tie', 33: 'suitcase', 
              34: 'frisbee', 35: 'skis', 36: 'snowboard',
              37: 'sports ball', 38: 'kite', 39: 'baseball bat', 40: 'baseball glove',
              41: 'skateboard', 42: 'surfboard', 43: 'tennis racket', 44: 'bottle',
              46: 'wine glass', 47: 'cup', 48: 'fork', 49: 'knife', 50: 'spoon',
              51: 'bowl', 52: 'banana', 53: 'apple', 54: 'sandwich', 55: 'orange',
              56: 'broccoli', 57: 'carrot', 58: 'hot dog', 59: 'pizza', 60: 'donut',
              61: 'cake', 62: 'chair', 
              63: 'couch', 64: 'potted plant', 65: 'bed',
              67: 'dining table', 70: 'toilet', 72: 'tv', 73: 'laptop', 74: 'mouse',
              75: 'remote', 76: 'keyboard', 77: 'cell phone', 78: 'microwave', 79: 'oven',
              80: 'toaster', 81: 'sink', 82: 'refrigerator', 84: 'book', 85: 'clock',
              86: 'vase', 87: 'scissors', 88: 'teddy bear', 89: 'hair drier', 90: 'toothbrush'}

In [None]:
# Live object detection on the webcam feed. Press 'q' in the window to stop.
model = cv2.dnn.readNetFromTensorflow('frozen_inference_graph.pb',
                                      'ssd_mobilenet_v2_coco_2018_03_29.pbtxt')

cap = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Camera unavailable or stream ended: stop instead of passing an
            # invalid frame to the network.
            break

        h, w = frame.shape[:2]

        model.setInput(cv2.dnn.blobFromImage(frame, size=(300,300), swapRB=True))
        output = model.forward()[0,0,:,:]

        for e in output:
            confi = e[2]
            if confi <= THRESHOLD:
                continue

            # The id arrives as a float; class_names has gaps, so fall back
            # to a generic label instead of raising KeyError.
            class_id = int(e[1])
            class_name = class_names.get(class_id, 'id {}'.format(class_id))

            # e[3]..e[6] are scaled by the frame width/height and used as
            # two opposite corners of the box.
            box_x = e[3]*w
            box_y = e[4]*h
            box_w = e[5]*w
            box_h = e[6]*h

            cv2.rectangle(frame, (int(box_x), int(box_y)), (int(box_w), int(box_h)),
                          (0,255,0), thickness=2)

            cv2.putText(frame, class_name+' '+str(round(confi, 2)), (int(box_x), int(box_y+.05*h)),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0))

        # Show every frame, not only frames with a detection, and check the
        # key at the `while` level so 'q' really ends the capture loop.
        cv2.imshow('deteccion', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

# YOLO V3 (You Only Look Once)

In [None]:
import numpy as np
import argparse
import cv2
import subprocess
import time
import os
from yolo_utils import infer_image, show_image

In [None]:
# Placeholder value; FLAGS is rebound to the namespace returned by
# parser.parse_known_args() further down.
FLAGS=[]

In [None]:
if __name__ == '__main__':
    # Command-line style configuration for the YOLOv3 demo, followed by a
    # webcam loop that runs infer_image and displays each frame.
    # Press 'q' in the window to stop.

    def _str2bool(value):
        """Parse a command-line boolean such as 'True', 'false', '1' or 'no'.

        argparse's ``type=bool`` calls ``bool()`` on the raw string, so any
        non-empty text -- including 'False' -- would be parsed as True.
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'Expected a boolean value, got {!r}'.format(value))

    parser = argparse.ArgumentParser()

    parser.add_argument('-w', '--weights',
        type=str,
        default='./yolov3-coco/yolov3.weights',
        help='')

    parser.add_argument('-cfg', '--config',
        type=str,
        default='./yolov3-coco/yolov3.cfg',
        help='Path to the configuration file for the YOLOv3 model.')

    parser.add_argument('-i', '--image-path',
        type=str,
        help='The path to the image file')

    parser.add_argument('-v', '--video-path',
        type=str,
        help='The path to the video file')

    parser.add_argument('-l', '--labels',
        type=str,
        default='./yolov3-coco/coco-labels',
        help='Path to the file having the \
                    labels in a new-line seperated way.')

    parser.add_argument('-c', '--confidence',
        type=float,
        default=0.5,
        help='The model will reject boundaries which has a \
                probabiity less than the confidence value. \
                default: 0.5')

    parser.add_argument('-th', '--threshold',
        type=float,
        default=0.3,
        help='The threshold to use when applying the \
                Non-Max Suppresion')

    parser.add_argument('--download-model',
        type=_str2bool,
        default=False,
        help='Set to True, if the model weights and configurations \
                are not present on your local machine.')

    parser.add_argument('-t', '--show-time',
        type=_str2bool,
        default=False,
        help='Show the time taken to infer each image.')

    # parse_known_args tolerates extra arguments it does not recognize.
    FLAGS, unparsed = parser.parse_known_args()

    # One label per line; close the file as soon as it has been read.
    with open(FLAGS.labels) as labels_file:
        labels = labels_file.read().strip().split('\n')

    # One random color per label.
    colors = np.random.randint(0,255, size=(len(labels), 3), dtype='uint8')

    net = cv2.dnn.readNetFromDarknet(FLAGS.config, FLAGS.weights)

    layer_names = net.getLayerNames()
    # getUnconnectedOutLayers returns an Nx1 array in some OpenCV releases
    # and a flat array in others; flattening handles both. Ids are 1-based.
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    layer_names = [layer_names[i-1] for i in out_layer_ids]

    if FLAGS.image_path:
        # Single-image mode is not implemented; only a blank line is printed.
        print ()

    else:
        count = 0
        vid = cv2.VideoCapture(0)
        try:
            while True:
                ret, frame = vid.read()
                if not ret:
                    # Camera unavailable or stream ended.
                    break

                h, w = frame.shape[:2]

                if count == 0:
                    # Full inference on this frame.
                    frame, boxes, confi, c_ids, idxs = infer_image(net, layer_names,
                                                                   h, w, frame, colors, labels, FLAGS)
                    count += 1
                else:
                    # The next five frames pass the previous results back with
                    # infer=False -- presumably to redraw them without
                    # running the network; confirm in yolo_utils.
                    frame, boxes, confi, c_ids, idxs = infer_image(net, layer_names,
                                                                   h, w, frame, colors, labels,
                                                                   FLAGS, boxes, confi, c_ids, idxs,
                                                                   infer=False)
                    count = (count+1) % 6

                cv2.imshow('webcam', frame)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # `vid` only exists on this branch, so release it here rather
            # than after the if/else.
            vid.release()
            cv2.destroyAllWindows()