In [3]:
from torchvision.models import detection
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import pickle
import torch
import time
import cv2

In [4]:
def auto_canny(image, sigma=0.33):
	"""Run Canny edge detection with thresholds derived from the image median.

	The two hysteresis thresholds are placed a fraction ``sigma`` below and
	above the median pixel intensity of ``image`` and clamped to ``[0, 255]``.

	Args:
		image: Input passed to ``np.median`` and ``cv2.Canny``
			(the original comment describes it as single channel).
		sigma: Fractional spread around the median used for the thresholds.

	Returns:
		Whatever ``cv2.Canny`` returns for the computed thresholds.
	"""
	median_intensity = np.median(image)

	# Thresholds are truncated to int after clamping to the 8-bit range.
	low_threshold = int(max(0, (1.0 - sigma) * median_intensity))
	high_threshold = int(min(255, (1.0 + sigma) * median_intensity))

	return cv2.Canny(image, low_threshold, high_threshold)

# HSV ranges per colour name. Each value is [upper_bound, lower_bound], with
# every bound given as [H, S, V]. Red is split into two entries ('red1',
# 'red2') covering the high and low ends of the hue axis.
# NOTE(review): hue values top out at 180, which presumably means the OpenCV
# 8-bit HSV scale -- confirm before using these with another library.
color_dict_HSV = {
    'black': [[180, 255, 30], [0, 0, 0]],
    'white': [[180, 18, 255], [0, 0, 231]],
    'red1': [[180, 255, 255], [159, 50, 70]],
    'red2': [[9, 255, 255], [0, 50, 70]],
    'green': [[89, 255, 255], [36, 50, 70]],
    'blue': [[128, 255, 255], [90, 50, 70]],
    'yellow': [[35, 255, 255], [25, 50, 70]],
    'purple': [[158, 255, 255], [129, 50, 70]],
    'orange': [[24, 255, 255], [10, 50, 70]],
    'gray': [[180, 18, 230], [0, 0, 40]],
    'brown': [[25, 100, 97], [24, 33, 18]],
}

# Select the device the model and input tensors will live on: the GPU when
# CUDA is available, otherwise the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the list of categories in the COCO dataset and generate one random
# bounding-box colour (3 values in [0, 255)) per class.
# The file is opened in a `with` block so the handle is closed right after
# reading instead of being left to the garbage collector.
# NOTE(security): unpickling executes arbitrary code embedded in the file;
# only load "coco_classes.pickle" from a trusted source.
with open("coco_classes.pickle", "rb") as classes_file:
	CLASSES = pickle.load(classes_file)
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

# Load the pretrained Faster R-CNN (ResNet-50 FPN) detector, move it to
# DEVICE and switch it to evaluation mode. `model.eval()` is kept as the
# last expression so the notebook still displays the model summary.
model = detection.fasterrcnn_resnet50_fpn(pretrained=True, progress=True,
	num_classes=len(CLASSES), pretrained_backbone=True).to(DEVICE)
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [6]:
# Run the detector on every frame of a video file, draw the confident
# detections on a copy of the frame and show it until the video ends or the
# user presses "q".

# Video file to process and minimum score a detection needs to be drawn.
VIDEO_PATH = "IMG_3030.mp4"
MIN_CONFIDENCE = 0.5

# Open the video stream and start the FPS counter
print("[INFO] accessing video stream...")
cap = cv2.VideoCapture(VIDEO_PATH)
fps = FPS().start()

# Report when the video could not be opened; the loop below is then skipped
if not cap.isOpened():
	print("[INFO] No fue posible abrir el video, favor reintente")

try:
	# Read the video frame by frame until it is exhausted
	while cap.isOpened():
		ret, frame = cap.read()
		if not ret:
			break

		# Resize the frame and keep a BGR copy to draw on
		frame = imutils.resize(frame, width=900)
		orig = frame.copy()

		# Build the model input: BGR -> RGB, channels-first ordering, a
		# leading batch dimension, pixel values scaled to [0, 1], float tensor
		rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
		batch = np.expand_dims(rgb.transpose((2, 0, 1)), axis=0) / 255.0
		batch = torch.FloatTensor(batch).to(DEVICE)

		# Inference only: disable autograd so no graph is built per frame.
		# NOTE(review): the recorded run of this cell failed with
		# "module 'torchvision' has no attribute '_is_tracing'"; that looks
		# like a torch/torchvision installation mismatch rather than a bug
		# in this code -- verify the environment.
		with torch.no_grad():
			detections = model(batch)[0]

		# Draw every detection whose score exceeds the threshold. The
		# filter lives INSIDE the loop so each detection is checked, and
		# nothing is referenced when a frame yields no detections.
		for i in range(len(detections["boxes"])):
			confidence = float(detections["scores"][i])
			if confidence <= MIN_CONFIDENCE:
				continue

			# Class index and integer pixel coordinates of the box
			idx = int(detections["labels"][i])
			box = detections["boxes"][i].detach().cpu().numpy()
			(startX, startY, endX, endY) = box.astype("int").tolist()

			# Draw the bounding box and its label; the label goes above the
			# box unless that would place it too close to the top edge
			label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
			cv2.rectangle(orig, (startX, startY), (endX, endY),
				COLORS[idx], 2)
			y = startY - 15 if startY - 15 > 15 else startY + 15
			cv2.putText(orig, label, (startX, y),
				cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)

			# TODO: classify each box by area as small (300 < area <= 500),
			# medium (500 < area <= 900) or large (area > 900), as sketched
			# in the earlier commented-out draft.

		# Show the annotated frame; waitKey needs a window to receive keys
		cv2.imshow("Original", orig)
		# Pressing "q" stops the video early
		if cv2.waitKey(100) & 0xFF == ord("q"):
			break

		# Update the FPS counter
		fps.update()
finally:
	# Always release the capture and close windows, even if a frame fails
	cap.release()
	cv2.destroyAllWindows()

fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

[INFO] accessing video stream...


AttributeError: module 'torchvision' has no attribute '_is_tracing'