In [1]:
"""
https://github.com/FrederikSchorr/sign-language

This module 
* launches the webcam,
* waits for the start signal from user,
* captures 5 seconds of video,
* extracts frames from the video
* calculates and displays the optical flow,
* and uses the neural network to predict the sign language gesture.
* Then start again.
"""

# import the necessary packages
import time
import os
import glob
import sys
import random

import numpy as np
import pandas as pd

import cv2

In [2]:
from timer import Timer
from frame import video2frames, images_normalize, frames_downsample, images_crop
from frame import images_resize_aspectratio, frames_show, frames2files, files2frames, video_length
from videocapture import video_start, frame_show, video_show, video_capture
from opticalflow import frames2flows, flows2colorimages, flows2file, flows_add_third_channel
from datagenerator import VideoClasses

Using TensorFlow backend.


In [3]:
from model_mobile import features_2D_load_model
from model_lstm import lstm_load

In [4]:
from model_i3d import I3D_load
from predict import probability2label

In [5]:
# dataset
diVideoSet = {"sName" : "04-chalearn",
    "nClasses" : 20,   # number of classes
    "nFramesNorm" : 40,    # number of frames per video
    "nMinDim" : 240,   # smaller dimension of saved video-frames
    "tuShape" : (240, 320), # height, width
    "nFpsAvg" : 10,
    "nFramesAvg" : 50, 
    "fDurationAvg" : 5.0} # seconds

In [6]:
# files
sClassFile = "data-set/%s/%03d/class.csv"%(diVideoSet["sName"], diVideoSet["nClasses"])
sVideoDir = "data-set/%s/%03d"%(diVideoSet["sName"], diVideoSet["nClasses"])

In [7]:
print("\nStarting gesture recognition live demo ... ")
print(os.getcwd())
diVideoSet


Starting gesture recognition live demo ... 
C:\Users\JaimePanchana\Desktop\Jean\2go\python\1c3d


{'sName': '04-chalearn',
 'nClasses': 20,
 'nFramesNorm': 40,
 'nMinDim': 240,
 'tuShape': (240, 320),
 'nFpsAvg': 10,
 'nFramesAvg': 50,
 'fDurationAvg': 5.0}

In [8]:
# load label description
oClasses = VideoClasses(sClassFile)

Loaded 20 classes from data-set/04-chalearn/020/class.csv


In [9]:
oClasses.nClasses

20

In [10]:
oClasses.dfClass

Unnamed: 0.1,Unnamed: 0,sClass,sLong,sCat,sDetail
0,0,c021,ItalianGestures/Bellissima,ItalianGestures,HERMOSISIMA
1,2,c026,ItalianGestures/Madonna,ItalianGestures,Madonna
2,1,c027,ItalianGestures/NonMiFrega,ItalianGestures,NonMiFrega
3,3,c029,ItalianGestures/SeiPazzo,ItalianGestures,SeiPazzo
4,4,c030,ItalianGestures/VieniQua,ItalianGestures,VEN OE
5,5,c049,GestunoDisaster/102_thunderstorm_orage,GestunoDisaster,TORMENTA
6,6,c052,GestunoDisaster/110_earthquake_trembleme,GestunoDisaster,TERREMOTO
7,7,c057,GestunoLandscape/63_moon_lune,GestunoLandscape,Moon
8,8,c065,GestunoLandscape/85_volcano_volcan,GestunoLandscape,VOLCAN
9,9,c068,GestunoLandscape/90_river_fleuve,GestunoLandscape,River


In [11]:
oClasses.liClasses

['c021',
 'c026',
 'c027',
 'c029',
 'c030',
 'c049',
 'c052',
 'c057',
 'c065',
 'c068',
 'c070',
 'c071',
 'c072',
 'c074',
 'c079',
 'c086',
 'c177',
 'c246',
 'c247',
 'c248']

In [12]:
sModelFile = "model/20180627-0729-chalearn020-oflow-i3d-entire-best.h5"
h, w = 224, 224

In [13]:
keI3D = I3D_load(sModelFile, 
                 diVideoSet["nFramesNorm"], 
                 (h, w, 2), 
                 oClasses.nClasses)

Load trained I3D model from model/20180627-0729-chalearn020-oflow-i3d-entire-best.h5 ...
Instructions for updating:
If using Keras pass *_constraint arguments to layers.

Loaded input shape (40, 224, 224, 2), output shape (20,)


In [14]:
# open a pointer to the webcam video stream
oStream = video_start(device = 1, tuResolution = (320, 240), nFramePerSecond = diVideoSet["nFpsAvg"])

Try to initialize inbuilt camera ...
Initialized video device 0, with resolution (320, 240) and target frame rate 10


In [15]:
#liVideosDebug = glob.glob(sVideoDir + "/train/*/*.*")
nCount = 0
sResults = ""
timer = Timer()

In [None]:
# loop over action states
cadena = ""
while True:
    # show live video and wait for key stroke
    key = video_show(oStream, "yellow", "Press <blank> to start", sResults, label = cadena, tuRectangle = (h, w))

    # start!
    if key == ord(' '):
        # countdown n sec
        video_show(oStream, "orange", "Recording starts in ", tuRectangle = (h, w), nCountdown = 3)

        # record video for n sec
        fElapsed, arFrames, _ = video_capture(oStream, "red", "Recording ", \
            tuRectangle = (h, w), nTimeDuration = int(diVideoSet["fDurationAvg"]), bOpticalFlow = False)
        print("\nCaptured video: %.1f sec, %s, %.1f fps" % \
            (fElapsed, str(arFrames.shape), len(arFrames)/fElapsed))

        # show orange wait box
        frame_show(oStream, "orange", "Translating sign ...", tuRectangle = (h, w))

        # crop and downsample frames
        arFrames = images_crop(arFrames, h, w)
        arFrames = frames_downsample(arFrames, diVideoSet["nFramesNorm"])

        # Translate frames to flows - these are already scaled between [-1.0, 1.0]
        print("Calculate optical flow on %d frames ..." % len(arFrames))
        timer.start()
        arFlows = frames2flows(arFrames, bThirdChannel = False, bShow = True)
        print("Optical flow per frame: %.3f" % (timer.stop() / len(arFrames)))

        # predict video from flows			
        print("Predict video with %s ..." % (keI3D.name))
        arX = np.expand_dims(arFlows, axis=0)
        arProbas = keI3D.predict(arX, verbose = 1)[0]
        nLabel, sLabel, fProba = probability2label(arProbas, oClasses, nTop = 3)
        print()
        sResults = "Sign: %s (%.0f%%)" % (sLabel, fProba*100.)
        print(sResults)
        cadena += sLabel
        nCount += 1

    # quit
    elif key == ord('q'):
        break

# do a bit of cleanup
oStream.release()
cv2.destroyAllWindows()


Captured video: 5.1 sec, (76, 240, 320, 3), 15.0 fps
Calculate optical flow on 40 frames ...
Execution time: 7.78 sec
Optical flow per frame: 0.195
Predict video with i3d_with_top ...
Top 1: [  4] c030 VEN OE (confidence 97.7%)
Top 2: [  6] c052 TERREMOTO (confidence 1.1%)
Top 3: [ 17] c246 SpeedItUp (confidence 0.3%)

Sign: VEN OE (98%)

Captured video: 5.1 sec, (76, 240, 320, 3), 15.0 fps
Calculate optical flow on 40 frames ...
Execution time: 7.76 sec
Optical flow per frame: 0.194
Predict video with i3d_with_top ...
Top 1: [  4] c030 VEN OE (confidence 69.3%)
Top 2: [ 17] c246 SpeedItUp (confidence 14.5%)
Top 3: [  5] c049 TORMENTA (confidence 3.6%)

Sign: VEN OE (69%)

Captured video: 5.1 sec, (76, 240, 320, 3), 15.0 fps
Calculate optical flow on 40 frames ...
Execution time: 7.68 sec
Optical flow per frame: 0.192
Predict video with i3d_with_top ...
Top 1: [  4] c030 VEN OE (confidence 67.9%)
Top 2: [  5] c049 TORMENTA (confidence 20.7%)
Top 3: [ 17] c246 SpeedItUp (confidence 2.3

In [None]:
cam = cv2.VideoCapture(0)
while True:
    ret_val, img = cam.read()
    img = cv2.flip(img, 1)
    img = cv2.putText(img, 'OpenCV', (50, 50) , cv2.FONT_HERSHEY_SIMPLEX,  
                   1, (0,255,255), 2, cv2.LINE_AA) 
    cv2.imshow('my webcam', img)
    if cv2.waitKey(1) == 27: 
        break  # esc to quit
cv2.destroyAllWindows()

In [2]:
import cv2
# font 
font = cv2.FONT_HERSHEY_SIMPLEX 
  
# org 
org = (50, 50) 
  
# fontScale 
fontScale = 1
   
# Blue color in BGR 
color = (255, 0, 0) 
  
# Line thickness of 2 px 
thickness = 2
   
# Using cv2.putText() method 
image = cv2.putText(image, 'OpenCV', (50, 50) , cv2.FONT_HERSHEY_SIMPLEX,  
                   1, "yellow", 2, cv2.LINE_AA) 

NameError: name 'image' is not defined