Remove align_v2.

align_v2 manually computes the same affine transformation as align_v1, so it is redundant; align_v1 is kept and renamed to align.
cmusatyalab · Jul 15, 2016 · 84576d2 · 84576d2
1 parent 87e5c04
commit 84576d2
Show file tree

Hide file tree

Showing 6 changed files with 23 additions and 170 deletions.
diff --git a/demos/classifier.py b/demos/classifier.py
@@ -69,8 +69,8 @@ def getRep(imgPath):
         print("Face detection took {} seconds.".format(time.time() - start))
 
     start = time.time()
-    alignedFace = align.align_v1(args.imgDim, rgbImg, bb,
-                                 landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
+    alignedFace = align.align(args.imgDim, rgbImg, bb,
+                              landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
     if alignedFace is None:
         raise Exception("Unable to align image: {}".format(imgPath))
     if args.verbose:

diff --git a/demos/compare.py b/demos/compare.py
@@ -81,8 +81,8 @@ def getRep(imgPath):
         print("  + Face detection took {} seconds.".format(time.time() - start))
 
     start = time.time()
-    alignedFace = align.align_v1(args.imgDim, rgbImg, bb,
-                                 landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
+    alignedFace = align.align(args.imgDim, rgbImg, bb,
+                              landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
     if alignedFace is None:
         raise Exception("Unable to align image: {}".format(imgPath))
     if args.verbose:

diff --git a/docs/usage.md b/docs/usage.md
@@ -17,8 +17,8 @@ net = openface.TorchNeuralNet(args.networkModel, args.imgDim, cuda=args.cuda)
 
 # `img` is a numpy matrix containing the RGB pixels of the image.
 bb = align.getLargestFaceBoundingBox(img)
-alignedFace = align.align_v1(args.imgDim, img, bb,
-                             landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
+alignedFace = align.align(args.imgDim, img, bb,
+                          landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
 rep1 = net.forward(alignedFace)
 
 # `rep2` obtained similarly.

diff --git a/openface/align_dlib.py b/openface/align_dlib.py
@@ -74,10 +74,8 @@ class AlignDlib:
     .. image:: ../images/dlib-landmark-mean.png
     """
 
-    #: Landmark indices corresponding to the inner eyes and bottom lip.
+    #: Landmark indices.
     INNER_EYES_AND_BOTTOM_LIP = [39, 42, 57]
-
-    #: Landmark indices corresponding to the outer eyes and nose.
     OUTER_EYES_AND_NOSE = [36, 45, 33]
 
     def __init__(self, facePredictor):
@@ -148,41 +146,8 @@ def findLandmarks(self, rgbImg, bb):
 
     def align(self, imgDim, rgbImg, bb=None,
               landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP,
-              skipMulti=False, version=1):
-        r"""align(imgDim, rgbImg, bb=None, landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP, version=1)
-
-        Transform and align a face in an image.
-
-        :param imgDim: The edge length in pixels of the square the image is resized to.
-        :type imgDim: int
-        :param rgbImg: RGB image to process. Shape: (height, width, 3)
-        :type rgbImg: numpy.ndarray
-        :param bb: Bounding box around the face to align. \
-                   Defaults to the largest face.
-        :type bb: dlib.rectangle
-        :param landmarks: Detected landmark locations. \
-                          Landmarks found on `bb` if not provided.
-        :type landmarks: list of (x,y) tuples
-        :param landmarkIndices: The indices to transform to.
-        :type landmarkIndices: list of ints
-        :param skipMulti: Skip image if more than one face detected.
-        :type skipMulti: bool
-        :param version: The alignment version to use.
-        :type version: int
-        :return: The aligned RGB image. Shape: (imgDim, imgDim, 3)
-        :rtype: numpy.ndarray
-        """
-        if version == 1:
-            return self.align_v1(imgDim, rgbImg, bb, landmarks, landmarkIndices, skipMulti)
-        elif version == 2:
-            return self.align_v2(imgDim, rgbImg, bb, landmarks, landmarkIndices, skipMulti)
-        else:
-            assert False
-
-    def align_v1(self, imgDim, rgbImg, bb=None,
-                 landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP,
-                 skipMulti=False):
-        r"""align_v1(imgDim, rgbImg, bb=None, landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP)
+              skipMulti=False):
+        r"""align(imgDim, rgbImg, bb=None, landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP)
 
         Transform and align a face in an image.
 
@@ -223,103 +188,3 @@ def align_v1(self, imgDim, rgbImg, bb=None,
         thumbnail = cv2.warpAffine(rgbImg, H, (imgDim, imgDim))
 
         return thumbnail
-
-    def align_v2(self, imgDim, rgbImg, bb=None,
-                 landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP,
-                 skipMulti=False):
-        r"""align_v2(imgDim, rgbImg, bb=None, landmarks=None, landmarkIndices=INNER_EYES_AND_BOTTOM_LIP)
-
-        Transform and align a face in an image.
-
-        Uses the inverse of the desired template output points to calculate a transformation
-        matrix that relates output pixel coordinates to input pixel coordinates.
-        The transform matrix is multiplied with the pixel coordinates of the output image and
-        returns the corresponding pixel coordinates in the input image and interpolates over those
-        pixels and assigns the value to the output pixel.
-
-        :author: Dante Knowles | godrek@gmail.com | https://github.com/Godrek
-        :param imgDim: The edge length in pixels of the square the image is resized to.
-        :type imgDim: int
-        :param rgbImg: RGB image to process. Shape: (height, width, 3)
-        :type rgbImg: numpy.ndarray
-        :param bb: Bounding box around the face to align. \
-                   Defaults to the largest face.
-        :type bb: dlib.rectangle
-        :param landmarks: Detected landmark locations. \
-                          Landmarks found on `bb` if not provided.
-        :type landmarks: list of (x,y) tuples
-        :param landmarkIndices: The indices to transform to.
-        :type landmarkIndices: list of ints
-        :return: The aligned RGB image. Shape: (imgDim, imgDim, 3)
-        :rtype: numpy.ndarray
-        """
-        assert imgDim is not None
-        assert rgbImg is not None
-        assert landmarkIndices is not None
-
-        if bb is None:
-            bb = self.getLargestFaceBoundingBox(rgbImg, skipMulti)
-            if bb is None:
-                return
-
-        if landmarks is None:
-            landmarks = self.findLandmarks(rgbImg, bb)
-
-        npLandmarks = np.float32(landmarks)
-        npLandmarkIndices = np.array(landmarkIndices)
-        templateLandmarks = MINMAX_TEMPLATE[npLandmarkIndices]
-
-        fidPoints = npLandmarks[npLandmarkIndices]
-
-        # create output pixel mat
-        templateMat = np.ones((3, 3), dtype=np.float32)
-        for i in range(3):
-            for j in range(2):
-                templateMat[i][j] = templateLandmarks[i][j] * imgDim
-
-        templateMat = np.transpose(inv(templateMat))
-
-        # create transformation matrix from output pixel coordinates to input
-        # pixel coordinates
-        H = np.zeros((2, 3), dtype=np.float32)
-        for i in range(3):
-            H[0][i] = fidPoints[0][0] * templateMat[0][i] + fidPoints[1][0] * \
-                templateMat[1][i] + fidPoints[2][0] * templateMat[2][i]
-            H[1][i] = fidPoints[0][1] * templateMat[0][i] + fidPoints[1][1] * \
-                templateMat[1][i] + fidPoints[2][1] * templateMat[2][i]
-
-        imgWidth = np.shape(rgbImg)[1]
-        imgHeight = np.shape(rgbImg)[0]
-        thumbnail = np.zeros((imgDim, imgDim, 3), np.uint8)
-
-        # interpolation from input image to output pixels using transformation mat H to compute
-        # which input coordinates map to output
-        for y in range(imgDim):
-            for x in range(imgDim):
-                xprime = x * H[0][1] + y * H[0][0] + H[0][2]
-                yprime = x * H[1][1] + y * H[1][0] + H[1][2]
-                tx = int(xprime)
-                ty = int(yprime)
-                horzOffset = 1
-                vertOffset = 1
-                if(tx < 0 or tx >= imgWidth or ty < 0 or ty >= imgHeight):
-                    continue
-                if(tx == imgWidth - 1):
-                    horzOffset = 0
-                if(ty == imgHeight - 1):
-                    vertOffset = 0
-                f1 = xprime - float(tx)
-                f2 = yprime - float(ty)
-                upperLeft = rgbImg[ty][tx]
-                upperRight = rgbImg[ty][tx + horzOffset]
-                bottomLeft = rgbImg[ty + vertOffset][tx]
-                bottomRight = rgbImg[ty + vertOffset][tx + horzOffset]
-
-                thumbnail[x][y][0] = upperLeft[0] * (1.0 - f1) * (1.0 - f2) + upperRight[0] * f1 * (
-                    1.0 - f2) + bottomLeft[0] * (1.0 - f1) * f2 + bottomRight[0] * f1 * f2
-                thumbnail[x][y][1] = upperLeft[1] * (1.0 - f1) * (1.0 - f2) + upperRight[1] * f1 * (
-                    1.0 - f2) + bottomLeft[1] * (1.0 - f1) * f2 + bottomRight[1] * f1 * f2
-                thumbnail[x][y][2] = upperLeft[2] * (1.0 - f1) * (1.0 - f2) + upperRight[2] * f1 * (
-                    1.0 - f2) + bottomLeft[2] * (1.0 - f1) * f2 + bottomRight[2] * f1 * f2
-
-        return thumbnail
diff --git a/tests/openface_api_tests.py b/tests/openface_api_tests.py
@@ -57,18 +57,11 @@ def test_pipeline():
     assert bb.top() == 193
     assert bb.bottom() == 859
 
-    alignedFace = align.align_v1(imgDim, rgbImg, bb,
-                                 landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
+    alignedFace = align.align(imgDim, rgbImg, bb,
+                              landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
     # assert np.isclose(norm(alignedFace), 7.61577)
 
     rep = net.forward(alignedFace)
     cosDist = scipy.spatial.distance.cosine(rep, np.ones(128))
     print(cosDist)
     assert np.isclose(cosDist, 0.938840385931)
-
-    alignedFace = align.align_v2(imgDim, rgbImg, bb,
-                                 landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
-    rep = net.forward(alignedFace)
-    cosDist = scipy.spatial.distance.cosine(rep, np.ones(128))
-    print(cosDist)
-    assert np.isclose(cosDist, 0.938487273221)
diff --git a/util/align-dlib.py b/util/align-dlib.py
@@ -84,13 +84,15 @@ def alignMain(args):
     # Shuffle so multiple versions can be run at once.
     random.shuffle(imgs)
 
-    if args.landmarks == 'outerEyesAndNose':
-        landmarkIndices = openface.AlignDlib.OUTER_EYES_AND_NOSE
-    elif args.landmarks == 'innerEyesAndBottomLip':
-        landmarkIndices = openface.AlignDlib.INNER_EYES_AND_BOTTOM_LIP
-    else:
+    landmarkMap = {
+        'outerEyesAndNose': openface.AlignDlib.OUTER_EYES_AND_NOSE,
+        'innerEyesAndBottomLip': openface.AlignDlib.INNER_EYES_AND_BOTTOM_LIP
+    }
+    if args.landmarks not in landmarkMap:
         raise Exception("Landmarks unrecognized: {}".format(args.landmarks))
 
+    landmarkIndices = landmarkMap[args.landmarks]
+
     align = openface.AlignDlib(args.dlibFacePredictor)
 
     nFallbacks = 0
@@ -111,14 +113,9 @@ def alignMain(args):
                     print("  + Unable to load.")
                 outRgb = None
             else:
-                if args.version == 1:
-                    outRgb = align.align_v1(args.size, rgb,
-                                            landmarkIndices=landmarkIndices,
-                                            skipMulti=args.skipMulti)
-                elif args.version == 2:
-                    outRgb = align.align_v2(args.size, rgb,
-                                            landmarkIndices=landmarkIndices,
-                                            skipMulti=args.skipMulti)
+                outRgb = align.align(args.size, rgb,
+                                     landmarkIndices=landmarkIndices,
+                                     skipMulti=args.skipMulti)
                 if outRgb is None and args.verbose:
                     print("  + Unable to align.")
 
@@ -156,7 +153,8 @@ def alignMain(args):
         'align', help='Align a directory of images.')
     alignmentParser.add_argument('landmarks', type=str,
                                  choices=['outerEyesAndNose',
-                                          'innerEyesAndBottomLip'],
+                                          'innerEyesAndBottomLip',
+                                          'eyes_1'],
                                  help='The landmarks to align to.')
     alignmentParser.add_argument(
         'outputDir', type=str, help="Output directory of aligned images.")
@@ -167,9 +165,6 @@ def alignMain(args):
     alignmentParser.add_argument(
         '--skipMulti', action='store_true', help="Skip images with more than one face.")
     alignmentParser.add_argument('--verbose', action='store_true')
-    alignmentParser.add_argument('--version', type=int,
-                                 choices=[1, 2],
-                                 help='The alignment version to use.', default=1)
 
     args = parser.parse_args()