Skip to content

Commit

Permalink
Performance work with longestCommonSubsequence
Browse files Browse the repository at this point in the history
  • Loading branch information
bgwines committed May 20, 2015
1 parent 275be9a commit ea999f7
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 34 deletions.
4 changes: 2 additions & 2 deletions filediff.cabal
Expand Up @@ -2,7 +2,7 @@
-- documentation, see http://haskell.org/cabal/users-guide/

name: filediff
version: 1.0.0.4
version: 1.0.0.5
synopsis: Diffing and patching module
description: `filediff` is a Haskell library for creating diffs, and applying diffs to files and directories.
homepage: https://github.com/bgwines/filediff
Expand All @@ -23,7 +23,7 @@ library
exposed-modules: Filediff, Filediff.Types, Filediff.Stats, Filediff.Printing
other-modules: Filediff.Utils
-- other-extensions:
build-depends: base >=4.7 && <4.8, mtl, time, directory, either, transformers, data-memocombinators, Zora >=1.1.22, text, data-default, tasty, tasty-hunit, rainbow, bytestring, threads
build-depends: base >=4.7 && <4.8, mtl, time, directory, either, transformers, data-memocombinators, Zora >=1.1.22, text, data-default, tasty, tasty-hunit, rainbow, bytestring, threads, hashmap
hs-source-dirs: src
default-language: Haskell2010

Expand Down
68 changes: 44 additions & 24 deletions src/Filediff.hs
Expand Up @@ -17,6 +17,9 @@ module Filediff
, applyToDirectory
) where

import Debug.Trace
import qualified Data.HashMap as HMap

import Control.Concurrent (forkIO)
import Control.Concurrent.Thread as Thread (Result(..))
import Control.Concurrent.Thread.Group as ThreadGroup (new, forkIO, wait)
Expand Down Expand Up @@ -217,7 +220,7 @@ mapMParallelWaitForAll f list = do
--
-- > λ diffLists "abcdefg" "wabxyze"
-- > ListDiff {dels = [(2,'c'),(3,'d'),(5,'f'),(6,'g')], adds = [(0,'w'),(3,'x'),(4,'y'),(5,'z')]}
diffLists :: forall a. (Eq a, MemoTable a) => [a] -> [a] -> ListDiff a
diffLists :: forall a. (Eq a) => [a] -> [a] -> ListDiff a
diffLists a b = ListDiff
(map (\i -> (i, a !! i)) $ nonSubsequenceIndices common a)
(getProgressiveIndicesToAdd common b)
Expand All @@ -229,7 +232,7 @@ diffLists a b = ListDiff
-- > [(0,"w"),(3,"x"),(4,"y")]
-- > λ common
-- > ["a","b","e"]
getProgressiveIndicesToAdd :: (Eq a) => [a] -> [a] -> [(Int, a)]
getProgressiveIndicesToAdd :: [a] -> [a] -> [(Int, a)]
getProgressiveIndicesToAdd sub super =
map (\i -> (i, super !! i)) $ nonSubsequenceIndices sub super

Expand Down Expand Up @@ -283,40 +286,57 @@ longestCommonSubsequenceWrapper xs ys =
getCommonPrefix :: [a] -> [a] -> [a]
getCommonPrefix as bs = map fst . takeWhile (uncurry (==)) $ zip as bs

-- xs = abcd***efg
-- ys = abcd???????efg
-- getMiddle xs == ****
-- getMiddle ys = ??????
getMiddle :: [a] -> [a]
getMiddle elems = take (length elems - length commonPrefix - length commonSuffix) . drop (length commonPrefix) $ elems

-- | Compute the longest common (potentially noncontiguous) subsequence
-- between two sequences. Element type is fixed because memoization
-- requires a static type.
longestCommonSubsequence :: forall a. (Eq a) => [a] -> [a] -> [a]
longestCommonSubsequence xs ys = longestCommonSubsequence' xs ys 0 0

-- optimization: hash lines
-- | Compute the longest common (potentially noncontiguous) subsequence
-- between two sequences. Element type is fixed because memoization
-- requires a static type.
longestCommonSubsequence' :: forall a. (Eq a) =>
[a] -> [a] -> Int -> Int -> [a]
longestCommonSubsequence' xs ys i j
= (Memo.memo2 Memo.integral Memo.integral
(longestCommonSubsequence'' xs ys)) i j
longestCommonSubsequence xs ys = longestCommonSubsequence' 0 0
where
longestCommonSubsequence'' :: [a] -> [a] -> Int -> Int -> [a]
longestCommonSubsequence'' [] _ _ _ = []
longestCommonSubsequence'' _ [] _ _ = []
longestCommonSubsequence'' (x:xs) (y:ys) i j =
if x == y
then x : (longestCommonSubsequence' xs ys (i + 1) (j + 1)) -- WLOG
else if (length caseX) > (length caseY)
then caseX
else caseY
-- TODO: UArray?
xs' :: HMap.Map Int a
xs' = foldl update HMap.empty (zip [0..] xs)

ys' :: HMap.Map Int a
ys' = foldl update HMap.empty (zip [0..] ys)

update :: HMap.Map Int a -> (Int, a) -> HMap.Map Int a
update hmap (i, a) = HMap.insert i a hmap

xsLength :: Int
xsLength = length xs

ysLength :: Int
ysLength = length ys

longestCommonSubsequence' :: Int -> Int -> [a]
longestCommonSubsequence' = Memo.memo2 Memo.integral Memo.integral longestCommonSubsequence''

longestCommonSubsequence'' :: Int -> Int -> [a]
longestCommonSubsequence'' i j
| i == xsLength = []
| j == ysLength = []
| x == y = x : longestCommonSubsequence' (i + 1) (j + 1) -- WLOG
| length caseX > length caseY = caseX
| otherwise = caseY
where
x :: a
x = xs' HMap.! i

y :: a
y = ys' HMap.! j

caseX :: [a]
caseX = longestCommonSubsequence' xs (y:ys) (i+1) j
caseX = longestCommonSubsequence' (i + 1) j

caseY :: [a]
caseY = longestCommonSubsequence' (x:xs) ys i (j+1)
caseY = longestCommonSubsequence' i (j + 1)

-- | When `sub` is a (not necessarily contiguous) subsequence of `super`,
-- get the index at which each element of `sub` appears. E.g.
Expand Down
9 changes: 1 addition & 8 deletions src/Filediff/Types.hs
Expand Up @@ -31,9 +31,6 @@ import Data.List (find, intersect, intersectBy, sortBy, (\\))
import Data.Monoid
import Control.Applicative

import Data.MemoCombinators (Memo, wrap)
import Data.MemoCombinators.Class (MemoTable, table, memoize)

-- | Diff between two lists. `dels` represents the indices
-- at which to delete, and `adds` represents the indices and
-- contents to add.
Expand All @@ -46,7 +43,7 @@ instance Default (ListDiff a) where
def :: ListDiff a
def = ListDiff [] []

instance (Eq a, Ord a, MemoTable a) => Monoid (ListDiff a) where
instance (Eq a, Ord a) => Monoid (ListDiff a) where
mempty :: ListDiff a
mempty = ListDiff [] []

Expand Down Expand Up @@ -220,10 +217,6 @@ instance Default Diff where
def :: Diff
def = Diff []

instance MemoTable T.Text where
table :: Memo T.Text
table = wrap T.pack T.unpack table

instance Monoid Diff where
mempty :: Diff
mempty = Diff []
Expand Down

0 comments on commit ea999f7

Please sign in to comment.