Skip to content

Commit

Permalink
separate modules for DocIdSet and DocIdMap, newtype DocId
Browse files Browse the repository at this point in the history
  • Loading branch information
Uwe Schmidt committed May 2, 2014
1 parent c41a6bd commit 731293c
Show file tree
Hide file tree
Showing 15 changed files with 223 additions and 228 deletions.
19 changes: 11 additions & 8 deletions hunt-compression/src/Hunt/Common/Occurrences/Compression.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE TypeSynonymInstances #-}

-- ----------------------------------------------------------------------------

Expand All @@ -22,11 +22,14 @@ module Hunt.Common.Occurrences.Compression
)
where

import qualified Hunt.Common.DocIdMap as DM
import Hunt.Common.Occurrences hiding (delete)
import Data.Typeable
import Control.DeepSeq

import Data.Binary
import Data.Typeable

import qualified Hunt.Common.DocIdMap as DocIdMap
import Hunt.Common.DocIdSet (DocIdSet)
import Hunt.Common.Occurrences hiding (delete)

-- ------------------------------------------------------------

Expand All @@ -40,7 +43,7 @@ class OccCompression cv where
-- XXX: not sure if this is needed/used anymore
-- | Delete a set of documents efficiently.
-- Depending on the implementation, the compressed data type may not have to be decompressed.
differenceWithKeySet :: DM.DocIdSet -> cv -> cv
differenceWithKeySet :: DocIdSet -> cv -> cv

-- ------------------------------------------------------------

Expand All @@ -64,11 +67,11 @@ instance Binary StrictOccurrences where
instance OccCompression StrictOccurrences where
compressOcc = mkStrictOcc
decompressOcc = unSOcc
differenceWithKeySet s x = compressOcc $ DM.diffWithSet (decompressOcc x) s
differenceWithKeySet s x = compressOcc $ DocIdMap.diffWithSet (decompressOcc x) s

instance OccCompression Occurrences where
compressOcc = id
decompressOcc = id
differenceWithKeySet = flip DM.diffWithSet
differenceWithKeySet = flip DocIdMap.diffWithSet

-- ------------------------------------------------------------
1 change: 1 addition & 0 deletions hunt-searchengine/hunt-searchengine.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ library
Hunt.Common.DocDesc
Hunt.Common.DocId
Hunt.Common.DocIdMap
Hunt.Common.DocIdSet
Hunt.Common.Occurrences
Hunt.Common.Positions
Hunt.Common.RawResult
Expand Down
12 changes: 7 additions & 5 deletions hunt-searchengine/src/Hunt/Common.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ module Hunt.Common
module Hunt.Common.BasicTypes
, module Hunt.Common.DocId
, module Hunt.Common.DocIdMap
, module Hunt.Common.DocIdSet
, module Hunt.Common.Document
, module Hunt.Common.Occurrences
, module Hunt.Common.Positions
Expand All @@ -27,12 +28,13 @@ module Hunt.Common
)
where

import Hunt.Common.ApiDocument (ApiDocument (..))
import Hunt.Common.ApiDocument (ApiDocument (..))
import Hunt.Common.BasicTypes
import Hunt.Common.DocId
import Hunt.Common.DocIdMap (DocIdMap, DocIdSet)
import Hunt.Common.Document (Document (..))
import Hunt.Common.Occurrences (Occurrences)
import Hunt.Common.Positions (Positions)
import Hunt.Common.DocIdMap (DocIdMap)
import Hunt.Common.DocIdSet (DocIdSet)
import Hunt.Common.Document (Document (..))
import Hunt.Common.Occurrences (Occurrences)
import Hunt.Common.Positions (Positions)
import Hunt.Common.RawResult
import Hunt.Index.Schema
23 changes: 12 additions & 11 deletions hunt-searchengine/src/Hunt/Common/DocId.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import qualified Data.Binary as B
import Data.Digest.Murmur64

-- ------------------------------------------------------------

{-
-- | The unique identifier of a document.
type DocId = Int
Expand All @@ -50,26 +50,27 @@ fromInteger = fromIntegral
{-# INLINE mkFirst #-}
{-# INLINE fromInteger #-}
-- -}
-- ------------------------------------------------------------

-- the wrapped DocId
-- currently only used for JSON debug output

newtype DocId' = DocId' {unDocId' :: Int}
newtype DocId = DocId {unDocId :: Int}
deriving (Eq, Ord)

instance Show DocId' where
show = toHex . unDocId'
instance Show DocId where
show = toHex . unDocId

instance Binary DocId' where
put = put . unDocId'
get = DocId' <$> get
instance Binary DocId where
put = put . unDocId
get = DocId <$> get

instance ToJSON DocId' where
toJSON (DocId' i) = toJSON $ toHex i
instance ToJSON DocId where
toJSON (DocId i) = toJSON $ toHex i

mkDocId' :: Binary a => a -> DocId'
mkDocId' = DocId' . fromIntegral . asWord64 . hash64 . B.encode
mkDocId :: Binary a => a -> DocId
mkDocId = DocId . fromIntegral . asWord64 . hash64 . B.encode

toHex :: Int -> String
toHex !y = "0x" ++ toX 16 "" y
Expand Down
107 changes: 17 additions & 90 deletions hunt-searchengine/src/Hunt/Common/DocIdMap.hs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@
Module : Hunt.Index.Common.DocIdMap
Copyright : Copyright (C) 2012 Sebastian M. Schlatt, Timo B. Huebel, Uwe Schmidt
License : MIT
Maintainer : Timo B. Huebel (tbh@holumbus.org)
Stability : experimental
Portability: none portable
Maintainer : Uwe Schmidt
Efficient Map implementation for 'DocId's.
-}
Expand All @@ -21,7 +18,6 @@

module Hunt.Common.DocIdMap
( DocIdMap(..)
, DocIdSet
, empty
, singleton
, null
Expand All @@ -31,9 +27,6 @@ module Hunt.Common.DocIdMap
, delete
, insertWith
, size
, minKey
, maxKey
, isIntervall
, union
, intersection
, difference
Expand All @@ -54,11 +47,6 @@ module Hunt.Common.DocIdMap
, toList
, keys
, elems
, toDocIdSet

, DocIdSet'(..)
, toDocIdSet'
, toList'DocIdSet'
)
where

Expand All @@ -67,6 +55,7 @@ import Prelude hiding (filter, foldr, lookup, map,
import qualified Prelude as P

import Control.Applicative (Applicative (..), (<$>))
import Control.Arrow (first)
import Control.DeepSeq
import Control.Monad (foldM, mzero)

Expand All @@ -75,55 +64,12 @@ import Data.Binary (Binary (..))
import Data.Foldable hiding (fold, foldr, toList)
import qualified Data.HashMap.Strict as HM
import qualified Data.IntMap.BinTree.Strict as IM
import qualified Data.IntSet as S
import qualified Data.List as L
import qualified Data.Text as T
import Data.Typeable

import Hunt.Common.DocId
import qualified Hunt.Common.DocId as DId

-- ------------------------------------------------------------

-- TODO: maybe move DocIdSet to separate module.

-- | A set of 'DocId's.
type DocIdSet = S.IntSet

-- | Create a 'DocIdSet' from a list.
toDocIdSet :: [DocId] -> DocIdSet
toDocIdSet = S.fromList

-- ------------------------------------------------------------
--
-- the wrapped DocId set
-- currently only used for JSON debug output

newtype DocIdSet' = DIS { unDIS :: S.IntSet }
deriving (Eq, Show, NFData, Typeable)

instance ToJSON DocIdSet' where
toJSON = toJSON . L.map DocId' . S.toList . unDIS

instance FromJSON DocIdSet' where
parseJSON x = do l <- parseJSON x
case fromL l of
Nothing -> mzero
Just s -> return $ DIS s
where
fromL :: [String] -> Maybe S.IntSet
fromL = L.foldr ins (Just S.empty)
where
ins _ Nothing = Nothing
ins xs (Just s) = case fromHex xs of
Nothing -> Nothing
Just i -> Just $ S.insert i s

toDocIdSet' :: [DocId'] -> DocIdSet'
toDocIdSet' = DIS . S.fromList . L.map unDocId'

toList'DocIdSet' :: DocIdSet' -> [DocId']
toList'DocIdSet' = L.map DocId' . S.toList . unDIS
import Hunt.Common.DocIdSet (DocIdSet (..))

-- ------------------------------------------------------------

Expand Down Expand Up @@ -180,53 +126,37 @@ null = IM.null . unDIM

-- | Is the 'DocId' member of the map?
member :: DocId -> DocIdMap v -> Bool
member x = IM.member x . unDIM
member x = IM.member (unDocId x) . unDIM

-- | Lookup the value at a 'DocId' in the map.

-- The function will return the corresponding value as @('Just' value)@,
-- or 'Nothing' if the 'DocId' isn't in the map.
lookup :: DocId -> DocIdMap v -> Maybe v
lookup x = IM.lookup x . unDIM
lookup x = IM.lookup (unDocId x) . unDIM

-- | Insert a 'DocId' and value in the map.
-- If the 'DocId' is already present in the map, the associated value is replaced with the supplied
-- value. 'insert' is equivalent to 'insertWith' 'const'.
insert :: DocId -> v -> DocIdMap v -> DocIdMap v
insert x y = liftDIM $ IM.insert x y
insert x y = liftDIM $ IM.insert (unDocId x) y

-- | Delete a 'DocId' and its value from the map.
-- When the 'DocId' is not a member of the map, the original map is returned.
delete :: DocId -> DocIdMap v -> DocIdMap v
delete x = liftDIM $ IM.delete x
delete x = liftDIM $ IM.delete (unDocId x)

-- | Insert with a function, combining new value and old value.
-- @insertWith f docId value mp@ will insert the pair @(docId, value)@ into @mp@ if @docId@ does
-- not exist in the map. If the 'DocId' does exist, the function will insert the pair
-- @(docId, f new_value old_value)@.
insertWith :: (v -> v -> v) -> DocId -> v -> DocIdMap v -> DocIdMap v
insertWith f x y = liftDIM $ IM.insertWith f x y
insertWith f x y = liftDIM $ IM.insertWith f (unDocId x) y

-- | The number of elements in the map.
size :: DocIdMap v -> Int
size = IM.size . unDIM

-- | The minimum 'DocId' of the map.
minKey :: DocIdMap v -> DocId
minKey = maybe DId.mkNull (fst . fst) . IM.minViewWithKey . unDIM

-- | The maximum 'DocId' of the map.
maxKey :: DocIdMap v -> DocId
maxKey = maybe DId.mkNull (fst . fst) . IM.maxViewWithKey . unDIM

-- | Are the 'DocId's assigned sequentially such that @'maxKey' m - 'minKey' m == 'size' m@.
isIntervall :: DocIdMap v -> Bool
isIntervall m = null m
||
( maxKey m - minKey m
== size m - 1
)

-- | The (left-biased) union of two maps.
-- It prefers the first map when duplicate 'DocId' are encountered,
-- i.e. @(union == unionWith const)@.
Expand All @@ -243,7 +173,7 @@ difference = liftDIM2 $ IM.difference

-- | Difference between the map and a set of 'DocId's.
diffWithSet :: DocIdMap v -> DocIdSet -> DocIdMap v
diffWithSet m s = m `difference` (DIM $ IM.fromSet (const ()) s)
diffWithSet m s = m `difference` (DIM $ IM.fromSet (const ()) (unDIS s))

-- | The union with a combining function.
unionWith :: (v -> v -> v) -> DocIdMap v -> DocIdMap v -> DocIdMap v
Expand All @@ -267,15 +197,15 @@ map f = liftDIM $ IM.map f

-- | Map a function over all values in the map.
mapWithKey :: (DocId -> v -> r) -> DocIdMap v -> DocIdMap r
mapWithKey f = liftDIM $ IM.mapWithKey f
mapWithKey f = liftDIM $ IM.mapWithKey (f . DocId)

-- | Filter all values that satisfy some predicate.
filter :: (v -> Bool) -> DocIdMap v -> DocIdMap v
filter p = liftDIM $ IM.filter p

-- | Filter all 'DocId's/values that satisfy some predicate.
filterWithKey :: (DocId -> v -> Bool) -> DocIdMap v -> DocIdMap v
filterWithKey p = liftDIM $ IM.filterWithKey p
filterWithKey p = liftDIM $ IM.filterWithKey (p . DocId)

-- | @'traverseWithKey' f s == 'fromList' <$> 'traverse' (\(k, v) -> (,) k <$> f k v) ('toList' m)@
-- That is, behaves exactly like a regular 'traverse' except that the traversing
Expand All @@ -284,7 +214,7 @@ filterWithKey p = liftDIM $ IM.filterWithKey p
-- > traverseWithKey (\k v -> if odd k then Just (succ v) else Nothing) (fromList [(1, 'a'), (5, 'e')]) == Just (fromList [(1, 'b'), (5, 'f')])
-- > traverseWithKey (\k v -> if odd k then Just (succ v) else Nothing) (fromList [(2, 'c')]) == Nothing
traverseWithKey :: Applicative t => (DocId -> a -> t b) -> DocIdMap a -> t (DocIdMap b)
traverseWithKey f = (pure DIM <*>) . IM.traverseWithKey f . unDIM
traverseWithKey f = (pure DIM <*>) . IM.traverseWithKey (f . DocId) . unDIM

-- | Fold the values in the map using the given right-associative binary operator, such that
-- @'foldr' f z == 'Prelude.foldr' f z . 'elems'@.
Expand All @@ -309,25 +239,25 @@ foldr f u = IM.foldr f u . unDIM
-- > let f k a result = result ++ "(" ++ (show k) ++ ":" ++ a ++ ")"
-- > foldrWithKey f "Map: " (fromList [(5,"a"), (3,"b")]) == "Map: (5:a)(3:b)"
foldrWithKey :: (DocId -> v -> b -> b) -> b -> DocIdMap v -> b
foldrWithKey f u = IM.foldrWithKey f u . unDIM
foldrWithKey f u = IM.foldrWithKey (f . DocId) u . unDIM

-- | Create a map from a list of 'DocId'\/value pairs.
fromList :: [(DocId, v)] -> DocIdMap v
fromList = DIM . IM.fromList
fromList = DIM . IM.fromList . L.map (first unDocId)

-- | Build a map from a list of 'DocId'\/value pairs where the 'DocId's are in ascending order.
fromAscList :: [(DocId, v)] -> DocIdMap v
fromAscList = DIM . IM.fromAscList
fromAscList = DIM . IM.fromAscList . L.map (first unDocId)

-- | Convert the map to a list of 'DocId'\/value pairs.
-- Subject to list fusion.
toList :: DocIdMap v -> [(DocId, v)]
toList = IM.toList . unDIM
toList = L.map (first DocId) . IM.toList . unDIM

-- | Return all 'DocId's of the map in ascending order.
-- Subject to list fusion.
keys :: DocIdMap v -> [DocId]
keys = {- L.map DocId' . -} IM.keys . unDIM
keys = L.map DocId . IM.keys . unDIM

-- | Return all elements of the map in the ascending order of their 'DocId's.
-- Subject to list fusion.
Expand All @@ -347,9 +277,6 @@ elems = IM.elems . unDIM
{-# INLINE delete #-}
{-# INLINE insertWith #-}
{-# INLINE size #-}
{-# INLINE minKey #-}
{-# INLINE maxKey #-}
{-# INLINE isIntervall #-}
{-# INLINE union #-}
{-# INLINE difference #-}
{-# INLINE unionWith #-}
Expand Down
Loading

0 comments on commit 731293c

Please sign in to comment.