Skip to content
This repository has been archived by the owner on Jun 13, 2020. It is now read-only.

Commit

Permalink
Renamed PHashMap -> HashMap, removed PVector
Browse files Browse the repository at this point in the history
At this point, I think there's no reason to add yet another vector class to the
Haskell ecosystem.  There already exist Data.Vector and Data.Sequence, which
seem to provide what I was trying to provide, but better, more complete, and
faster.

Also moved BitUtil.hs in the process
  • Loading branch information
exclipy committed Jan 8, 2011
1 parent 7921843 commit a7223e0
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 134 deletions.
2 changes: 1 addition & 1 deletion BitUtil.hs → Data/BitUtil.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module BitUtil (fromBitmap, toBitmap, bitmapToIndices, bitCount32) where
module Data.BitUtil where

import Data.Bits
import Data.Word
Expand Down
90 changes: 45 additions & 45 deletions Data/PHashMap.hs → Data/HashMap.hs
Original file line number Diff line number Diff line change
@@ -1,34 +1,34 @@
module Data.PHashMap (
-- * PHashMap type
PHashMap
module Data.HashMap (
-- * HashMap type
HashMap
-- * Operators
, (Data.PHashMap.!)
, (Data.HashMap.!)
-- * Query
, member
, notMember
, Data.PHashMap.lookup
, Data.HashMap.lookup
-- * Construction
, empty
, singleton
-- * Insertion
, insert
, insertWith
-- * Delete\/Update
, Data.PHashMap.delete
, Data.HashMap.delete
, adjust
, update
, alter
-- * Traversal
, Data.PHashMap.map
, Data.HashMap.map
, mapWithKey
-- * Conversion
, Data.PHashMap.elems
, Data.HashMap.elems
, keys
, toList
, fromList
) where

import BitUtil
import Data.BitUtil
import Control.Monad
import Control.DeepSeq
import Data.Bits
Expand All @@ -37,17 +37,17 @@ import Data.List hiding (insert, lookup)
import Data.Array as A
import Prelude as P

-- | A PHashMap from keys @k@ to values @v@
data (Eq k) => PHashMap k v = PHM {
-- | A HashMap from keys @k@ to values @v@
data (Eq k) => HashMap k v = HM {
hashFn :: k -> Int32
, root :: Node k v
}

instance (Eq k, Show k, Show v) => Show (PHashMap k v) where
show = ("fromList hashFn "++).show.(Data.PHashMap.toList)
instance (Eq k, Show k, Show v) => Show (HashMap k v) where
show = ("fromList hashFn "++).show.(Data.HashMap.toList)

instance (Eq k, NFData k, NFData v) => NFData (PHashMap k v) where
rnf (PHM f r) = f `seq` rnf r
instance (Eq k, NFData k, NFData v) => NFData (HashMap k v) where
rnf (HM f r) = f `seq` rnf r

instance (Eq k, NFData k, NFData v) => NFData (Node k v) where
rnf EmptyNode = ()
Expand Down Expand Up @@ -97,16 +97,16 @@ isEmptyNode _ = False
hashFragment shift hash = (hash `shiftR` shift) .&. fromIntegral mask


-- | @('empty' hashFn)@ is the empty PHashMap, with hashFn being the key hash function.
empty :: (Eq k) => (k -> Int32) -> PHashMap k v
-- | @('empty' hashFn)@ is the empty HashMap, with hashFn being the key hash function.
empty :: (Eq k) => (k -> Int32) -> HashMap k v

empty hashFn = PHM hashFn EmptyNode
empty hashFn = HM hashFn EmptyNode


-- | @('singleton' hashFn key value)@ is a single-element PHashMap holding @(key, value)@
singleton :: (Eq k) => (k -> Int32) -> k -> v -> PHashMap k v
-- | @('singleton' hashFn key value)@ is a single-element HashMap holding @(key, value)@
singleton :: (Eq k) => (k -> Int32) -> k -> v -> HashMap k v

singleton hashFn key value = PHM hashFn $ LeafNode (hashFn key) key value
singleton hashFn key value = HM hashFn $ LeafNode (hashFn key) key value


-- Helper data type for alterNode
Expand Down Expand Up @@ -137,10 +137,10 @@ combineNodes shift node node' =
-- | The expression (@'alter' f k map@) alters the value @x@ at @k@, or absence thereof.
-- 'alter' can be used to insert, delete, or update a value in a 'HashMap'.
-- In short : @'lookup' k ('alter' f k m) = f ('lookup' k m)@.
alter :: (Eq k) => (Maybe v -> Maybe v) -> k -> PHashMap k v -> PHashMap k v
alter :: (Eq k) => (Maybe v -> Maybe v) -> k -> HashMap k v -> HashMap k v

alter updateFn key (PHM hashFn root) =
PHM hashFn $ alterNode 0 updateFn (hashFn key) key root
alter updateFn key (HM hashFn root) =
HM hashFn $ alterNode 0 updateFn (hashFn key) key root


alterNode :: (Eq k) => Int -> (Maybe v -> Maybe v) -> Int32 -> k -> Node k v -> Node k v
Expand Down Expand Up @@ -270,7 +270,7 @@ alterNode shift updateFn hash key node@(ArrayNode numChildren subNodes) =
-- will insert the pair (key, value) into @mp@ if key does
-- not exist in the map. If the key does exist, the function will
-- insert the pair @(key, f new_value old_value)@.
insertWith :: (Eq k) => (v -> v -> v) -> k -> v -> PHashMap k v -> PHashMap k v
insertWith :: (Eq k) => (v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v

insertWith accumFn key value hashMap =
let fn :: (v -> v -> v) -> v -> Maybe v -> Maybe v
Expand All @@ -283,38 +283,38 @@ insertWith accumFn key value hashMap =
-- If the key is already present in the map, the associated value is
-- replaced with the supplied value. 'insert' is equivalent to
-- @'insertWith' 'const'@.
insert :: (Eq k) => k -> v -> PHashMap k v -> PHashMap k v
insert :: (Eq k) => k -> v -> HashMap k v -> HashMap k v

insert = insertWith const


-- | The expression (@'update' f k map@) updates the value @x@
-- at @k@ (if it is in the map). If (@f x@) is 'Nothing', the element is
-- deleted. If it is (@'Just' y@), the key @k@ is bound to the new value @y@.
update :: (Eq k) => (v -> Maybe v) -> k -> PHashMap k v -> PHashMap k v
update :: (Eq k) => (v -> Maybe v) -> k -> HashMap k v -> HashMap k v

update updateFn = alter ((=<<) updateFn)


-- | Delete a key and its value from the map. When the key is not
-- a member of the map, the original map is returned.
delete :: (Eq k) => k -> PHashMap k v -> PHashMap k v
delete :: (Eq k) => k -> HashMap k v -> HashMap k v

delete = alter (const Nothing)


-- | Update a value at a specific key with the result of the provided function.
-- When the key is not a member of the map, the original map is returned.
adjust :: (Eq k) => (v -> v) -> k -> PHashMap k v -> PHashMap k v
adjust :: (Eq k) => (v -> v) -> k -> HashMap k v -> HashMap k v

adjust updateFn = update ((Just).updateFn)


-- | Map a function over all values in the map.
mapWithKey :: (Eq k) => (k -> v -> v) -> PHashMap k v -> PHashMap k v
mapWithKey :: (Eq k) => (k -> v -> v) -> HashMap k v -> HashMap k v

mapWithKey mapFn (PHM hashFn root) =
PHM hashFn $ mapWithKeyNode mapFn root
mapWithKey mapFn (HM hashFn root) =
HM hashFn $ mapWithKeyNode mapFn root


mapWithKeyNode :: (Eq k) => (k -> v -> v) -> Node k v -> Node k v
Expand All @@ -334,7 +334,7 @@ mapWithKeyNode mapFn (ArrayNode numChildren subNodes) =


-- | Map a function over all values in the map.
map :: (Eq k) => (v -> v) -> PHashMap k v -> PHashMap k v
map :: (Eq k) => (v -> v) -> HashMap k v -> HashMap k v

map fn = mapWithKey (const fn)

Expand All @@ -348,9 +348,9 @@ arrayMap fn arr = array (bounds arr) $ P.map (\(key, value) -> (key, fn value))
--
-- The function will return the corresponding value as @('Just' value)@,
-- or 'Nothing' if the key isn't in the map.
lookup :: (Eq k) => k -> PHashMap k v -> Maybe v
lookup :: (Eq k) => k -> HashMap k v -> Maybe v

lookup key (PHM hashFn root) = lookupNode 0 (hashFn key) key root
lookup key (HM hashFn root) = lookupNode 0 (hashFn key) key root


lookupNode :: (Eq k) => Int -> Int32 -> k -> Node k v -> Maybe v
Expand Down Expand Up @@ -379,28 +379,28 @@ lookupNode shift hash key (ArrayNode _numChildren subNodes) =

-- | Find the value at a key.
-- Calls 'error' when the element can not be found.
(!) :: (Eq k) => PHashMap k v -> k -> v
(!) :: (Eq k) => HashMap k v -> k -> v

hashMap ! key = maybe (error "element not in the map")
id
(Data.PHashMap.lookup key hashMap)
(Data.HashMap.lookup key hashMap)


-- | Is the key a member of the map? See also 'notMember'.
member :: (Eq k) => k -> PHashMap k v -> Bool
member :: (Eq k) => k -> HashMap k v -> Bool

member key hashMap = maybe False (const True) (Data.PHashMap.lookup key hashMap)
member key hashMap = maybe False (const True) (Data.HashMap.lookup key hashMap)

-- | Is the key not a member of the map? See also 'member'.
notMember :: (Eq k) => k -> PHashMap k v -> Bool
notMember :: (Eq k) => k -> HashMap k v -> Bool

notMember key = not.(member key)


-- | Convert to a list of key\/value pairs.
toList :: (Eq k) => PHashMap k v -> [(k, v)]
toList :: (Eq k) => HashMap k v -> [(k, v)]

toList (PHM _hashFn root) = toListNode root
toList (HM _hashFn root) = toListNode root


toListNode :: (Eq k) => Node k v -> [(k, v)]
Expand All @@ -421,20 +421,20 @@ toListNode (ArrayNode _numChildren subNodes) =
-- | Build a map from a list of key\/value pairs.
-- If the list contains more than one value for the same key, the last value
-- for the key is retained.
fromList :: (Eq k) => (k -> Int32) -> [(k, v)] -> PHashMap k v
fromList :: (Eq k) => (k -> Int32) -> [(k, v)] -> HashMap k v

fromList hashFn = foldl' (\hm (key, value) -> insert key value hm)
(empty hashFn)
-- TODO: make this more efficient by using a transient array


-- | Return all keys of the map.
keys :: (Eq k) => PHashMap k v -> [k]
keys :: (Eq k) => HashMap k v -> [k]

keys = (P.map fst).toList


-- | Return all elements of the map.
elems :: (Eq k) => PHashMap k v -> [v]
elems :: (Eq k) => HashMap k v -> [v]

elems = (P.map snd).toList
92 changes: 18 additions & 74 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,77 +2,24 @@ Persistent Vectors and HashMaps for Haskell
===========================================

One of the prominent features of the [Clojure][1] language is a set of
[immutable data structures][2] with efficient manipulation operations. Two of
the most innovative and important are the persistent vector and persistent hash
map.
[immutable data structures][2] with efficient manipulation operations. One of
the most innovative and important is the persistent hash map based on the
*hash array mapped trie*.

This project is a port of these structures to Haskell. The API provides
Data.PVector (the persistent vector) and Data.PHashMap (the persistent hash
map). The interface for both has been kept as consistent as possible with
Data.Map.
This project is a port of this structure to Haskell, as Data.HashMap. The
interface has been kept as consistent as possible with Data.Map.

[1]: http://clojure.org/
[2]: http://clojure.org/datatypes


Basic usage
-----------
These two data structures are:
Here's a demo of what you can do with a HashMap:

* Immutable. Unlike Data.Array.ST and Data.HashTable, there are no monads in
sight.
* Persistent. They provide "update" operations which do not destroy the
original structure.
* Efficient. Unlike Data.Array, updating a PVector or PHashMap doesn't copy
the entire structure, but only the changed path in the internal tree
representation.

Here's a demo of what you can do with a PVector:

ghci> :m + Data.PVector
ghci> empty -- the empty pvector
fromList []

ghci> append 1 it
fromList [1]

ghci> append 42 it
fromList [1,42]

ghci> append 13 it
fromList [1,42,13]

ghci> let a = it
ghci> a ! 0 -- indexes are from 0 to n-1
1

ghci> a ! 1
42

ghci> a ! 2
13

ghci> set 1 71 a -- a new PVector with the element replaced
fromList [1,71,13]

ghci> adjust succ 2 a -- apply a function to a single element
fromList [1,42,14]

ghci> Data.PVector.map succ a -- apply a function to all elements
fromList [2,43,14]

ghci> fromList [1..10] -- convert a list to a PVector
fromList [1,2,3,4,5,6,7,8,9,10]

ghci> elems it -- convert a PVector to a list
[1,2,3,4,5,6,7,8,9,10]


And here's a demo of the basic functionality of PHashMap:

ghci> :m + Data.PHashMap
ghci> :m + Data.HashMap
ghci> empty Data.HashTable.hashString
-- an empty PHashMap (requires a key hash function)
-- an empty HashMap (requires a key hash function)
fromList hashFn []

ghci> insert "foo" 1 it
Expand All @@ -94,16 +41,16 @@ And here's a demo of the basic functionality of PHashMap:
ghci> a ! "baz" -- using (!) is unsafe
*** Exception: array index out of range: element not in the map

ghci> Data.PHashMap.lookup "bar" a
ghci> Data.HashMap.lookup "bar" a
Just 42

ghci> Data.PHashMap.lookup "baz" a -- 'lookup' returns a safe Maybe
ghci> Data.HashMap.lookup "baz" a -- 'lookup' returns a safe Maybe
Nothing

ghci> adjust succ "foo" a -- apply a function to a value
fromList hashFn [("qux",13),("foo",2),("bar",42)]

ghci> Data.PHashMap.map succ a -- apply a function to all values
ghci> Data.HashMap.map succ a -- apply a function to all values
fromList hashFn [("qux",14),("foo",2),("bar",43)]

ghci> keys a
Expand Down Expand Up @@ -131,20 +78,18 @@ To try it yourself, just do the usual:
Performance
-----------

The single-element operations for each of these structures technically run in
logarithmic time. However, it is implemented as a 32-ary tree, which means it
never exceeds a depth of 7 nodes, so you can treat them as constant-time
operations (for relatively large constants).
The single-element operations for the hash map technically run in logarithmic
time. However, it is implemented as a 32-ary tree, which means it never exceeds
a depth of 7 nodes, so you can treat them as constant-time operations (for
relatively large constants).

How it works
------------

I wrote this code after reading the following explanatory blog posts on how they
work in Clojure. They should also provide a decent birds-eye overview of my
Haskell implementation.
I wrote this code after reading the following explanatory blog posts on how the
Clojure version works. They should also provide a decent bird's-eye overview of
my Haskell implementation.

* [Understanding Clojure’s PersistentVector implementation
](http://blog.higher-order.net/2009/02/01/understanding-clojures-persistentvector-implementation/)
* [Understanding Clojure’s PersistentHashMap
](http://blog.higher-order.net/2009/09/08/understanding-clojures-persistenthashmap-deftwice/)
* [Assoc and Clojure’s PersistentHashMap: part II
Expand All @@ -158,6 +103,5 @@ To do (help appreciated!)
* More strictness
* A more efficient fromList (it currently constructs lots of intermediary
structures)
* Make a PVector-based implementation of IArray (?)
* Unit tests
* Benchmarks
Loading

0 comments on commit a7223e0

Please sign in to comment.