Permalink
Browse files

code from sphinx package on hackage

  • Loading branch information...
0 parents commit c12942bdf45e5df0fcf394cc7cce8696f77c06e6 Greg Weber committed Jul 4, 2009
30 LICENSE
@@ -0,0 +1,30 @@
+Copyright (c) 2008, Tupil
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of Tupil nor the names of other
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,4 @@
+#! /usr/bin/env runhaskell
+
+> import Distribution.Simple
+> main = defaultMain
@@ -0,0 +1,96 @@
+-- | This is the Haskell version of the Sphinx searchd client.
+
+module Text.Search.Sphinx ( Configuration (..)
+ , query
+ , defaultConfig
+ ) where
+
+import Network
+import IO hiding (bracket)
+import System
+import Control.Exception
+import Data.Binary.Get
+import Data.Binary.Put (runPut, Put)
+import Data.ByteString.Lazy hiding (pack, length, map, groupBy, head)
+import Data.ByteString.Lazy.Char8 (pack)
+import qualified Data.ByteString.Lazy as BS
+import Data.Char (ord, chr)
+import Data.Int (Int64)
+import Prelude hiding (readList)
+import Text.Search.Sphinx.Get
+import Text.Search.Sphinx.Put
+import Text.Search.Sphinx.Configuration
+import Text.Search.Sphinx.Types (SearchResult)
+import qualified Text.Search.Sphinx.Types as T
+
+
+type Connection = (Handle, Configuration)
+
+
+connect :: Configuration -> IO Connection
+connect cfg = do connection <- connectTo (host cfg) (PortNumber $ fromIntegral $ port cfg)
+ bs <- hGet connection 4
+ let version = runGet getWord32be bs
+ myVersion = runPut (num 1)
+ hPut connection myVersion
+ return (connection, cfg)
+
+
+addQuery :: Configuration -> String -> String -> String -> Put
+addQuery cfg query index comment = do
+ nums cfg [ offset
+ , limit
+ , T.matchMode . mode
+ , T.rank . ranker
+ , T.sort . sort]
+ str (sortBy cfg)
+ str query
+ numList (weights cfg)
+ str index
+ num 1
+ num64s cfg [minId, maxId]
+ num 0 -- todo: pack len filters + filters
+ enum (groupByFunc cfg)
+ str (groupBy cfg)
+ num (maxMatches cfg)
+ str (groupSort cfg)
+ num (cutoff cfg)
+ num (retryCount cfg)
+ num (retryDelay cfg)
+ str (groupDistinct cfg)
+ num 0 -- anchor point: todo
+ stringIntList (indexWeights cfg)
+ num (maxQueryTime cfg)
+ stringIntList (fieldWeights cfg)
+ str comment
+
+-- | The 'query' function queries the Sphinx daemon.
+query :: Configuration -- ^ The configuration
+ -> String -- ^ The indexes, "*" means every index
+ -> String -- ^ The query string
+ -> IO SearchResult
+query config indexes s = do
+ conn <- connect config
+ let q = addQuery config s indexes ""
+ results <- runQueries (fst conn) q 1
+ return $ head results -- We only do one query, so we always have one SearchResult
+
+runQueries :: Handle -> Put -> Int -> IO [SearchResult]
+runQueries conn q numQueries = do
+ let req = runPut (makeRunQuery q numQueries)
+ hPut conn req
+ hFlush conn
+ getResponse conn numQueries
+
+makeRunQuery query numQueries = do
+ cmd T.ScSearch
+ verCmd T.VcSearch
+ num $ fromEnum $ BS.length (runPut query) + 4
+ num numQueries
+ query
+
+getResponse conn numResults = do
+ header <- hGet conn 8
+ let x@(status, version, len) = readHeader header
+ response <- hGet conn (fromIntegral len)
+ return $ runGet (numResults `times` getResult) response
@@ -0,0 +1,77 @@
+module Text.Search.Sphinx.Configuration where
+
+import qualified Text.Search.Sphinx.Types as T
+
+-- | The configuration for a query
+data Configuration = Configuration {
+ -- | The hostname of the Sphinx daemon
+ host :: String
+ -- | The portnumber of the Sphinx daemon
+ , port :: Int
+ -- | Per-field weights
+ , weights :: [Int]
+ -- | How many records to seek from result-set start (default is 0)
+ , offset :: Int
+ -- | How many records to return from result-set starting at offset (default is 20)
+ , limit :: Int
+ -- | Query matching mode
+ , mode :: T.MatchMode
+ -- | Ranking mode
+ , ranker :: T.Rank
+ -- | Match sorting mode
+ , sort :: T.Sort
+ -- | Attribute to sort by
+ , sortBy :: String
+ -- | Minimum ID to match, 0 means no limit
+ , minId :: Int
+ -- | Maximum ID to match, 0 means no limit
+ , maxId :: Int
+ -- | Group-by sorting clause (to sort groups in result set with)
+ , groupBy :: String
+ -- | Group-by count-distinct attribute
+ , groupSort :: String
+ -- | Group-by function (to pre-process group-by attribute value with)
+ , groupByFunc :: T.GroupByFunction
+ -- | Group-by attribute name
+ , groupDistinct :: String
+ -- | Maximum number of matches to retrieve
+ , maxMatches :: Int
+ -- | Cutoff to stop searching at
+ , cutoff :: Int
+ -- | Distributed retries count
+ , retryCount :: Int
+ -- | Distributed retries delay
+ , retryDelay :: Int
+ -- | Per-index weights
+ , indexWeights :: [(String, Int)]
+ -- | Maximum query time in milliseconds, 0 means no limit
+ , maxQueryTime :: Int
+ -- | Per-field-name weights
+ , fieldWeights :: [(String, Int)]
+}
+
+-- | A basic, default configuration.
+defaultConfig = Configuration {
+ port = 3312
+ , host = "127.0.0.1"
+ , weights = []
+ , offset = 0
+ , limit = 20
+ , mode = T.All
+ , ranker = T.ProximityBm25
+ , sort = T.Relevance
+ , sortBy = ""
+ , minId = 0
+ , maxId = 0
+ , groupSort = "@group desc"
+ , groupBy = ""
+ , groupByFunc = T.Day
+ , groupDistinct = ""
+ , maxMatches = 1000
+ , cutoff = 0
+ , retryCount = 0
+ , retryDelay = 0
+ , indexWeights = []
+ , maxQueryTime = 0
+ , fieldWeights = []
+ }
@@ -0,0 +1,62 @@
+module Text.Search.Sphinx.Get where
+
+import Data.Binary.Get
+import Data.Int (Int64)
+import Prelude hiding (readList)
+import Data.ByteString.Lazy hiding (pack, length, map, groupBy)
+import Control.Monad
+import Text.Search.Sphinx.Types
+
+-- Utility functions
+getNum :: Get Int
+getNum = getWord32be >>= return . fromEnum
+
+getNum64 :: Get Int64
+getNum64 = getWord64be >>= return . fromIntegral
+
+getNums = readList getNum
+readList f = do num <- getNum
+ num `times` f
+times = replicateM
+readField = readStr
+readStr = do len <- getNum
+ getLazyByteString (fromIntegral len)
+
+
+getResult :: Get SearchResult
+getResult = do
+ status <- getNum
+ -- todo: we suppose the status is OK
+ fields <- readList readField
+ attrs <- readList readAttr
+ matchCount <- getNum
+ id64 <- getNum
+ matches <- matchCount `times` readMatch (id64 > 0) (map snd attrs)
+ [total, totalFound, time, numWords] <- 4 `times` getNum
+ wrds <- numWords `times` readWord
+ return (SearchResult matches total totalFound wrds)
+
+
+readWord = do s <- readStr
+ [doc, hits] <- 2 `times` getNum
+ return (s, doc, hits)
+
+readMatch isId64 attrs = do
+ doc <- if isId64 then getNum64 else (getNum >>= return . fromIntegral)
+ weight <- getNum
+ matchAttrs <- mapM readMatchAttr attrs
+ return $ Match doc weight matchAttrs
+
+readMatchAttr AttrTFloat = error "readMatchAttr for AttrFloat not implemented yet."
+readMatchAttr AttrTMulti = getNums >>= return . AttrMulti
+readMatchAttr _ = getNum >>= return . AttrNum
+
+readAttr = do
+ s <- readStr
+ t <- getNum
+ return (s, toEnum t)
+
+readHeader = runGet $ do status <- getWord16be
+ version <- getWord16be
+ length <- getWord32be
+ return (status, version, length)
@@ -0,0 +1,83 @@
+module Text.Search.Sphinx.Indexable (Indexable (..), SchemaType (..), AttrType (..), Id,
+ SphinxSchema (..), serialize)
+ where
+
+--import Text.Search.Sphinx.Types
+import Text.XML.Light
+
+-- TODO: this should really be the same as Types.Attr
+data Indexable = NumAttr Int
+ | StrAttr String
+ | Field String
+
+data SchemaType = TField
+ | TAttribute AttrType
+
+data AttrType = AString | AInt
+
+type Id = Int
+
+class SphinxSchema a where
+ -- | Convert a value of a to a document with a document id and some attributes and fields.
+ toDocument :: a -> (Id, [(String, Indexable)])
+ -- | The first parameter should be ignored, but is used to satisfy Haskell's type system.
+ schema :: a -> [(String, SchemaType)]
+
+serialize :: SphinxSchema a => [a] -> Element
+serialize items =
+ sphinxEl "docset" << (
+ sphinxEl "schema" << (map schemaField $ schema (head $ items))
+ : map (doc . toDocument) items
+ )
+
+doc :: (Id, [(String, Indexable)]) -> Element
+doc (id, fields) = sphinxEl "document" ! [("id", show id)] <<
+ map docEl fields
+
+docEl :: (String, Indexable) -> Element
+docEl (name, content) = normalEl name `text` indexableEl content
+
+indexableEl (NumAttr i) = simpleText $ show i
+indexableEl (StrAttr f) = simpleText $ f
+indexableEl (Field f) = simpleText $ f
+
+simpleText s = CData { cdVerbatim = CDataText
+ , cdData = s
+ , cdLine = Nothing
+ }
+
+schemaField :: (String, SchemaType) -> Element
+schemaField (name, TField) = sphinxEl "field" ! [("name", name)]
+schemaField (name, TAttribute t) = sphinxEl "attr" ! [("name", name), ("type", attrType t)]
+
+attrType :: AttrType -> String
+attrType AString = "str2ordinal"
+attrType AInt = "int"
+
+text :: Element -> CData -> Element
+text el dat = el {elContent = [Text dat]}
+
+(<<) :: Element -> [Element] -> Element
+a << b = a {elContent = map Elem b}
+
+(!) :: Element -> [(String, String)] -> Element
+el ! attrs = el {elAttribs = [Attr (unqual name) value | (name, value) <- attrs]}
+
+sphinxEl :: String -> Element
+sphinxEl name = Element { elName = sphinxNm name
+ , elAttribs = []
+ , elContent = []
+ , elLine = Nothing
+ }
+
+normalEl :: String -> Element
+normalEl name = Element { elName = unqual name
+ , elAttribs = []
+ , elContent = []
+ , elLine = Nothing
+ }
+
+sphinxNm name = blank_name { qPrefix = Just "sphinx"
+ , qURI = Nothing
+ , qName = name
+ }
@@ -0,0 +1,31 @@
+module Text.Search.Sphinx.Put where
+
+import Data.Binary.Put
+import Data.ByteString.Lazy hiding (pack, length, map, groupBy)
+import Data.ByteString.Lazy.Char8 (pack)
+import qualified Data.ByteString.Lazy as BS
+import qualified Text.Search.Sphinx.Types as T
+
+num = putWord32be . toEnum
+num64 = putWord64be . toEnum
+enum :: Enum a => a -> Put
+enum = num . fromEnum
+numList ls = do num (length ls)
+ mapM_ num ls
+nums cfg = mapM_ (\x -> num $ x cfg)
+num64s cfg = mapM_ (\x -> num64 $ x cfg)
+
+stringIntList :: [(String, Int)] -> Put
+stringIntList xs = num (length xs) >> mapM_ strInt xs
+ where strInt (s,i) = str s >> num i
+
+str :: String -> Put
+str s = do let bs = pack s
+ num (fromEnum $ BS.length bs)
+ putLazyByteString bs
+
+cmd :: T.SearchdCommand -> Put
+cmd = putWord16be . toEnum . T.searchdCommand
+
+verCmd :: T.VerCommand -> Put
+verCmd = putWord16be . toEnum . T.verCommand
Oops, something went wrong.

0 comments on commit c12942b

Please sign in to comment.