Skip to content

Commit

Permalink
Add fold benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
jaspervdj committed Jun 16, 2011
1 parent 4e79b4e commit 8de5ea9
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 0 deletions.
50 changes: 50 additions & 0 deletions tests/benchmarks/ruby/fold.rb
@@ -0,0 +1,50 @@
#!/usr/bin/env ruby

require './utils.rb'

# Read +filename+ as UTF-8 and print each of its paragraphs to standard
# output, re-wrapped via fold_paragraph to at most +max_width+ characters per
# line.
#
# Paragraphs are separated by lines that are empty after stripping
# whitespace. Every paragraph that is terminated by such a line is followed
# by one blank line in the output; a final paragraph without a terminating
# blank line is printed without one.
def fold(filename, max_width)
  File.open(filename, 'r:utf-8') do |file|
    # Words collected so far for the paragraph being read
    paragraph = []

    file.each_line do |line|
      if line.strip.empty?
        # A blank line ends the current paragraph. When no words have been
        # collected (leading or consecutive blank lines) there is nothing to
        # dump, and fold_paragraph must not be handed an empty word list.
        unless paragraph.empty?
          puts fold_paragraph(paragraph, max_width)
          puts
          paragraph = []
        end
      else
        # Otherwise, append the words found in the line to the paragraph
        paragraph.concat line.split
      end
    end

    # Last paragraph (input did not end with a blank line)
    puts fold_paragraph(paragraph, max_width) unless paragraph.empty?
  end
end

# Fold a single paragraph to the desired width.
#
# paragraph - Array of words (Strings) in reading order.
# max_width - Maximum number of characters per output line.
#
# Words are joined with single spaces; a newline is started whenever adding
# the next word plus its separating space would exceed max_width. Words are
# never split, so a word longer than max_width ends up alone on an
# over-long line.
#
# Returns the folded paragraph as a single String without a trailing
# newline, or an empty String when the paragraph contains no words.
def fold_paragraph(paragraph, max_width)
  # An empty paragraph has no first word to seed the output with
  return '' if paragraph.empty?

  # Work on a copy so the in-place appends below never modify the caller's
  # first word
  str = paragraph.first.dup
  width = str.length

  paragraph.drop(1).each do |word|
    if width + word.length + 1 <= max_width
      # The word still fits on the current line
      str << ' ' << word
      width += word.length + 1
    else
      # Start a new line with this word
      str << "\n" << word
      width = word.length
    end
  end

  str
end

# Fold every file named on the command line to 80 columns and report, per
# file, the value returned by +benchmark+ on standard error.
ARGV.each do |path|
  elapsed = benchmark do
    fold(path, 80)
  end
  STDERR.puts "#{path}: #{elapsed}"
end
2 changes: 2 additions & 0 deletions tests/benchmarks/src/Data/Text/Benchmarks.hs
Expand Up @@ -23,6 +23,7 @@ import qualified Data.Text.Benchmarks.WordFrequencies as WordFrequencies

import qualified Data.Text.Benchmarks.Programs.BigTable as Programs.BigTable
import qualified Data.Text.Benchmarks.Programs.Cut as Programs.Cut
import qualified Data.Text.Benchmarks.Programs.Fold as Programs.Fold
import qualified Data.Text.Benchmarks.Programs.Sort as Programs.Sort
import qualified Data.Text.Benchmarks.Programs.StripTags as Programs.StripTags
import qualified Data.Text.Benchmarks.Programs.Throughput as Programs.Throughput
Expand Down Expand Up @@ -54,6 +55,7 @@ benchmarks = do
ps <- bgroup "Programs" `fmap` sequence
[ Programs.BigTable.benchmark sink
, Programs.Cut.benchmark (tf "russian.txt") sink 20 40
, Programs.Fold.benchmark (tf "russian.txt") sink
, Programs.Sort.benchmark (tf "russian.txt") sink
, Programs.StripTags.benchmark (tf "yiwiki.xml") sink
, Programs.Throughput.benchmark (tf "russian.txt") sink
Expand Down
68 changes: 68 additions & 0 deletions tests/benchmarks/src/Data/Text/Benchmarks/Programs/Fold.hs
@@ -0,0 +1,68 @@
-- | Benchmark which reformats paragraphs, like the @fmt@ unix utility.
--
-- Tested in this benchmark:
--
-- * Reading the file
--
-- * Splitting into paragraphs
--
-- * Reformatting the paragraphs to a certain line width
--
-- * Concatenating the results using the text builder
--
-- * Writing back to a handle
--
{-# LANGUAGE OverloadedStrings #-}
module Data.Text.Benchmarks.Programs.Fold
( benchmark
) where

import Data.List (foldl')
import Data.List (intersperse)
import Data.Monoid (mempty, mappend, mconcat)
import System.IO (Handle)
import Criterion (Benchmark, bench)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.Lazy.Builder as TLB
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.IO as TL

-- | Build the \"Fold\" benchmark: read the file at the given path, fold its
-- contents to 80 columns and write the result to the given handle.
--
benchmark :: FilePath -> Handle -> IO Benchmark
benchmark i o = return (bench "Fold" run)
  where
    -- The measured action: read, fold, write
    run = do
        contents <- T.readFile i
        TL.hPutStr o (fold 80 contents)

-- | A paragraph, represented as the list of its words in reading order.
--
type Paragraph = [T.Text]

-- | Fold a whole text: split it into paragraphs, fold each paragraph to the
-- given maximum width, and join the folded paragraphs with one blank line
-- between them.
--
fold :: Int -> T.Text -> TL.Text
fold maxWidth input = TLB.toLazyText (mconcat separated)
  where
    -- One builder per paragraph
    folded    = map (foldParagraph maxWidth) (paragraphs input)
    -- Blank line between consecutive paragraphs, none after the last
    separated = intersperse (TLB.fromString "\n\n") folded

-- | Fold a single paragraph to the given maximum width.
--
-- Words are joined with single spaces; a new line is started whenever the
-- next word plus its separating space would push the current line past the
-- maximum width. Widths are measured with 'T.length'. Words are never split.
-- An empty paragraph yields an empty builder.
--
foldParagraph :: Int -> Paragraph -> TLB.Builder
foldParagraph _ [] = mempty
foldParagraph limit (firstWord : rest) =
    fst (foldl' step (TLB.fromText firstWord, T.length firstWord) rest)
  where
    -- Accumulator: output built so far and the width of its last line
    step (acc, col) w
        | fits      = (acc `mappend` TLB.singleton ' ' `mappend` piece, col')
        | otherwise = (acc `mappend` TLB.singleton '\n' `mappend` piece, n)
      where
        n     = T.length w
        col'  = col + n + 1
        fits  = col' <= limit
        piece = TLB.fromText w

-- | Divide a text into paragraphs.
--
-- Each line is split into words; lines without any words act as paragraph
-- separators, and a run of consecutive separator lines counts as a single
-- separator. Note that a text starting with a separator line yields an
-- empty first paragraph.
--
paragraphs :: T.Text -> [Paragraph]
paragraphs = go . map T.words . T.lines
  where
    go [] = []
    go wordLines =
        let (body, rest) = span (not . null) wordLines
        in  concat body : case rest of
                [] -> []
                _  -> go (dropWhile null rest)

0 comments on commit 8de5ea9

Please sign in to comment.