Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
120 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#!/usr/bin/env ruby | ||
|
||
require './utils.rb' | ||
|
||
# Read +filename+ as UTF-8 and print it to standard output with every
# paragraph re-wrapped to at most +max_width+ columns.
#
# A line that is empty after stripping whitespace ends the current paragraph:
# the words collected so far are folded and printed, followed by one blank
# line. Note that fold_paragraph is called at every such line, even when no
# words have been collected yet (for example on consecutive blank lines).
def fold(filename, max_width)
  File.open(filename, 'r:utf-8') do |input|
    # Words gathered for the paragraph currently being read
    words = []

    input.each_line do |text|
      # Non-blank line: keep collecting words and move on
      unless text.strip.empty?
        words.concat(text.split)
        next
      end

      # Blank line: flush the paragraph and start a new one
      puts fold_paragraph(words, max_width)
      puts
      words = []
    end

    # The file may end without a trailing blank line
    puts fold_paragraph(words, max_width) unless words.empty?
  end
end
|
||
# Fold a single paragraph to the desired width.
#
# paragraph - Array of words (Strings) in reading order.
# max_width - Maximum line length in characters. A word longer than this is
#             placed on a line of its own; words are never split.
#
# Returns a String with the words separated by single spaces, and by a newline
# wherever the next word would not fit on the current line. An empty paragraph
# yields an empty String. The input array and its strings are not modified.
def fold_paragraph(paragraph, max_width)
  # An empty paragraph has no first word; return early instead of calling
  # #length on nil.
  return '' if paragraph.empty?

  # Finished lines, plus the line currently being filled
  lines = []
  current = paragraph.first.dup

  paragraph.drop(1).each do |word|
    # +1 accounts for the separating space
    if current.length + word.length + 1 <= max_width
      current << ' ' << word
    else
      lines << current
      current = word.dup
    end
  end

  lines << current
  lines.join("\n")
end
|
||
# Fold every file named on the command line to 80 columns and report the
# value returned by `benchmark` (defined in utils.rb; presumably the elapsed
# time -- confirm there) on standard error, one line per file.
ARGV.each do |path|
  elapsed = benchmark { fold(path, 80) }
  STDERR.puts "#{path}: #{elapsed}"
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
68 changes: 68 additions & 0 deletions
68
tests/benchmarks/src/Data/Text/Benchmarks/Programs/Fold.hs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
-- | Benchmark which formats paragraphs, like the @fmt@ unix utility. | ||
-- | ||
-- Tested in this benchmark: | ||
-- | ||
-- * Reading the file | ||
-- | ||
-- * Splitting into paragraphs | ||
-- | ||
-- * Reformatting the paragraphs to a certain line width | ||
-- | ||
-- * Concatenating the results using the text builder | ||
-- | ||
-- * Writing back to a handle | ||
-- | ||
{-# LANGUAGE OverloadedStrings #-} | ||
module Data.Text.Benchmarks.Programs.Fold | ||
( benchmark | ||
) where | ||
|
||
import Data.List (foldl') | ||
import Data.List (intersperse) | ||
import Data.Monoid (mempty, mappend, mconcat) | ||
import System.IO (Handle) | ||
import Criterion (Benchmark, bench) | ||
import qualified Data.Text as T | ||
import qualified Data.Text.IO as T | ||
import qualified Data.Text.Lazy.Builder as TLB | ||
import qualified Data.Text.Lazy as TL | ||
import qualified Data.Text.Lazy.IO as TL | ||
|
||
-- | Build the \"Fold\" benchmark: read the file @i@ as strict text, fold it to
-- 80 columns and write the resulting lazy text to the handle @o@.
--
benchmark :: FilePath -> Handle -> IO Benchmark
benchmark i o = return (bench "Fold" foldFile)
  where
    foldFile = do
        input <- T.readFile i
        TL.hPutStr o (fold 80 input)
|
||
-- | A paragraph is represented as the list of its words, in reading order; | ||
-- line breaks inside the paragraph are not retained. | ||
-- | ||
type Paragraph = [T.Text] | ||
|
||
-- | Re-wrap a whole text: split it into paragraphs, fold each one to at most
-- @maxWidth@ columns and join the folded paragraphs with one blank line
-- (@\"\\n\\n\"@) between them. The output is assembled with the text builder.
--
fold :: Int -> T.Text -> TL.Text
fold maxWidth input = TLB.toLazyText (mconcat separated)
  where
    folded    = map (foldParagraph maxWidth) (paragraphs input)
    separated = intersperse "\n\n" folded
|
||
-- | Lay out the words of one paragraph so that no line exceeds @max'@
-- characters, unless a single word is itself longer than that. Words on the
-- same line are separated by one space. An empty paragraph produces an empty
-- builder.
--
foldParagraph :: Int -> Paragraph -> TLB.Builder
foldParagraph _ [] = mempty
foldParagraph max' (firstWord : rest) =
    fst (foldl' step (TLB.fromText firstWord, T.length firstWord) rest)
  where
    -- The accumulator pairs the output built so far with the width of the
    -- line currently being filled.
    step (acc, used) w =
        let n        = T.length w
            -- Width of the current line if @w@ is appended after a space
            extended = used + n + 1
        in if extended <= max'
            then (acc `mappend` " " `mappend` TLB.fromText w, extended)
            else (acc `mappend` "\n" `mappend` TLB.fromText w, n)
|
||
-- | Divide a text into paragraphs. Each line is split into words; a line
-- without any words separates paragraphs, and a run of such lines counts as a
-- single separator. Separator lines at the end of the text are dropped, while
-- a text that starts with separator lines yields an empty first paragraph.
--
paragraphs :: T.Text -> [Paragraph]
paragraphs input = go (map T.words (T.lines input))
  where
    go rows = case break null rows of
        (body, [])
            | null body -> []
            | otherwise -> [concat body]
        (body, rest) -> concat body : go (dropWhile null rest)