Skip to content

Commit

Permalink
Add clean script.
Browse files Browse the repository at this point in the history
  • Loading branch information
horndude77 committed Jan 12, 2009
1 parent f0339fb commit 00d5306
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 3 deletions.
77 changes: 77 additions & 0 deletions examples/clean.rb
@@ -0,0 +1,77 @@
#!/usr/bin/env ruby

#This script converts a batch of grayscale images to black & white and
#then compiles them into a single PDF.

SCRIPT_DIR = File.dirname($0)
$LOAD_PATH << SCRIPT_DIR
$LOAD_PATH << "#{SCRIPT_DIR}/../lib"

require 'rubygems'
require 'leptonica'
require 'score_tools'

# Runs one page through the cleaning pipeline and returns the result.
#
# The steps, in order, are ScoreTools.deskew, adaptive_map_threshold,
# resize (to w x h), remove_edge_noise and center!; each step receives the
# output of the previous one.
def clean_image(image, w, h)
  straightened = ScoreTools.deskew(image)
  binarized = ScoreTools.adaptive_map_threshold(straightened)
  sized = ScoreTools.resize(binarized, w, h)
  denoised = ScoreTools.remove_edge_noise(sized)
  ScoreTools.center!(denoised)
end

# Defaults for the command-line options.
out_file_prefix = "out" # prefix of the intermediate per-page TIFF files
page = '0000'           # page counter used in file names; advanced in place with succ!
double_page = false     # when true, every input image is split into two pages
name = 'out'            # base name handed to ScoreTools.files_to_pdf
width, height = 0, 0    # target page size in pixels; must be given via -size

usage = "Usage: clean.rb [-double-page] [-name NAME] -size WxH FILE..."

# Consume the leading options. When the loop ends, i is the index of the first
# input file in ARGV.
i = 0
while i < ARGV.length && ARGV[i] =~ /^-/
  case ARGV[i]
  when '-double-page'
    double_page = true
    i += 1
  when '-name'
    name = ARGV[i + 1]
    if name.nil?
      puts "Missing value for -name"
      puts usage
      exit 1
    end
    i += 2
  when '-size'
    # to_s/to_i make a missing or malformed value come out as 0, which is
    # rejected by the size check below instead of raising on nil.
    w_arg, h_arg = ARGV[i + 1].to_s.split('x')
    width = w_arg.to_i
    height = h_arg.to_i
    i += 2
  else
    # Without this branch an unrecognised option never advances i and the
    # loop spins forever.
    puts "Unknown option: #{ARGV[i]}"
    puts usage
    exit 1
  end
end

if width <= 0 || height <= 0
  puts "Invalid size"
  puts usage
  exit 1
end

# Everything after the options is an input image. ARGV[i..-1] is nil when i
# lies beyond the end of ARGV, hence the fallback to an empty list.
files = ARGV[i..-1] || []

# Pages written during this run, in order. Only these go into the PDF, so
# stale TIFFs left in the directory by an earlier run are not picked up.
pages_written = []

files.each do |file|
  puts page
  image = Leptonica::Pix.read(file)

  # A double-page scan yields two pages (left, then right); otherwise one.
  parts = double_page ? ScoreTools.split_in_half(image) : [image]

  parts.each do |part|
    page_file = "#{out_file_prefix}#{page}.tiff"
    clean_image(part, width, height).write(page_file, :tiff_g4)
    pages_written << page_file
    page.succ!
  end

  #Free the image (hopefully)
  GC.start
end

if pages_written.empty?
  puts "No input files"
else
  ScoreTools.files_to_pdf(pages_written, name)
end

5 changes: 2 additions & 3 deletions examples/find_pattern.rb
@@ -1,8 +1,7 @@
#!/usr/bin/env ruby

#To run without installing the gem uncomment the following lines:
#SCRIPT_DIR = File.dirname($0)
#$LOAD_PATH << "#{SCRIPT_DIR}/../lib"
SCRIPT_DIR = File.dirname($0)
$LOAD_PATH << "#{SCRIPT_DIR}/../lib"

require 'rubygems'
require 'leptonica'
Expand Down
108 changes: 108 additions & 0 deletions examples/score_tools.rb
@@ -0,0 +1,108 @@
require 'leptonica'

# Helpers for cleaning up scanned score pages with Leptonica and bundling the
# results into a PDF. Every method is a module-level function that takes a
# pix-like object (anything responding to the Leptonica::Pix methods used).
module ScoreTools
  # Converts a grayscale pix to black & white.
  #
  # Builds a background map of the image, applies its inverse to normalize the
  # image, then thresholds the normalized image at its estimated global
  # threshold. Returns the thresholded pix.
  #
  # NOTE(review): the meaning of the 11 and 128 arguments is defined by the
  # Leptonica binding and is not visible here - confirm before tuning.
  def self.adaptive_map_threshold(pix)
    reduction = 4
    map = pix.background_norm_gray_morph(reduction, 11, 128)
    norm = pix.apply_inv_background_gray_map(map, reduction)
    norm.threshold(norm.estimate_global_threshold)
  end

  # Returns pix rotated by its detected skew.
  #
  # The skew is measured on a 1-bit image: pix itself when its depth is 1,
  # otherwise a copy thresholded at the estimated global threshold. find_skew's
  # result is treated as degrees and converted to radians for rotate.
  def self.deskew(pix)
    binary = pix.depth == 1 ? pix : pix.threshold(pix.estimate_global_threshold)
    pix.rotate(degrees_to_radians(binary.find_skew))
  end

  # Splits pix vertically into two halves and returns [left, right].
  #
  # The left half is the first w/2 columns. The right half starts directly
  # after it at column w/2 and takes the remaining w - w/2 columns, so no
  # column is dropped and the box never reaches past the right edge (for odd
  # widths the right half is one column wider).
  def self.split_in_half(pix)
    w = pix.width
    h = pix.height
    left_w = w / 2
    lbox = Leptonica::Box.create(0, 0, left_w, h)
    rbox = Leptonica::Box.create(left_w, 0, w - left_w, h)
    [pix.clip(lbox), pix.clip(rbox)]
  end

  # Brings pix to new_w x new_h by adding and/or removing border pixels (no
  # scaling). Returns pix itself when it already has the requested size.
  #
  # Each dimension is handled independently: a dimension that is too small is
  # padded with add_border, one that is too large is trimmed with
  # remove_border.
  #
  # NOTE(review): the border arguments are presumably (left, right, top,
  # bottom), i.e. all padding/trimming happens on the right and bottom edges -
  # confirm against the Leptonica binding.
  def self.resize(pix, new_w, new_h)
    w_diff = new_w - pix.width
    h_diff = new_h - pix.height

    w_add = [w_diff, 0].max
    h_add = [h_diff, 0].max
    expanded = (w_add > 0 || h_add > 0) ? pix.add_border(0, w_add, 0, h_add) : pix

    w_sub = [-w_diff, 0].max
    h_sub = [-h_diff, 0].max
    (w_sub > 0 || h_sub > 0) ? expanded.remove_border(0, w_sub, 0, h_sub) : expanded
  end

  # Combines the TIFF files (in the given order) into "<filename>.pdf".
  #
  # files    - list of TIFF paths, one per page.
  # filename - output base name; underscores become spaces in the PDF title.
  #
  # Runs the external tools tiffcp and tiff2pdf. Arguments are passed as
  # separate argv entries rather than through a shell, so paths containing
  # spaces or shell metacharacters are handled safely. The intermediate
  # "<filename>.tiff" is removed whether or not the conversion succeeds.
  #
  # Returns true when both tools succeed, false otherwise (including when
  # files is empty or a tool cannot be run).
  def self.files_to_pdf(files, filename)
    return false if files.empty?

    merged = "#{filename}.tiff"
    title = filename.gsub('_', ' ')
    begin
      merged_ok = system('tiffcp', *(files + [merged]))
      pdf_ok = merged_ok && system('tiff2pdf', '-t', title, '-z', '-o', "#{filename}.pdf", merged)
      pdf_ok ? true : false
    ensure
      File.delete(merged) if File.exist?(merged)
    end
  end

  # Removes noise around the page content and returns the cleaned image.
  #
  # A content mask is built by opening the image with a small brick and then
  # repeatedly dilating it horizontally and vertically ("smudging") and
  # dropping components that touch the border. The loop stops once at most one
  # connected component remains or after max_iterations passes (it always runs
  # at least once). The result is the image ANDed with the mask.
  #
  # smudge_factor  - length of the horizontal/vertical smudging bricks.
  # max_iterations - upper bound on the number of smudge passes.
  def self.remove_edge_noise(image, smudge_factor = 51, max_iterations = 4)
    #remove some initial noise
    sel_b = Leptonica::Sel.create_brick(3, 3, 1, 1)

    #smudging bricks
    smudge_center = smudge_factor / 2
    sel_h = Leptonica::Sel.create_brick(1, smudge_factor, 1, smudge_center)
    sel_v = Leptonica::Sel.create_brick(smudge_factor, 1, smudge_center, 1)

    count = 0
    content_mask = image.open(sel_b)
    loop do
      content_mask.dilate!(sel_h)
      content_mask.dilate!(sel_v)
      #BUG: Sometimes the far right column doesn't get set by a
      #horizontal dilate. Setting the border pixels as a workaround.
      content_mask.set_border!(1, 1, 1, 1)
      content_mask = content_mask.remove_border_components
      #Debugging aid: content_mask.write("mask#{count}.tiff", :tiff_g4)
      count += 1
      break if content_mask.count_connected_components <= 1 || count >= max_iterations
    end
    image.and(content_mask)
  end

  # Shifts the image so that the bounding box of its content is centered, and
  # returns the result of shift!.
  #
  # NOTE(review): behavior for an image with no connected components depends
  # on what boxa.extent returns in that case - not visible here.
  def self.center!(image)
    #Do an opening to hopefully avoid some noise. This might remove some
    #content. If it does open with a smaller brick.
    sel = Leptonica::Sel.create_brick(7, 7, 3, 3)
    bounding_box = image.open(sel).connected_components.extent
    x = bounding_box.x
    y = bounding_box.y
    w = bounding_box.w
    h = bounding_box.h
    # Centered origin is ((W - w)/2, (H - h)/2); subtracting the current
    # origin (x, y) gives the shift below.
    sx = (image.width - w - 2 * x) / 2
    sy = (image.height - h - 2 * y) / 2
    image.shift!(sx, sy)
  end

  # Converts an angle from degrees to radians.
  def self.degrees_to_radians(angle)
    angle * (Math::PI / 180.0)
  end

  # Converts an angle from radians to degrees.
  def self.radians_to_degrees(angle)
    angle * (180.0 / Math::PI)
  end
end

0 comments on commit 00d5306

Please sign in to comment.