Font subsetting support. Only includes characters in embedded fonts that are actually used in the document. Please report bugs if you find them!
foliojs · Jul 17, 2011 · 9d90e25 · 9d90e25
1 parent 8485332
commit 9d90e25
Show file tree

Hide file tree

Showing 22 changed files with 1,077 additions and 126 deletions.
diff --git a/demo/out.pdf b/demo/out.pdf
diff --git a/lib/data.coffee b/lib/data.coffee
@@ -1,72 +1,60 @@
 class Data    
-    constructor: (@data) ->
+    constructor: (@data = []) ->
         @pos = 0
         @length = @data.length
 
     readByte: ->
         @data[@pos++]
 
+    # Stores `byte` at the current cursor position and advances the cursor
+    # by one. Evaluates to the value that was stored.
+    # NOTE(review): this method does not touch @length, which is assigned in
+    # the constructor -- confirm callers do not rely on it after writing.
+    writeByte: (byte) ->
+        cursor = @pos++
+        @data[cursor] = byte
+
     byteAt: (index) ->
         @data[index]
 
     readBool: ->
         return !!@readByte()
 
+    # Writes a boolean as a single byte: 1 when `val` is truthy, 0 otherwise.
+    writeBool: (val) ->
+        flag = if val then 1 else 0
+        @writeByte flag
+
     readUInt32: ->
         b1 = @readByte() << 24
         b2 = @readByte() << 16
         b3 = @readByte() << 8
         b4 = @readByte()
         b1 | b2 | b3 | b4
+
+    # Writes the low 32 bits of `val` as four bytes, most significant first.
+    writeUInt32: (val) ->
+        # every shifted value is masked to 8 bits, so the kind of right
+        # shift used does not affect the byte that gets written
+        for shift in [24, 16, 8]
+            @writeByte (val >>> shift) & 0xff
+        @writeByte val & 0xff
 
     readInt32: ->
         int = @readUInt32()
-        if int >= 2147483648 then int - 4294967296 else int
+        if int >= 0x80000000 then int - 0x100000000 else int
+
+    # Writes a signed 32-bit integer. A negative `val` is shifted up by 2^32
+    # before being handed to writeUInt32.
+    writeInt32: (val) ->
+        unsigned = if val < 0 then val + 0x100000000 else val
+        @writeUInt32 unsigned
 
     readUInt16: ->
         b1 = @readByte() << 8
         b2 = @readByte()
         b1 | b2
 
+    # Writes the low 16 bits of `val` as two bytes, high byte first.
+    writeUInt16: (val) ->
+        hi = (val >> 8) & 0xff
+        lo = val & 0xff
+        @writeByte hi
+        @writeByte lo
+
     readInt16: ->
         int = @readUInt16()
-        if int >= 32768 then int - 65536 else int
-
-    readFloat32: ->
-        b1 = @readByte()
-        b2 = @readByte()
-        b3 = @readByte()
-        b4 = @readByte()
-
-        sign = 1 - ((b1 >> 7) << 1) # sign = bit 0
-        exp = (((b1 << 1) & 0xFF) | (b2 >> 7)) - 127 # exponent = bits 1..8
-        sig = ((b2 & 0x7F) << 16) | (b3 << 8) | 4 # significand = bits 9..31
+        if int >= 0x8000 then int - 0x10000 else int
 
-        return 0.0 if sig is 0 and exp is -127
-        return sign * (1 + 2e-23 * sig) * Math.pow(2, exp)
-
-    readFloat64: ->
-        b1 = @readByte()
-        b2 = @readByte()
-        b3 = @readByte()
-        b4 = @readByte()
-        b5 = @readByte()
-        b6 = @readByte()
-        b7 = @readByte()
-        b8 = @readByte()
-
-        sign = 1 - ((b1 >> 7) << 1) # sign = bit 0
-        exp = (((b1 << 4) & 0x7FF) | (b2 >> 4)) - 0123 # exponent = bits 1..11
-
-        # This crazy toString() stuff works around the fact that js ints are
-        # only 32 bits and signed, giving us 31 bits to work with
-        sig = (((b2 & 0xF) << 16) | (b3 << 8) | b4).toString(2) +
-                (if b5 >> 7 then '1' else '0') + 
-                (((b5 & 0x7F) << 24) | (b6 << 16) | (b7 << 8) | b8).toString(2) # significand = bits 12..63
-
-        sig = parseInt(sig, 2)
-        return 0.0 if sig is 0 and exp is -1023
-        return sign * (1.0 + 2e-52 * sig) * Math.pow(2, exp)
+    # Writes a signed 16-bit integer. A negative `val` is shifted up by 2^16
+    # before being handed to writeUInt16.
+    writeInt16: (val) ->
+        unsigned = if val < 0 then val + 0x10000 else val
+        @writeUInt16 unsigned
 
     readString: (length) ->
         ret = []
@@ -75,22 +63,19 @@ class Data
 
         return ret.join ''
 
+    # Writes one entry per character of `val`, using each character's
+    # charCodeAt value. The code is passed to writeByte as-is (no masking).
+    # Evaluates to the list of values written.
+    writeString: (val) ->
+        (@writeByte val.charCodeAt(index) for index in [0...val.length])
+
     stringAt: (@pos, length) ->
         @readString length
 
     readShort: ->
         @readInt16()
 
-    readLong: ->
-        b1 = @readByte()
-        b2 = @readByte()
-        b3 = @readByte()
-        b4 = @readByte()
-
-        long = (((((b1 << 8) + b2) << 8) + b3) << 8) + b4
-        long += 4294967296 if long < 0
-        return long
-
+    # Alias for writeInt16.
+    writeShort: (val) ->
+        return @writeInt16(val)
+
     readLongLong: ->
         b1 = @readByte()
         b2 = @readByte()
@@ -100,16 +85,43 @@ class Data
         b6 = @readByte()
         b7 = @readByte()
         b8 = @readByte()
-        b1 << 56 + b2 << 48 + b3 << 40 | b4 << 32 + b5 << 24 + b6 << 16 + b7 << 8 + b8
+
+        if b1 & 0x80 # sign -> avoid overflow
+            return ((b1 ^ 0xff) * 0x100000000000000 +
+                    (b2 ^ 0xff) *   0x1000000000000 +
+                    (b3 ^ 0xff) *     0x10000000000 +
+                    (b4 ^ 0xff) *       0x100000000 +
+                    (b5 ^ 0xff) *         0x1000000 +
+                    (b6 ^ 0xff) *           0x10000 +
+                    (b7 ^ 0xff) *             0x100 +
+                    (b8 ^ 0xff) + 1) * -1
+
+        return b1 * 0x100000000000000 +
+               b2 *   0x1000000000000 +
+               b3 *     0x10000000000 +
+               b4 *       0x100000000 +
+               b5 *         0x1000000 +
+               b6 *           0x10000 +
+               b7 *             0x100 +
+               b8
+
+    # Writes `val` as eight bytes, most significant first. The upper four
+    # bytes come from floor(val / 2^32); the lower four come from the low
+    # 32 bits of `val`.
+    writeLongLong: (val) ->
+        high = Math.floor(val / 0x100000000)
+        low = val & 0xffffffff
+
+        # upper word
+        for shift in [24, 16, 8, 0]
+            @writeByte (high >> shift) & 0xff
+
+        # lower word; the last byte is written outside the loop so the method
+        # still evaluates to that final writeByte result
+        for shift in [24, 16, 8]
+            @writeByte (low >> shift) & 0xff
+        @writeByte low & 0xff
 
     readInt: ->
         @readInt32()
 
-    readFloat: ->
-        @readFloat32()
-
-    readDouble: ->
-        @readFloat64()
+    # Alias for writeInt32.
+    writeInt: (val) ->
+        return @writeInt32(val)
 
     slice: (start, end) ->
         @data.slice start, end
@@ -121,4 +133,8 @@ class Data
 
         return buf
 
+    # Writes every element of `bytes` in order via writeByte.
+    # Evaluates to the list of values written.
+    write: (bytes) ->
+        (@writeByte value for value in bytes)
+
 module.exports = Data
diff --git a/lib/document.coffee b/lib/document.coffee
@@ -97,6 +97,10 @@ class PDFDocument
         for key, val of @info when typeof val is 'string'
             @info[key] = PDFObject.s val
 
+        # embed the subsetted fonts
+        for family, font of @_fontFamilies
+            font.embed()
+
         # finalize each page
         for page in @pages
             page.finalize()

diff --git a/lib/font.coffee b/lib/font.coffee
@@ -5,6 +5,7 @@ By Devon Govett
 
 TTFFont = require './font/ttf'
 AFMFont = require './font/afm'
+Subset = require './font/subset'
 zlib = require 'zlib'
 
 class PDFFont
@@ -14,20 +15,29 @@ class PDFFont
 
         else if /\.(ttf|ttc)$/i.test @filename
             @ttf = TTFFont.open @filename, @family
-            @embedTTF()
+            @subset = new Subset @ttf
+            @registerTTF()
 
         else if /\.dfont$/i.test @filename
             @ttf = TTFFont.fromDFont @filename, @family
-            @embedTTF()
+            @subset = new Subset @ttf
+            @registerTTF()
 
         else
             throw new Error 'Not a supported font format or standard PDF font.'
 
-    embedTTF: ->
+    # Forwards `characters` to the subset's `use` when this font has a
+    # subset; does nothing otherwise.
+    use: (characters) ->
+        return unless @subset?
+        @subset.use characters
+
+    # Runs embedTTF, except for fonts flagged with @isAFM.
+    embed: ->
+        return if @isAFM
+        @embedTTF()
+
+    # Returns the subset's encodeText result for `text`; falls back to `text`
+    # itself when there is no subset or that result is falsy.
+    encode: (text) ->
+        encoded = @subset?.encodeText(text)
+        encoded or text
+
+    registerTTF: ->
         @scaleFactor = 1000.0 / @ttf.head.unitsPerEm
         @bbox = (Math.round e * @scaleFactor for e in @ttf.bbox)
-
-        @basename = @ttf.name.postscriptName
         @stemV = 0 # not sure how to compute this for true-type fonts...
 
         if @ttf.post.exists
@@ -62,8 +72,14 @@ class PDFFont
 
         @hmtx = @ttf.hmtx
         @charWidths = (Math.round @hmtx.widths[gid] * @scaleFactor for i, gid of @cmap.codeMap when i >= 32)
-
-        data = @ttf.rawData
+
+        # Create a placeholder reference to be filled in embedTTF.
+        @ref = @document.ref
+            Type: 'Font'
+            Subtype: 'TrueType'
+
+    embedTTF: ->
+        data = @subset.encode()
         compressedData = zlib.deflate(data)
 
         @fontfile = @document.ref
@@ -73,9 +89,13 @@ class PDFFont
 
         @fontfile.add compressedData
 
+        cmap = @subset.cmap
+        widths = @subset.charWidths
+        charWidths = (Math.round widths[gid] * @scaleFactor for gid, i in cmap when i >= 32)
+
         @descriptor = @document.ref
             Type: 'FontDescriptor'
-            FontName: @basename
+            FontName: @subset.postscriptName
             FontFile2: @fontfile
             FontBBox: @bbox
             Flags: @flags
@@ -86,16 +106,19 @@ class PDFFont
             CapHeight: @capHeight
             XHeight: @xHeight
 
-        @ref = @document.ref
+        ref = 
             Type: 'Font'
-            BaseFont: @basename
+            BaseFont: @subset.postscriptName
             Subtype: 'TrueType'
             FontDescriptor: @descriptor
             FirstChar: 32
             LastChar: 255
-            Widths: @document.ref @charWidths
+            Widths: @document.ref charWidths
             Encoding: 'MacRomanEncoding'
 
+        for key, val of ref
+            @ref.data[key] = val
+
     embedStandard: ->
         @isAFM = true
         font = AFMFont.open __dirname + "/font/data/#{@filename}.afm"

diff --git a/lib/font/directory.coffee b/lib/font/directory.coffee
@@ -1,3 +1,5 @@
+Data = require '../data'
+
 class Directory
     constructor: (data) ->
         @scalarType = data.readInt()
@@ -15,5 +17,64 @@ class Directory
                 length: data.readInt()
 
             @tables[entry.tag] = entry
+
+    # Serializes `tables` (a map of table tag -> table contents) into a font
+    # file image: a 12-byte header, one 16-byte directory entry per table
+    # (tag, checksum, offset, length), then the table contents, each padded
+    # with zeros to a four-byte boundary. Afterwards the checksum adjustment
+    # is written at byte 8 of the 'head' table.
+    # NOTE(review): each table is presumably an array of byte values (it is
+    # appended with Array concat and measured with .length) -- confirm.
+    # Returns a Buffer built from the encoded bytes.
+    encode: (tables) ->
+        tableCount = Object.keys(tables).length
+
+        # Binary-search fields of the offset table:
+        #   entrySelector = floor(log2(tableCount))
+        #   searchRange   = 2^entrySelector * 16
+        #   rangeShift    = tableCount * 16 - searchRange
+        # Integer shifts are used so no floating-point log rounding is involved.
+        entrySelector = 0
+        entrySelector++ while (1 << (entrySelector + 1)) <= tableCount
+        searchRange = (1 << entrySelector) * 16
+        rangeShift = tableCount * 16 - searchRange
+
+        directory = new Data
+        directory.writeInt @scalarType
+        directory.writeShort tableCount
+        directory.writeShort searchRange
+        directory.writeShort entrySelector
+        directory.writeShort rangeShift
+
+        # table contents start right after the header and all directory entries
+        directoryLength = tableCount * 16
+        offset = directory.pos + directoryLength
+        headOffset = null
+        tableData = []
+
+        # encode the font table directory
+        for tag, table of tables
+            directory.writeString tag
+            directory.writeInt checksum(table)
+            directory.writeInt offset
+            directory.writeInt table.length
+
+            tableData = tableData.concat(table)
+            headOffset = offset if tag is 'head'
+            offset += table.length
+
+            # pad so the next table starts on a four-byte boundary
+            while offset % 4
+                tableData.push 0
+                offset++
+
+        # write the actual table data to the font
+        directory.write(tableData)
+
+        # calculate the font's checksum
+        sum = checksum(directory.data)
+
+        # set the checksum adjustment in the head table; skipped when no
+        # 'head' table was supplied so the header bytes are not overwritten
+        if headOffset?
+            adjustment = 0xB1B0AFBA - sum
+            directory.pos = headOffset + 8
+            directory.writeUInt32 adjustment
+
+        return new Buffer(directory.data)
+
+    # Sums the input as a sequence of big-endian 32-bit words and returns the
+    # sum truncated to 32 bits. The input is copied first (via the splat in
+    # the parameter list), so the zero padding never touches the caller's data.
+    checksum = ([bytes...]) ->
+        # pad the private copy up to a multiple of four bytes
+        bytes.push 0 until bytes.length % 4 is 0
+
+        reader = new Data(bytes)
+        total = 0
+        total += reader.readUInt32() for offset in [0...bytes.length] by 4
+
+        return total & 0xFFFFFFFF
 
 module.exports = Directory