From f332fd68faebf377959630b569639bad620785b2 Mon Sep 17 00:00:00 2001 From: Richard Wolsch Date: Mon, 6 Sep 2021 09:49:10 +0200 Subject: [PATCH] Add support for checkboxes which are organized as children (/Kids) of parent form elements. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit new: - set the checkbox value in child AND parent - remove checkbox parent object IDs from /Fields definition (otherwise they will not display their value correctly due to /NeedAppearances – which itself is needed to display the value of text inputs correctly) --- src/fpdm.php | 960 +++++++++++++++++++++++++++------------------------ 1 file changed, 504 insertions(+), 456 deletions(-) diff --git a/src/fpdm.php b/src/fpdm.php index fb8627d..7a36aaf 100644 --- a/src/fpdm.php +++ b/src/fpdm.php @@ -26,7 +26,7 @@ * stream core with advanced verbose output. Fix() bonus for corrupted pdfs. * * V2.3 (28.11.2010) stream type was lost when /length defined after /Filter * * V2.2 (27.11.2010) Stream filter improved:decode now handles multi filters! * * -* V2.1 (25.11.2010) Only filter support for streams, trailer detection was * +* V2.1 (25.11.2010) Only filter support for streams, trailer detection was * * too restrictive. fixes FDF error occuring when empty array data is given. * * V2.0 (05.11.2010) Load support for inline text fields datas or FDF content * * V1.1 (04.11.2010) Works now under php4 for backward compat. * @@ -36,6 +36,7 @@ global $FPDM_FILTERS, $FPDM_REGEXPS; //needs explicit global scope, otherwise autoloading will be incomplete. $FPDM_FILTERS=array(); //holds all supported filters $FPDM_REGEXPS= array( + "/Parent" => "/\/Parent\s+(\d+)\s(\d+)\sR$/", //FIX: parse checkbox definition "/AS"=>"/\/AS\s+\/(\w+)$/", "name"=>"/\/(\w+)/", @@ -58,61 +59,61 @@ $__tmp = version_compare(phpversion(), "5") == -1 ? array('FPDM') : array('FPDM', false); if (!call_user_func_array('class_exists', $__tmp)) { - - - define('FPDM_VERSION',2.9); - + + + define('FPDM_VERSION',2.9); + define('FPDM_INVALID',0); define('FPDM_STATIC',1); define('FPDM_COMMON',2); define('FPDM_VERBOSE',3); define('FPDM_CACHE',dirname(__FILE__).'/export/cache/'); //cache directory for fdf temporary files needed by pdftk. define('FPDM_PASSWORD_MAX_LEN',15); //Security to prevent shell overflow. - + class FPDM { //@@@@@@@@@ var $useCheckboxParser = false; //boolean: allows activation of custom checkbox parser (not available in original FPDM source) var $pdf_source = ''; //string: full pathname to the input pdf , a form file var $fdf_source = ''; //string: full pathname to the input fdf , a form data file - var $pdf_output = ''; //string: full pathname to the resulting filled pdf - + var $pdf_output = ''; //string: full pathname to the resulting filled pdf + var $pdf_entries = array(); //array: Holds the content of the pdf file as array var $fdf_content = ''; //string: holds the content of the fdf file - var $fdf_parse_needed = false;//boolean: false will use $fields data else extract data from fdf content - var $value_entries = array(); //array: a map of values to faliclitate access and changes - + var $fdf_parse_needed = false;//boolean: false will use $fields data else extract data from fdf content + var $value_entries = array(); //array: a map of values to faliclitate access and changes + var $positions = array(); //array, stores what object id is at a given position n ($positions[n]=) - + var $offsets = array(); //array of offsets for objects, index is the object's id, starting at 1 - var $pointer = 0; //integer, Current line position in the pdf file during the parsing - + var $pointer = 0; //integer, Current line position in the pdf file during the parsing + var $shifts = array(); //array, Shifts of objects in the order positions they appear in the pdf, starting at 0. var $shift = 0; //integer, Global shift file size due to object values size changes - + var $streams = ''; //Holds streams configuration found during parsing var $streams_filter = ''; //Regexp to decode filter streams - + var $safe_mode = false; //boolean, if set, ignore previous offsets do no calculations for the new xref table, seek pos directly in file var $check_mode = false; //boolean, Use this to track offset calculations errors in corrupteds pdfs files for sample var $halt_mode = false; //if true, stops when offset error is encountered - + var $info = array(); //array, holds the info properties var $fields = array(); //array that holds fields-Data parsed from FDF - - var $verbose = false; //boolean , a debug flag to decide whether or not to show internal process + + var $verbose = false; //boolean , a debug flag to decide whether or not to show internal process var $verbose_level = 1; //integer default is 1 and if greater than 3, shows internal parsing as well - + var $support = ''; //string set to 'native' for fpdm or 'pdftk' for pdf toolkit var $flatten_mode = false; //if true, flatten field data as text and remove form fields (NOT YET SUPPORTED BY FPDM) var $compress_mode = false; //boolean , pdftk feature only to compress streams var $uncompress_mode = false; //boolean pdftk feature only to uncompress streams - var $security = array(); //Array holding securtity settings + var $security = array(); //Array holding securtity settings //(password owner nad user, encrypt (set to 40 or 128 or 0), allow ] see pdfk help var $needAppearancesTrue = false; //boolean, indicates if /NeedAppearances is already set to true var $isUTF8 = false; //boolean (true for UTF-8, false for ISO-8859-1) - + /** * Constructor * @@ -121,68 +122,68 @@ class FPDM { *@param string $fdf_source Source-Filename *@param boolean $verbose , optional false per default */ - function __construct() { + function __construct() { //============== - + $args=func_get_args(); $num_args=func_num_args(); - + $FDF_FILE=($num_args>=FPDM_COMMON); $VERBOSE_FLAG=($num_args>=FPDM_VERBOSE); - + $verbose=false; - + //We are not joking here, let's have a polymorphic constructor! switch($num_args) { case FPDM_INVALID: $this->Error("Invalid instantiation of FPDM, requires at least one param"); break; - case FPDM_STATIC: - if($args[0] =='[_STATIC_]') break; //static use, caller is anonymous function defined in _set_field_value + case FPDM_STATIC: + if($args[0] =='[_STATIC_]') break; //static use, caller is anonymous function defined in _set_field_value //else this is the pdf_source then, fdf content is loaded using Load() function default: case FPDM_VERBOSE: //Use the verbose value provided if($VERBOSE_FLAG) $verbose=$args[2]; case FPDM_COMMON: //Common use $this->pdf_source = $args[0];//Blank pdf form - + if($FDF_FILE) { $this->fdf_source = $args[1];//Holds the data of the fields to fill the form - $this->fdf_parse_needed=true; + $this->fdf_parse_needed=true; } - + //calculation and map $this->offsets=array(); $this->pointer=0; $this->shift=0; $this->shifts=array(); $this->n=0; - + //Stream filters $filters=$this->getFilters("|"); $this->streams_filter="/(\/($filters))+/"; //$this->dumpContent($this->streams_filter); - + $this->info=array(); - + //Debug modes $this->verbose=$verbose; - $this->verbose_level=($verbose&&is_int($verbose)) ? $verbose : 1; + $this->verbose_level=($verbose&&is_int($verbose)) ? $verbose : 1; $this->safe_mode=false; $this->check_mode=false; //script will takes much more time if you do so $this->halt_mode=true; - + $this->support='native'; //may ne overriden $this->security=array('password'=>array('owner'=>null,'user'=>null),'encrypt'=>0,'allow'=>array()); - + //echo "
filesize:".filesize($this->pdf_source); $this->load_file('PDF'); - + if($FDF_FILE) $this->load_file('FDF'); - + } } - + /** *Loads a form data to be merged * @@ -195,44 +196,44 @@ function Load($data,$isUTF8=false) { $this->isUTF8 = $isUTF8; $this->load_file('FDF',$data); } - + /** *Loads a file according to its type * *@access private *@param string type 'PDF' or 'FDF' - *@param String|array content the data content of FDF files only or directly the fields values as array + *@param String|array content the data content of FDF files only or directly the fields values as array **/ function load_file($type,$content=NULL) { //------------------------------------ switch($type) { - case "PDF" : + case "PDF" : if($content) $this->Error("load_file do not accept PDF content, only FDF content sorry"); else $this->pdf_entries = $this->getEntries($this->pdf_source,'PDF'); break; - case "FDF" : + case "FDF" : if(!is_null($content)) { if(is_array($content)) { $this->fields=$content; $this->fdf_parse_needed=false; //$this->dumpEntries($content,"PDF fields content"); } else if(is_string($content)){ //String - $this->fdf_content = $content; //TODO: check content - $this->fdf_parse_needed=true; + $this->fdf_content = $content; //TODO: check content + $this->fdf_parse_needed=true; } else $this->Error('Invalid content type for this FDF file!'); } else { $this->fdf_content = $this->getContent($this->fdf_source,'FDF'); - $this->fdf_parse_needed=true; + $this->fdf_parse_needed=true; } break; default: $this->Error("Invalid file type $type"); } } - + /** *Set a mode and play with your power debug toys * @@ -274,13 +275,13 @@ function set_modes($mode,$value) { $this->Error("set_modes error, Invalid mode '$mode'"); } } - + /** *Retrieves informations of the pdf * - *@access public + *@access public *@note To track PDF versions and so on... - *@param Boolean output + *@param Boolean output **/ function Info($asArray=false) { //---------------------- @@ -303,14 +304,14 @@ function Info($asArray=false) { $this->dumpEntries($info); } } - + /** *Changes the support * - *@access public + *@access public *@internal fixes xref table offsets *@note special playskool toy for Christmas dedicated to my impatient fanclub (Grant, Kris, nejck,...) - *@param String support Allow to use external support that has more advanced features (ie 'pdftk') + *@param String support Allow to use external support that has more advanced features (ie 'pdftk') **/ function Plays($cool) { //---------------------- @@ -319,11 +320,11 @@ function Plays($cool) { else $this->support='native'; } - + /** *Fixes a corrupted PDF file * - *@access public + *@access public *@internal fixes xref table offsets *@note Real work is not made here but by Merge that should be launched after to complete the work **/ @@ -333,26 +334,26 @@ function Fix() { $this->set_modes('check',true); //Compare xref table offsets with objects offsets in the pdf file $this->set_modes('halt',false); //Do no stop on errors so fix is applied during merge process } - + //######## pdftk's output configuration ####### - + /** *Decides to use the compress filter to restore compression. - *@note This is only useful when you want to repack PDF that was previously edited in a text editor like vim or emacs. + *@note This is only useful when you want to repack PDF that was previously edited in a text editor like vim or emacs. **/ function Compress() { //------------------- - $this->set_modes('compress',true); + $this->set_modes('compress',true); $this->support="pdftk"; } - + /** *Decides to remove PDF page stream compression by applying the uncompress filter. - *@note This is only useful when you want to edit PDF code in a text editor like vim or emacs. + *@note This is only useful when you want to edit PDF code in a text editor like vim or emacs. **/ function Uncompress() { //--------------------- - $this->set_modes('uncompress',true); + $this->set_modes('uncompress',true); $this->support="pdftk"; } /** @@ -360,10 +361,10 @@ function Uncompress() { **/ function Flatten() { //----------------- - $this->set_modes('flatten',true); + $this->set_modes('flatten',true); $this->support="pdftk"; } - + /*** *Defines a password type *@param String type , 'owner' or 'user' @@ -380,8 +381,8 @@ function Password($type,$code) { } $this->support="pdftk"; } - - + + /** *Defines the encrytion to the given bits *@param integer $bits 0, 40 or 128 @@ -391,7 +392,7 @@ function Encrypt($bits) { switch($bits) { case 0: case 40: - case 128: + case 128: $this->security["encrypt"]=$bits; break; default: @@ -399,7 +400,7 @@ function Encrypt($bits) { } $this->support="pdftk"; } - + /** *Allow permissions * @@ -460,43 +461,43 @@ function Allow($permissions=null) { $this->support="pdftk"; } } - + //############################# - + /** *Merge FDF file with a PDF file * - *@access public + *@access public *@note files has been provided during the instantiation of this class *@internal flatten mode is not yet supported *@param Boolean flatten Optional, false by default, if true will use pdftk (requires a shell) to flatten the pdf form **/ function Merge($flatten=false) { //------------------------------ - + if($flatten) $this->Flatten(); - - + + if($this->support == "native") { - + if($this->fdf_parse_needed) { $fields=$this->parseFDFContent(); }else { $fields=$this->fields; } - + $count_fields=count($fields); - - if($this->verbose&&($count_fields==0)) + + if($this->verbose&&($count_fields==0)) $this->dumpContent("The FDF content has either no field data or parsing may failed","FDF parser: "); - + $fields_value_definition_lines=array(); - + $count_entries=$this->parsePDFEntries($fields_value_definition_lines); - - + + if($count_entries) { - + $this->value_entries=$fields_value_definition_lines; if($this->verbose) { $this->dumpContent("$count_entries Field entry values found for $count_fields field values to fill","Merge info: "); @@ -511,21 +512,21 @@ function Merge($flatten=false) { } } //=========================================================== - - //===== Cross refs/size fixes (offsets calculations for objects have been previously be done in set_field_value) ======= - - //Update cross reference table to match object size changes + + //===== Cross refs/size fixes (offsets calculations for objects have been previously be done in set_field_value) ======= + + //Update cross reference table to match object size changes $this->fix_xref_table(); - + //update the pointer to the cross reference table $this->fix_xref_start(); - + }else $this->Error("PDF file is empty!"); - + } //else pdftk's job is done in Output, not here. } - + /** *Warns verbose/output conflicts * @@ -536,7 +537,7 @@ function Close($dest) { //---------------- $this->Error("Output: Verbose mode should be desactivated, it is incompatible with this output mode $dest"); } - + /** *Get current pdf content (without any offset fixes) * @@ -554,8 +555,8 @@ function get_buffer($pdf_file=''){ } return $buffer; } - - + + /** *Output PDF to some destination * @@ -566,37 +567,37 @@ function get_buffer($pdf_file=''){ **/ function Output($dest='', $name=''){ //----------------------------------- - + $pdf_file=''; - + if($this->support == "pdftk") { //As PDFTK can only merge FDF files not data directly, require_once("lib/url.php"); //we will need a url support because relative urls for pdf inside fdf files are not supported by PDFTK... - require_once("export/fdf/fdf.php"); //...conjointly with my patched/bridged forge_fdf that provides fdf file generation support from array data. + require_once("export/fdf/fdf.php"); //...conjointly with my patched/bridged forge_fdf that provides fdf file generation support from array data. require_once("export/pdf/pdftk.php");//Of course don't forget to bridge to PDFTK! $tmp_file=false; $pdf_file=resolve_path(fix_path(dirname(__FILE__).'/'.$this->pdf_source)); //string: full pathname to the input pdf , a form file - + if($this->fdf_source) { //FDF file provided $fdf_file=resolve_path(fix_path(dirname(__FILE__).'/'.$this->fdf_source)); }else { - + $pdf_url=getUrlfromDir($pdf_file); //Normaly http scheme not local file - + if($this->fdf_parse_needed) { //fdf source was provided $pdf_data=$this->parseFDFContent(); }else { //fields data was provided as an array, we have to generate the fdf file $pdf_data=$this->fields; - } - + } + $fdf_file=fix_path(FPDM_CACHE)."fields".rnunid().".fdf"; $tmp_file=true; $ret=output_fdf($pdf_url,$pdf_data,$fdf_file); if(!$ret["success"]) $this->Error("Output failed as something goes wrong (Pdf was $pdf_url)
during internal FDF generation of file $fdf_file,
Reason is given by {$ret['return']}"); } - + //Serializes security options (not deeply tested) $security=''; if(!is_null($this->security["password"]["owner"])) $security.=' owner_pw "'.substr($this->security["password"]["owner"],0,FPDM_PASSWORD_MAX_LEN).'"'; @@ -604,31 +605,31 @@ function Output($dest='', $name=''){ if($this->security["encrypt"]!=0) $security.=' encrypt_'.$this->security["encrypt"].'bit'; if(count($this->security["allow"])>0) { $permissions=$this->security["allow"]; - $security.=' allow '; + $security.=' allow '; foreach($permissions as $permission) $security.=' '.$permission; } - + //Serialize output modes $output_modes=''; if($this->flatten_mode) $output_modes.=' flatten'; if($this->compress_mode) $output_modes.=' compress'; if($this->uncompress_mode) $output_modes.=' uncompress'; - - + + $ret=pdftk($pdf_file,$fdf_file,array("security"=>$security,"output_modes"=>$output_modes)); - + if($tmp_file) @unlink($fdf_file); //Clear cache - + if($ret["success"]) { $pdf_file=$ret["return"]; - }else + }else $this->Error($ret["return"]); } - + //$this->buffer=$this->get_buffer($pdf_file); - - + + $dest=strtoupper($dest); if($dest=='') { @@ -640,12 +641,12 @@ function Output($dest='', $name=''){ else $dest='F'; } - + //Abort to avoid to polluate output if($this->verbose&&(($dest=='I')||($dest=='D'))) { - $this->Close($dest); + $this->Close($dest); } - + switch($dest) { case 'I': @@ -675,7 +676,7 @@ function Output($dest='', $name=''){ $this->Error('Some data has already been output, can\'t send PDF file'); header('Content-Length: '.strlen($this->get_buffer())); header('Content-Disposition: attachment; filename="'.$name.'"'); - + header("Expires: Mon, 26 Jul 1997 05:00:00 GMT"); // Date in the past header("Last-Modified: " . gmdate("D, d M Y H:i:s") . " GMT"); // always modified header("Cache-Control: no-store, no-cache, must-revalidate, max-age=0"); // HTTP/1.1 @@ -693,7 +694,7 @@ function Output($dest='', $name=''){ $f=fopen($name,'wb'); if(!$f) $this->Error('Unable to create output file: '.$name.' (currently opened under Acrobat Reader?)'); - + fwrite($f,$this->get_buffer(),strlen($this->get_buffer())); fclose($f); break; @@ -706,7 +707,7 @@ function Output($dest='', $name=''){ return ''; } - + /** *Decodes and returns the binary form of a field hexified value * @@ -719,7 +720,7 @@ function pdf_decode_field_value($value) { $call=$this->static_method_call('_hex2bin',$value); return $call; } - + /** *Encodes and returns the headecimal form of a field binary value * @@ -732,8 +733,8 @@ function pdf_encode_field_value($value) { $value=$this->static_method_call('_bin2hex',$value); return $value; } - - + + /** *Universal Php4/5 static call helper * @@ -742,14 +743,14 @@ function pdf_encode_field_value($value) { **/ function static_method_call($method) { //--------------------------------------------- - + $params_call=func_get_args(); array_shift($params_call); //var_dump($params_call); - + return call_user_func_array(array($this,$method),$params_call); } - + /** *Changes a field value that can be in hex <> or binary form () * @@ -758,21 +759,21 @@ function static_method_call($method) { **/ function replace_value($matches,$value) { //---------------------------------------------- - + array_shift($matches); if(($value!='')&&($matches[1]=="<")) //Value must be hexified.. - $value=$this->pdf_encode_field_value($value); - + $value=$this->pdf_encode_field_value($value); + $matches[2]=$value; $value_type_code=$matches[0]; //Should be V, DV or TU $matches[0]="/".$value_type_code." "; - + $value=implode("",$matches); //echo(htmlentities($value)); return $value; } - + /** *Core to change the value of a field property, inline. * @@ -783,18 +784,18 @@ function replace_value($matches,$value) { **/ function _set_field_value($line,$value) { //---------------------------------------- - + $verbose_set=($this->verbose&&($this->verbose_level>1)); //get the line content $CurLine =$this->pdf_entries[$line]; - + $OldLen=strlen($CurLine); - + //My PHP4/5 static call hack, only to make the callback $this->replace_value($matches,"$value") possible! $callback_code='$THIS=new FPDM("[_STATIC_]");return $THIS->replace_value($matches,"'.$value.'");'; - + $field_regexp='/^\/(\w+)\s?(\<|\()([^\)\>]*)(\)|\>)/'; - + if(preg_match($field_regexp,$CurLine)) { //modify it according to the new value $value $CurLine = preg_replace_callback( @@ -805,15 +806,15 @@ function _set_field_value($line,$value) { }else { if($verbose_set) echo("
WARNING:".htmlentities("Can not access to the value: $CurLine using regexp $field_regexp")); } - - + + $NewLen=strlen($CurLine); $Shift=$NewLen-$OldLen; $this->shift=$this->shift+$Shift; - + //Saves $this->pdf_entries[$line]=$CurLine; - + return $Shift; } @@ -822,7 +823,7 @@ function _encode_value($str) { $str="\xFE\xFF".iconv('UTF-8','UTF-16BE',$str); return $this->_bin2hex($str); } - + function _set_field_value2($line,$value,$append) { $CurLine=$this->pdf_entries[$line]; $OldLen=strlen($CurLine); @@ -855,57 +856,82 @@ function _set_field_value2($line,$value,$append) { /** *Changes the value of a field property, inline. * - *@param string $type supported values for type are 'default' , 'current' or 'tooltip' + *@param string $type supported values for type are 'default' , 'current' or 'tooltip' *@param string $name name of the field annotation to change the value *@param string $value the new value to set **/ function set_field_value($type,$name,$value) { //------------------------------------ $verbose_set=($this->verbose&&($this->verbose_level>1)); - + //Get the line(s) of the misc field values if(isset($this->value_entries["$name"])) { - + $object_id=$this->value_entries["$name"]["infos"]["object"]; - + if($type=="tooltip") { - + $offset_shift=$this->set_field_tooltip($name,$value); - + } elseif ($this->useCheckboxParser && isset($this->value_entries["$name"]['infos']['checkbox_state'])) { //FIX: set checkbox value + if (isset($this->value_entries["$name"]['infos']['parent_obj']) + && isset($this->value_entries["$name"]["infos"]["checkbox_no"]) + && isset($this->value_entries["$name"]["infos"]["checkbox_yes"])) { + $parent_obj = $this->value_entries["$name"]['infos']['parent_obj']; + // We search for the position of the parent object: + // The object ID has a space at the line end! + $parent_obj_line = array_search($parent_obj . ' 0 obj ', $this->pdf_entries); + if ($parent_obj_line === false) { + if ($this->verbose&&($this->verbose_level>1)) { + echo "
Could not find object [$parent_obj] which is the parent of checkbox [$name]"; + } + } else { + // insert a new line (with the selected value) in the parent object + $state = $this->value_entries["$name"]["infos"]["checkbox_no"]; + if ($value) { + $state = $this->value_entries["$name"]["infos"]["checkbox_yes"]; + } + + $state_line = " /V /$state "; + $this->pdf_entries[$parent_obj_line + 4] = $this->pdf_entries[$parent_obj_line + 4] . $state_line; + + $offset_shift = strlen($state_line); + $this->apply_offset_shift_from_object($parent_obj,$offset_shift); + } + } $offset_shift=$this->set_field_checkbox($name, $value); - //ENDFIX + //ENDFIX } else {//if(isset($this->value_entries["$name"]["values"]["$type"])) { // echo $this->value_entries["$name"]["values"]["$type"]; /* $field_value_line=$this->value_entries["$name"]["values"]["$type"]; $field_value_maxlen=$this->value_entries["$name"]["constraints"]["maxlen"]; - + if($field_value_maxlen) //Truncates the size if needed - $value=substr($value, 0, $field_value_maxlen); - + $value=substr($value, 0, $field_value_maxlen); + if($verbose_set) echo "
Change $type value of the field $name at line $field_value_line to '$value'"; $offset_shift=$this->_set_field_value($field_value_line,$value);*/ if(isset($this->value_entries[$name]["values"]["current"])) $offset_shift=$this->_set_field_value2($this->value_entries[$name]["values"]["current"],$value,false); else $offset_shift=$this->_set_field_value2($this->value_entries[$name]["infos"]["name_line"],$value,true); - } + } // }else // $this->Error("set_field_value failed as invalid valuetype $type for object $object_id"); - - + + //offset size shift will affect the next objects offsets taking into accound the order they appear in the file-- $this->apply_offset_shift_from_object($object_id,$offset_shift); - - } else + + } else $this->Error("field $name not found"); - + } - - + + /** *Changes the tooltip value of a field property, inline. - * + * *@param string $name name of the field annotation to change the value *@param string $value the new value to set *@return int offset_shift the size variation @@ -914,7 +940,7 @@ function set_field_tooltip($name,$value) { //------------------------------------ $offset_shift=0; $verbose_set=($this->verbose&&($this->verbose_level>1)); - + //Get the line(s) of the misc field values if(isset($this->value_entries["$name"])) { $field_tooltip_line=$this->value_entries["$name"]["infos"]["tooltip"]; @@ -924,11 +950,11 @@ function set_field_tooltip($name,$value) { }else { if($verbose_set) echo "
Change toolpip value aborted, the field $name has no tooltip definition."; } - } else + } else $this->Error("set_field_tooltip failed as the field $name does not exist"); return $offset_shift; } - + //FIX: parse checkbox definition /** *Changes the checkbox state. @@ -982,9 +1008,9 @@ public function set_field_checkbox($name, $value) return $offset_shift; } //ENDFIX - + /** - *Dumps the line entries + *Dumps the line entries * *@note for debug purposes *@access private @@ -1002,8 +1028,8 @@ function dumpEntries($entries,$tag="",$halt=false) { } if($halt) exit(); } - - + + /** *Dumps the string content * @@ -1023,7 +1049,7 @@ function dumpContent($content,$tag="",$halt=false) { } if($halt) exit(); } - + /** *Retrieves the content of a file as a string * @@ -1038,7 +1064,7 @@ function getContent($filename,$filetype) { $handle=fopen($filename,'rb'); $content = fread($handle, filesize($filename)); fclose($handle); - + if (!$content) $this->Error(sprintf('Cannot open '.$filetype.' file %s !', $filename)); @@ -1054,13 +1080,13 @@ function getContent($filename,$filetype) { $this->Error('Incremental updates are not supported'); $this->needAppearancesTrue = (strpos($content, '/NeedAppearances true')!==false); } - + /* if($this->verbose) { $this->dumpContent($content,"$filetype file content read"); }*/ return $content; } - + /** *Retrieves the content of a file as an array of lines entries * @@ -1073,14 +1099,14 @@ function getEntries($filename,$filetype) { //---------------------------------------- $content=$this->getContent($filename,$filetype); $entries=explode("\n",$content); - + /* if($this->verbose) { $this->dumpEntries($entries,"$filetype file entries"); }*/ return $entries; } - + /** *Retrieves a binary string from its hexadecimal representation * @@ -1094,22 +1120,22 @@ function _hex2bin ($hexString) { //echo "
_hex2bin($hexString)"; $BinStr = ''; - + $hexLength=strlen($hexString); // only hex numbers is allowed if ($hexLength % 2 != 0 || preg_match("/[^\da-fA-F]/",$hexString)) return FALSE; - - + + //Loop through the input and convert it for ($i = 0; $i < $hexLength; $i += 2) $BinStr .= '%'.substr ($hexString, $i, 2); - - + + // Raw url-decode and return the result return rawurldecode ($BinStr);//chr(hexdec()) } - - + + /** *Encodes a binary string to its hexadecimal representation * @@ -1127,9 +1153,9 @@ function _bin2hex($str) { $i++; } while ($i < strlen($str)); return $hex; - } - - + } + + /** * Extracts the map object for the xref table * @note PDF lines should have been previouly been parsed to make this work @@ -1139,18 +1165,18 @@ function get_xref_table() { //------------------------ return $this->value_entries['$_XREF_$']; } - + /** - * Extracts the offset of the xref table + * Extracts the offset of the xref table * @note PDF lines should have been previouly been parsed to make this work * @return int the xrefstart value */ function get_xref_start() { //------------------------ - return $this->value_entries['$_XREF_$']["infos"]["start"]["pointer"]; + return $this->value_entries['$_XREF_$']["infos"]["start"]["pointer"]; } - - + + /** * Extracts the line where the offset of the xref table is stored * @note PDF lines should have been previouly been parsed to make this work @@ -1160,7 +1186,7 @@ function get_xref_start_line() { //------------------------------- return $this->value_entries['$_XREF_$']["infos"]["start"]["line"]; } - + /** * Calculates the offset of the xref table * @@ -1172,8 +1198,8 @@ function get_xref_start_value() { $xref_start=$this->get_xref_start(); return $xref_start+$size_shift; } - - + + /** * Read the offset of the xref table directly from file content * @@ -1187,8 +1213,8 @@ function read_xref_start_value() { $chunks = preg_split('/\bxref\b/', $buffer, -1, PREG_SPLIT_OFFSET_CAPTURE); return intval($chunks[1][1])-4; //-4 , relative to end of xref } - - + + /** * Calculates the new offset/xref for this object id by applying the offset_shift due to value changes * @@ -1198,14 +1224,14 @@ function read_xref_start_value() { */ function get_offset_object_value($object_id) { //-------------------------------------------- - + //Static is to keep forever... static $offsets=null; static $positions=null; static $shifts=null; - + //if(is_null($offsets)) { //...variables content set once. This is the beauty of php :) - + //!NOTE: xref table is ordered by object id (position's object is not defined linearly in the pdf !) $positions=$this->_get_positions_ordered(); //Makes it 0 indexed as object id starts from 1 and positions starts from 0 @@ -1213,14 +1239,14 @@ function get_offset_object_value($object_id) { //Shifts are already 0 indexed, don't change. $shifts=$this->shifts; //} - + $p=$positions[$object_id]; $offset=$offsets[$p]; $shift=$shifts[$p]; //size shift of the object due to value changes return $offset+$shift; } - - + + /** * Reads the offset of the xref table directly from file content * @@ -1237,60 +1263,60 @@ function read_offset_object_value($object_id) { $offset=intval($chars[1][1])-strlen($object_header)+strlen($previous_object_footer)+2; return $offset; } - - + + /** * Fix the offset of the xref table - * + * */ function fix_xref_start() { - //------------------------- - + //------------------------- + $pdf_entries=&$this->pdf_entries; - $verbose_fix=($this->verbose&&($this->verbose_level>1)); + $verbose_fix=($this->verbose&&($this->verbose_level>1)); $calculate_xrefstart_value=((!$this->safe_mode)||$this->check_mode); $extract_xrefstart_value_from_file=($this->safe_mode||$this->check_mode); - - if($calculate_xrefstart_value) { + + if($calculate_xrefstart_value) { $xref_start_value_calculated=$this->get_xref_start_value(); //get computed value from old one if(!$this->safe_mode) $xref_start_value=$xref_start_value_calculated; } - - if($extract_xrefstart_value_from_file) { + + if($extract_xrefstart_value_from_file) { $xref_start_value_safe=$this->read_xref_start_value();//read direct from new file content if($this->safe_mode) $xref_start_value=$xref_start_value_safe; - } + } if($this->check_mode) { //Compared calculated value with position value read direct from file if($xref_start_value_calculated != $xref_start_value_safe) { if($verbose_fix) echo "
xrefstart's value must be $xref_start_value_safe calculated is $xref_start_value_calculated.Don't worry, FPDFM-merge will fix it for you.
"; $xref_start_value=$xref_start_value_safe; //Overrides with the good value - if($this->halt_mode) + if($this->halt_mode) $this->Error("Halt on error mode enabled, aborting. Use \$pdf->set_modes('halt',false); to disable this mode and go further fixing corrupted pdf."); } else { if($verbose_fix) echo "
xrefstart's value for the file is correct and vaults $xref_start_value"; } } - + //updates xrefstart's value $xref_start_line=$this->get_xref_start_line(); $pdf_entries[$xref_start_line]="$xref_start_value"; } - + /** * Get the offsets table 0 indexed - * - * @return array $offsets + * + * @return array $offsets */ function _get_offsets_starting_from_zero() { //------------------------------------------- - $offsets=$this->offsets; - return array_values($offsets); + $offsets=$this->offsets; + return array_values($offsets); } - + /** * Sorts the position array by key - * + * * @return array $positions the ordered positions */ function _get_positions_ordered() { @@ -1299,57 +1325,57 @@ function _get_positions_ordered() { ksort($positions); return $positions; } - + /** * Fix the xref table by rebuilding its offsets entries - * + * */ function fix_xref_table() { //------------------------ - + $xref_table=$this->get_xref_table(); $xLen=$xref_table["infos"]["count"]; $pdf_entries=&$this->pdf_entries; - + //Do some checks - $offsets=$this->offsets; - //$offsets=array_values($offsets); + $offsets=$this->offsets; + //$offsets=array_values($offsets); $oLen=count($offsets); - - - if($xLen == $oLen) { //...to rectify xref entries - + + + if($xLen == $oLen) { //...to rectify xref entries + //jump over len and header, this is the first entry with n $first_xref_entry_line=$xref_table["infos"]["line"]+3; - + //echo "xREF:{$pdf_entries[$first_xref_entry_line]}"; - + //!NOTE: xref table is ordered by object id (position's object is not defined linearly in the pdf !) //$positions=$this->positions; //ksort($positions); $verbose_fix=($this->verbose&&($this->verbose>1)); $calculate_offset_value=((!$this->safe_mode)||$this->check_mode); $extract_offset_value_from_file=($this->safe_mode||$this->check_mode); - + //Get new file content (ie with values changed) //$this->get_buffer()=$this->get_buffer(); - + for($i=0;$i<$xLen;$i++) { - + $obj_id=$i+1; - + //Try two way to retrieve xref offset value of an object of the given id - + if($calculate_offset_value) { $offset_value_calculated=$this->get_offset_object_value($obj_id);; if(!$this->safe_mode) $offset_value=$offset_value_calculated; } - + if($extract_offset_value_from_file) { - $offset_value_read=$this->read_offset_object_value($obj_id); + $offset_value_read=$this->read_offset_object_value($obj_id); if($this->safe_mode) $offset_value=$offset_value_read; - } - + } + if($this->check_mode) { if($offset_value_calculated != $offset_value_read) { if($verbose_fix) echo "
Offset for object $obj_id read is $offset_value_read, calculated $offset_value_calculated"; @@ -1361,19 +1387,19 @@ function fix_xref_table() { } $pdf_entries[$first_xref_entry_line+$i]=sprintf('%010d 00000 n ',$offset_value); } - - }else { + + }else { //Congratulations you won the corrupted Error Prize - $this->Error("Number of objects ($oLen) differs with number of xrefs ($xLen), something , pdf xref table is corrupted :("); + $this->Error("Number of objects ($oLen) differs with number of xrefs ($xLen), something , pdf xref table is corrupted :("); } - - + + } - - + + /** * Applies a shift offset $shift from the object whose id is given as param - * + * * @note offset shift will affect the next objects taking into accound the order they appear in the file * @access public * @param int object_id the id whose size shift has changed @@ -1385,13 +1411,13 @@ function apply_offset_shift_from_object($object_id,$offset_shift) { $object_pos=$this->positions[$object_id]; //get the next object position $next_object_pos=$object_pos+1; - //Applies offset change to next following objects + //Applies offset change to next following objects $this->_apply_offset_shift($next_object_pos,$offset_shift); } /** * Applies a shift offset $shift starting at the index $from to the shifts array - * + * * @access private * @param int from the index to start apply the shift * @param int shift the shift value to use @@ -1400,19 +1426,19 @@ function _apply_offset_shift($from,$shift) { //------------------------------------------ $offsets=&$this->shifts; $params=array($from,$shift); - + foreach($offsets as $key=>$value) { if($key>=$from) { $offset=$offsets[$key]+$shift; $offsets[$key]=$offset; } } - + } - + /** * Decodes a PDF value according to the encoding - * + * * @access public * @param string $encoding the encoding to use for decoding the value, only 'hex' is supported * @param string value a value to decode @@ -1421,15 +1447,15 @@ function _apply_offset_shift($from,$shift) { function decodeValue($encoding,$value) { //---------------------------------------------- //echo "Decoding $encoding value($value)"; - if($encoding=="hex") + if($encoding=="hex") $value=$this->pdf_decode_field_value($value); return $value; } - + /** *Retrieve the list of supported filters * - *@note Uses $FPDM_FILTERS array built dynamically + *@note Uses $FPDM_FILTERS array built dynamically *@param String $sep a separator to merge filter names, default is '|' *@return String the suported filters **/ @@ -1438,8 +1464,8 @@ function getFilters($sep="|") { global $FPDM_FILTERS; return implode($sep,$FPDM_FILTERS); } - - + + /** *Get a filter by name * @@ -1460,15 +1486,15 @@ function getFilters($sep="|") { **/ function getFilter($name) { //--------------------- - + switch($name) { case "LZWDecode": $filter=new FilterLZW(); break; - case "ASCIIHexDecode": + case "ASCIIHexDecode": $filter=new FilterASCIIHex(); break; - case "ASCII85Decode": + case "ASCII85Decode": $filter=new FilterASCII85(); break; case "FlateDecode": @@ -1480,62 +1506,62 @@ function getFilter($name) { default: $this->Error("getFilter cannot open stream of object because filter '{$name}' is not supported, sorry."); } - - - return $filter; + + + return $filter; } - - + + //========= Stream manipulation stuff (alpha, not used by now!) ================ - + /** * Detect if the stream has a textual content - * + * * @access public * @param string $stream the string content of the stream - * @return boolean + * @return boolean */ function is_text_stream($stream_content) { //-------------------------------------- return preg_match("/(\s*Td\s+[\<\(])([^\>\)]+)([\>\)]\s+Tj)/",$stream_content); } - + /** * changes the text value of a text stream - * + * * @access public * @param array $stream the stream defintion retrieved during PDF parsing - * @param string $value the new text value + * @param string $value the new text value */ function change_stream_value($stream,$value) { //-------------------------------------------- - + $entries=&$this->pdf_entries; - + $verbose_parsing=($this->verbose&&($this->verbose_level>3)); - + if($is_text_stream) { - + $OldLen=$stream["length"]["value"]; $lMin=$stream["start"]; $lMax=$stream["end"]; - + $stream_content=$this->_set_text_value($stream_content,$value); $NewLen=strlen($stream_content); - + for($l=$lMin;$l<=$lMax;$l++) { - + if($l==$lMin) { $entries[$lMin]=$stream_content; - + //Update the length $stream_def_line=$stream["length"]["line"]; $stream_def=$entries[$stream_def_line]; - + $stream_def=preg_replace("/\/Length\s*(\d+)/",'/Length '.$NewLen,$stream_def); - + $entries[$stream_def_line]=$stream_def; - + //update the filter type... $stream_def_line=$stream["filters"]["line"]; $stream_def=$entries[$stream_def_line]; @@ -1544,34 +1570,34 @@ function change_stream_value($stream,$value) { echo htmlentities(print_r($stream_def,true)); echo ""; } - + //...to filter Standard $stream_def=preg_replace($this->streams_filter,'/Standard ',$stream_def); - + $entries[$stream_def_line]=$stream_def; - + //Update the shift $size_shift=$NewLen-$OldLen; $this->apply_offset_shift_from_object($obj,$size_shift); - + }else if($lmin!=$lMax) { unset($entries[$l]); } } - + if($verbose_parsing) { var_dump($stream_content); } } } - + /** * Overrides value between Td and TJ, ommiting <> - * + * * @note core method * @access private * @param array $stream the stream defintion retrieved during PDF parsing - * @param string $value the new text value + * @param string $value the new text value */ function _set_text_value($stream,$value) { //--------------------------------------- @@ -1580,10 +1606,10 @@ function _set_text_value($stream,$value) { $stream=implode($chunks,''); return $stream; } - - + + //================================ - + function _extract_pdf_definition_value($name,$line,&$match) { //----------------------------------------------------------- global $FPDM_REGEXPS; @@ -1601,7 +1627,7 @@ function extract_pdf_definition_value($name,$line,&$match) { $value=$this->_extract_pdf_definition_value($name,$line,$match); }else $this->Error("extract_pdf_definition_value() does not support definition '$name'"); - + /*if($name=="/Type") { if(preg_match("/\//",$line,$foo)) { var_dump($match); @@ -1611,21 +1637,21 @@ function extract_pdf_definition_value($name,$line,&$match) { return $value; } - + /** - * Parses the lines entries of a PDF - * + * Parses the lines entries of a PDF + * * @access public * @param array $lines the FDF content as an array of lines * @return integer the number of lines the PDF has */ function parsePDFEntries(&$lines){ //-------------------------------- - + $entries=&$this->pdf_entries; - + $CountLines = count($entries); - + $Counter=0; $obj=0; //this is an invalid object id, we use it to know if we are into an object //FIX: parse checkbox definition @@ -1635,6 +1661,8 @@ function parsePDFEntries(&$lines){ $ap_d_line=0; $as=''; //ENDFIX + $parent_obj=0; + $fields_line=-1; $type=''; $subtype=''; $name=''; @@ -1647,23 +1675,23 @@ function parsePDFEntries(&$lines){ $stream=array(); $id_def=false; //true when parsing/decoding trailer ID $id_single_line_def=false; //true when the two ID chunks are one the same line - $id_multi_line_def=false; //true or OpenOffice 3.2 + $id_multi_line_def=false; //true or OpenOffice 3.2 $creator=''; $producer=''; $creationDate=''; - + $verbose_parsing=($this->verbose&&($this->verbose_level>3)); $verbose_decoding=($this->verbose&&($this->verbose_level>4)); - - if($this->verbose) $this->dumpContent("Starting to parse $CountLines entries","PDF parse"); - + + if($this->verbose) $this->dumpContent("Starting to parse $CountLines entries","PDF parse"); + while ( $Counter < $CountLines ){ - + $CurLine = $entries[$Counter]; - + if($verbose_parsing) $this->dumpContent($CurLine,"====Parsing Line($Counter)"); if(!$xref_table) { - + //Header of an object? if(preg_match("/^(\d+) (\d+) obj/",$CurLine,$match)) { $obj=intval($match[1]); @@ -1678,37 +1706,37 @@ function parsePDFEntries(&$lines){ $object["constraints"]["maxlen"]=$default_maxLen; $object["infos"]=array(); $object["infos"]["object"]=intval($obj); - $object["infos"]["tooltip"]=$default_tooltip_line; - - } else { - + $object["infos"]["tooltip"]=$default_tooltip_line; + + } else { + //Object has been opened if($obj) { - + //Footer of an object? if(preg_match("/endobj/",$CurLine,$match)) { if($verbose_parsing) $this->dumpContent("","====Closing object($obj) at line $Counter"); - + //We process fields here, save only Annotations texts that are supported by now if($subtype=="Widget") { - + if($name != '') { $lines["$name"]=$object; if($verbose_parsing) $this->dumpContent("$type $subtype (obj id=$obj) is a text annotation of name '$name', saves it."); }//else // $this->Error("$type $subtype (obj id=$obj) is a text annotation without a name, this cannot be."); - - + + $values=$object["values"]; - + //Sanity values checks, watchdog. // if(!array_key_exists("current",$values)) $this->Error("Cannot find value (/V) for field $name"); // if(!array_key_exists("default",$values)) $this->Error("Cannot find default value (/DV) for field $name"); - + }else if($verbose_parsing) $this->dumpContent("Object $type $subtype (obj id=$obj) is not supported"); - - + + $object=null; $obj=0; //FIX: parse checkbox definition @@ -1718,14 +1746,15 @@ function parsePDFEntries(&$lines){ $ap_d_line=0; $as=''; //ENDFIX + $parent_obj=0; $type=''; $subtype=''; $name=''; $value=''; $maxLen=0; - + } else { - + if(preg_match("/\/Length\s*(\d+)/",$CurLine,$match)) { $stream["length"]=array("line"=>$Counter,"value"=>$match[1]); $stream["start"]=0; @@ -1733,92 +1762,92 @@ function parsePDFEntries(&$lines){ $stream["content"]=''; if($verbose_parsing) $this->dumpContent($CurLine,"->Stream filter length definition({$match[1]}) for object($obj) at line $Counter"); } - + //Handles single filter /Filter /filter_type as well as well as filter chains such as /Filter [/filter_type1 /filter_type2 .../filter_typeN] if(preg_match_all($this->streams_filter,$CurLine,$matches)) { - + //$this->dumpContent($this->streams_filter); /*$stream_filter=$match[1]; $stream_filter=trim(preg_replace('/(<<|\/Length\s*\d+|>>)/', '', $stream_filter),' '); $stream_filters=preg_split('/\s*\//',$stream_filter); array_shift($stream_filters);*/ $stream_filters=$matches[2]; - $stream["filters"]=array("line"=>$Counter, "type"=>$stream_filters); + $stream["filters"]=array("line"=>$Counter, "type"=>$stream_filters); if($verbose_parsing) { //var_dump($stream_filters); $stream_filter=implode(" ",$stream_filters); $this->dumpContent($CurLine,"->Stream filter type definition($stream_filter) for object($obj) at line $Counter"); } - } - + } + if(array_key_exists("length",$stream)) { //length is mandatory - + if(preg_match("/\b(stream|endstream)\b/",$CurLine,$match)) { - + if(!array_key_exists("filters",$stream)) {//filter type is optional, if none is given, its standard - + $stream["filters"]=array("type"=>array("Standard")); if($verbose_parsing) { var_dump($stream); $this->dumpContent($CurLine,"->No stream filter type definition for object($obj) was found, setting it to 'Standard'"); - } + } } - - + + if($match[1] == "stream") { if($verbose_parsing) $this->dumpContent($CurLine,"->Opening stream for object($obj) at line $Counter"); $stream["start"]=$Counter+1; }else { $stream["end"]=$Counter-1; - + $stream["content"]=implode("\n",array_slice($entries,$stream["start"],$stream["end"]-$stream["start"]+1)); - - - + + + $filters=$stream["filters"]["type"]; $f=count($filters); $stream_content=$stream["content"]; - + //var_dump($filters); - + //$filters_type=$filters["type"]; - + //now process the stream, ie unpack it if needed - //by decoding in the reverse order the streams have been encoded - //This is done by applying decode using the filters in the order given by /Filter. + //by decoding in the reverse order the streams have been encoded + //This is done by applying decode using the filters in the order given by /Filter. foreach($filters as $filter_name) { - + $stream_filter=$this->getFilter($filter_name); $stream_content=$stream_filter->decode($stream_content); - if($verbose_decoding) { + if($verbose_decoding) { echo "
Stream decoded using filter '$filter_name':[
";
 													var_dump($stream_content); //todo : manipulate this content and adjust offsets.
 													echo "
]
"; } } - + if($verbose_parsing) { $this->dumpEntries($stream); - + echo ""; if($this->is_text_stream($stream_content)) { echo "Stream text unfiltered:[
";
 												} else {
 													echo "Stream unfiltered:[
";
 												}
-												var_dump($stream_content); 
+												var_dump($stream_content);
 												echo "
]
"; $this->dumpContent($CurLine,"->Closing stream for object($obj) at line $Counter"); } - + $stream=array(); } - }else if($stream["start"]>0){ + }else if($stream["start"]>0){ //stream content line that will be processed on endstream... } - + } else { - + /* Producer /CreationDate (D:20101225151810+01'00')>> @@ -1828,19 +1857,19 @@ function parsePDFEntries(&$lines){ if($verbose_parsing) echo("Creator read ($creator)"); $this->info["Creator"]=$creator; } - + if(($producer=='')&&preg_match("/\/Producer\<([^\>]+)\>/",$CurLine,$values)) { $producer=$this->decodeValue("hex",$values[1]); if($verbose_parsing) echo("Producer read ($producer)"); $this->info["Producer"]=$producer; } - + if(($creationDate=='')&&preg_match("/\/CreationDate\(([^\)]+)\)/",$CurLine,$values)) { $creationDate=$values[1]; if($verbose_parsing) echo("Creation date read ($creationDate)"); $this->info["CreationDate"]=$creationDate; } - + //=== DEFINITION ==== //preg_match("/^\/Type\s+\/(\w+)$/",$CurLine,$match) $match=array(); @@ -1856,6 +1885,12 @@ function parsePDFEntries(&$lines){ echo("
Found D Line '$Counter'"); } $ap_d_line = $Counter; + } elseif (!$parent_obj&&$this->extract_pdf_definition_value("/Parent", $CurLine, $match)) { + $parent_obj = intval($match[1]); + if ($verbose_parsing) { + echo("
Found Parent is '$parent_obj'"); + } + $object["infos"]["parent_obj"] = $parent_obj; } elseif (($ap_line==$Counter-4)&&($ap_d_line==$Counter-2)&&($ap_d_yes=='')&&$this->extract_pdf_definition_value("name", $CurLine, $match)) { $ap_d_yes=$match[1]; if ($verbose_parsing) { @@ -1879,48 +1914,48 @@ function parsePDFEntries(&$lines){ } //ENDFIX if(($type=='')||($subtype=='')||($name=="")) { - + if(($type=='')&&$this->extract_pdf_definition_value("/Type",$CurLine,$match)) { - + if($match[1]!='Border') { $type=$match[1]; if($verbose_parsing) echo("
Object's type is '$type'"); } - + } if(($subtype=='')&&$this->extract_pdf_definition_value("/Subtype",$CurLine,$match)) { - + $subtype=$match[1]; if($verbose_parsing) echo("
Object's subType is '$subtype'"); - + } if(($name=="")&&preg_match("/^\/T\s?\((.+)\)\s*$/",$this->_protectContentValues($CurLine),$match)) { - + $name=$this->_unprotectContentValues($match[1]); //FIX: convert ASCII object names to utf-8 // don't use utf8_encode($name) yet, it's core function since php 7.2 $name = mb_convert_encoding($name, 'UTF-8', 'ASCII'); //ENDFIX if($verbose_parsing) echo ("Object's name is '$name'"); - + $object["infos"]["name"]=$name; //Keep a track $object["infos"]["name_line"]=$Counter; - + //$this->dumpContent(" Name [$name]"); } - - }// else { - + + }// else { + //=== CONTENT ==== - + //$this->dumpContent($CurLine); //=== Now, start the serious work , read DV, V Values and eventually TU //note if(preg_match_all("/^\/(V|DV)\s+(\<|\))([^\)\>]+)(\)|\>)/",$CurLine,$matches)) { //do not work as all is encoded on the same line... - if(preg_match("/^\/(V|DV|TU)\s+([\<\(])/",$CurLine,$def)) { - + if(preg_match("/^\/(V|DV|TU)\s+([\<\(])/",$CurLine,$def)) { + //get an human readable format of value type and encoding - + if($def[1] == "TU") { $valuetype="info"; $object["infos"]["tooltip"]=$Counter; @@ -1928,49 +1963,50 @@ function parsePDFEntries(&$lines){ $valuetype=($def[1] == "DV") ? "default" : "current"; $object["values"]["$valuetype"]=$Counter; //Set a marker to process lately } - + $encoding=($def[2]=="<") ? "hex" : "plain"; - + if(preg_match("/^\/(V|DV|TU)\s+(\<|\)|\()([^\)\>]*)(\)|\>\))/",$CurLine,$values)) { $value=$values[3]; $value=$this->decodeValue($encoding,$value); - }else + }else $value=''; - - if($verbose_parsing) + + if($verbose_parsing) $this->dumpContent("$type $subtype (obj id=$obj) has $encoding $valuetype value [$value] at line $Counter"); - - + + }else if(preg_match("/^\/MaxLen\s+(\d+)/",$CurLine,$values)) { $maxLen=$values[1]; $object["constraints"]["maxlen"]=intval($maxLen); } else if($verbose_parsing) echo("WARNING: definition ignored"); - + if(substr($CurLine,0,7)=='/Fields' && !$this->needAppearancesTrue) { $CurLine='/NeedAppearances true '.$CurLine; $entries[$Counter]=$CurLine; + $fields_line = $Counter; } - + //TODO: Fetch the XObject..and change Td <> Tj /* if(preg_match("/^\/AP/",$CurLine,$values)) { //die("stop"); $CurLine=''; //clear link to Xobject $entries[$Counter]=$CurLine; }*/ - -// } - + +// } + } - - + + } - + } - + //~~~~~Xref table header? ~~~~~~ if(preg_match("/\bxref\b/",$CurLine,$match)) { - + $xref_table=1; if($verbose_parsing) $this->dumpContent("->Starting xref table at line $Counter:[$CurLine]"); $lines['$_XREF_$']=array(); @@ -1981,73 +2017,73 @@ function parsePDFEntries(&$lines){ $start_pointer=$this->pointer+strpos($CurLine,"xref"); //HACK for PDFcreator 1.0.0 $lines['$_XREF_$']["infos"]["start"]["pointer"]=$start_pointer; } - + } $obj_header=false; - } else { + } else { //We are inside the xref table //$this->dumpContent($CurLine,""); $xref_table=$xref_table+1; switch($xref_table) { - case 2: + case 2: if(preg_match("/^(\d+) (\d+)/",$CurLine,$match)) { $refs_count=intval($match[2]);//xref_table length+1 (includes this line) - $lines['$_XREF_$']["infos"]["count"]=$refs_count-1; + $lines['$_XREF_$']["infos"]["count"]=$refs_count-1; if($verbose_parsing) $this->dumpContent("Xref table length is $refs_count"); }else if($verbose_parsing) $this->dumpContent("WARNING: Xref table length ignored!"); break; case 3: - //Should be 0000000000 65535 f + //Should be 0000000000 65535 f if($verbose_parsing) $this->dumpContent("this is Xref table header, should be 0000000000 65535 f "); break; default: //xref entries if($refs_count>0) { $xref=$xref_table-3; - + if($refs_count == 1) {//Last one , due to the shift, is the trailer if(!preg_match("/^trailer/",$CurLine)) //if not, Houston we have a problem - $this->Error("xref_table length corrupted?: Trailer not found at expected!"); + $this->Error("xref_table length corrupted?: Trailer not found at expected!"); else $trailer_table=1; }else { $lines['$_XREF_$']["entries"][$xref]=$CurLine; if($verbose_parsing) $this->dumpContent("Xref table entry for object $xref found."); } - $refs_count--; + $refs_count--; } else { //We are inside the trailer - + if($trailer_table==1) { //should be << - + if(trim($CurLine) != '') { //HACK: PDFCreator Version 1.0.0 has an extra CR after trailer - if(!preg_match("/<Error("trailer_table corrupted?; missing start delimiter << "); $trailer_table++; } - - + + }else if(($trailer_table>0)&&((!is_null($id_def))||preg_match("/^\/(Size|Root|Info|ID|DocChecksum)/",$CurLine,$match))) { - + //Value can be extracted using (\d+|\[[^\]]+\]) if(preg_match("/\/Size (\d+)/",$CurLine,$match)) { //Seems to match with xref entries count.. $size_read=$match[1]; $this->info["size"]=$size_read; if($verbose_parsing) $this->dumpContent("Size read ($size_read) for pdf found."); - } + } if(preg_match("/^\/ID\s*\[\s*<([\da-fA-F]+)/",$CurLine,$match)) { $oid=$match[1]; $id_def=true; if($verbose_parsing) $this->dumpContent("ID chunk one ($oid) for pdf found."); - + //Determines if the ID definition is one line... if(preg_match("/\>\s?\info["checksum"]=$checksum; if($verbose_parsing) $this->dumpContent("Checksum read ($checksum) for pdf found."); - } - - if(preg_match("/>>/",$CurLine,$match)) + } + + if(preg_match("/>>/",$CurLine,$match)) $trailer_table=-1;//negative value: expects startxref to follow - - + + } else { - + switch($trailer_table) { case -1://startxref - if(!preg_match("/^startxref/",$CurLine,$match)) + if(!preg_match("/^startxref/",$CurLine,$match)) $this->Error("startxref tag expected, read $CurLine"); break; case -2://startxref's value @@ -2089,34 +2125,46 @@ function parsePDFEntries(&$lines){ default://%%EOF } $trailer_table--; - + } - + } } - + } - + $this->pointer=$this->pointer+strlen($CurLine)+1; //+1 due to \n $Counter++; } - + if($this->verbose) { - - $refs=(array_key_exists('$_XREF_$',$lines)) ? $lines['$_XREF_$']["infos"]["count"] : 0; + + $refs=(array_key_exists('$_XREF_$',$lines)) ? $lines['$_XREF_$']["infos"]["count"] : 0; if($refs) { $this->dumpContent("PDF parse retrieved $refs refs"); }else { $this->dumpContent("PDF parse retrieved no refs, seems the xref table is broken or inacessible, this is bad!"); } } - + + // Remove checkbox parents from fields line, to make sure they will still be displayed, even if /NeedAppearance is set to true. + if ($fields_line !== -1) { + $fields = $this->pdf_entries[$fields_line]; + foreach ($lines as $line) { + if (isset($line['infos']['parent_obj'])) { + $parent_obj = $line['infos']['parent_obj']; + $fields = preg_replace('/(.*\/Fields\s\[(\d+\s\d\sR\s?)*)(' . $parent_obj . '\s\d\sR\s?)((\d+\s\d\sR\s?)*\])/', '$1$4', $fields); + } + } + $this->pdf_entries[$fields_line] = $fields; + } + return count($lines); } - + /** * Protect ( ) that may be in value or names - * + * * @access protected * @param string $content the FDF content to protect values * @return string the content protected @@ -2127,7 +2175,7 @@ function _protectContentValues($content) { $content=str_replace("\\)","#@$",$content); return $content; } - + /** * Unprotect ( ) that may be in value or names * @@ -2136,30 +2184,30 @@ function _protectContentValues($content) { * @return string the content unprotected */ function _unprotectContentValues($content) { - //-------------------------------------------------- + //-------------------------------------------------- $content=str_replace("$@#","\\(",$content); $content=str_replace("#@$","\\)",$content); $content=stripcslashes($content); return $content; } - + /** - * Parses the content of a FDF file and saved extracted field data + * Parses the content of a FDF file and saved extracted field data * *@access public *@return array $fields the data of the fields parsed */ function parseFDFContent(){ //------------------------- - + $content=$this->fdf_content; $content=$this->_protectContentValues($content);//protect ( ) that may be in value or names... - + if($this->verbose) $this->dumpEntries($content,"FDF parse"); - - //..so that this regexp can do its job without annoyances + + //..so that this regexp can do its job without annoyances if(preg_match_all("/(T|V)\s*\(([^\)]+)\)\s*\/(T|V)\s*\(([^\)]+)\)/", $content,$matches, PREG_PATTERN_ORDER)) { - + $fMax=count($matches[0]); $fields=array(); for($f=0;$f<$fMax;$f++) { @@ -2167,54 +2215,54 @@ function parseFDFContent(){ $name=''; if($matches[1][$f]=="V") { $value=$matches[2][$f]; - if($matches[3][$f]=="T") + if($matches[3][$f]=="T") $name=$matches[4][$f]; - else + else $this->Error("Field $f ignored , incomplete field declaration, name is expected"); } else { if($matches[1][$f]=="T") { $name=$matches[2][$f]; - if($matches[3][$f]=="V") + if($matches[3][$f]=="V") $value=$matches[4][$f]; - else + else $this->Error("Field $f ignored , incomplete field declaration, value is expected"); - } else + } else $this->Error("Field $f ignored , Invalid field keys ({$matches[0][$f]})"); } if($name!='') { - if(array_key_exists($name,$fields)) + if(array_key_exists($name,$fields)) $this->Error("Field $f ignored , already defined"); else { $name=$this->_unprotectContentValues($name); $value=$this->_unprotectContentValues($value); - if($this->verbose) + if($this->verbose) $this->dumpContent("FDF field [$name] has its value set to \"$value\""); $fields[$name]=$value; } - } else + } else $this->Error("Field $f ignored , no name"); - + } } else if($this->verbose) $this->dumpContent($fields,"FDF has no fields",false); - + if($this->verbose) $this->dumpContent($fields,"FDF parsed",false); - + return $fields; } - - + + /** * Close the opened file */ function closeFile() { //-------------------- if (isset($this->f) && is_resource($this->f)) { - fclose($this->f); + fclose($this->f); unset($this->f); - } + } } - + /** * Print Error and die * @@ -2222,12 +2270,12 @@ function closeFile() { */ function Error($msg) { //-------------------- - die('FPDF-Merge Error: '.$msg); + die('FPDF-Merge Error: '.$msg); } - - + + } - + } unset($__tmp);