From 6bbcbcc59e4cc443b7efbb4ec9a5d96c732f5511 Mon Sep 17 00:00:00 2001 From: jygaulier Date: Thu, 9 Nov 2023 16:13:18 +0100 Subject: [PATCH] declare actions (work on fields) inside job (work on records) --- .../Command/Thesaurus/Translator/Action.php | 303 +++++++++++ .../Translator/GlobalConfiguration.php | 55 +- .../Command/Thesaurus/Translator/Job.php | 475 ++++++------------ .../Thesaurus/Translator/TranslateCommand.php | 17 +- 4 files changed, 513 insertions(+), 337 deletions(-) create mode 100644 lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Action.php diff --git a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Action.php b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Action.php new file mode 100644 index 0000000000..dacf75727f --- /dev/null +++ b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Action.php @@ -0,0 +1,303 @@ +job = $job; + $this->unicode = $unicode; + $this->output = $output; + $this->reportFormat = $this->job->getGlobalConfiguration()->getReportFormat(); + + if (array_key_exists('active', $action_conf) && $action_conf['active'] === false) { + $this->active = false; + return; + } + + + // get infos about the "source_field" + // + if (!($f = $job->getDataboxField($action_conf['source_field'])) ) { + $this->errors[] = sprintf("source field (%s) not found.", $action_conf['source_field']); + } + if (trim($f->get_tbranch()) === '') { + $this->errors[] = sprintf("source field (%s) not linked to thesaurus.", $action_conf['source_field']); + } + $this->tbranches = $job->getXpathTh()->query($f->get_tbranch()); + if (!$this->tbranches || $this->tbranches->length <= 0) { + $this->errors[] = sprintf("thesaurus branch(es) of source field (%s) not found.", $this->source_field['tbranch']); + } + $this->source_field = [ + 'id' => $f->get_id(), + 'name' => $f->get_name(), + 'tbranch' => $f->get_tbranch(), + 'lng' => array_key_exists('source_lng', $action_conf) ? 
$action_conf['source_lng'] : null + ]; + $this->selectRecordFieldIds[] = $this->source_field['id']; + + + // get infos about the "destination_fields" + // + $this->destination_fields = []; + foreach ($action_conf['destination_fields'] as $tf) { + list($lng, $fname) = explode(':', $tf); + if(!($f = $job->getDataboxField($fname)) ) { + $this->output->writeln(sprintf("undefined field (%s) (ignored).", $fname)); + continue; + } + $this->destination_fields[$lng] = [ + 'id' => $f->get_id(), + 'name' => $f->get_name(), + ]; + + $this->selectRecordFieldIds[] = $this->destination_fields[$lng]['id']; + } + + if (empty($this->destination_fields)) { + $this->errors[] = sprintf("no \"destination_field\" found."); + } + + // misc settings + $this->cleanupDestination = array_key_exists('cleanup_destination', $action_conf) && $action_conf['cleanup_destination'] === true; + $this->cleanupSource = array_key_exists('cleanup_source', $action_conf) ? $action_conf['cleanup_source'] : self::NEVER_CLEANUP_SOURCE; + } + + public function doAction(array $metas, array &$meta_to_delete, array&$meta_to_add) + { + if ($this->cleanupDestination) { + foreach ($this->destination_fields as $lng => $destination_field) { + $destination_field_id = $destination_field['id']; + if(array_key_exists($destination_field_id, $metas)) { + foreach ($metas[$destination_field_id] as $meta_id => $value) { + $meta_to_delete[$meta_id] = $value; + } + } + unset($meta_id, $value); + } + unset($lng, $destination_field, $destination_field_id); + } + + $source_field_id = $this->source_field['id']; + + if(!array_key_exists($source_field_id, $metas)) { + // no source field value for this record: nothing to do + return; + } + + // loop on every value of the "source_field" + // + foreach ($metas[$source_field_id] as $source_meta_id => $source_value) { + + $t = $this->splitTermAndContext($source_value); + $q = '@w=\'' . thesaurus::xquery_escape($this->unicode->remove_indexer_chars($t[0])) . 
'\''; + if ($t[1]) { + $q .= ' and @k=\'' . thesaurus::xquery_escape($this->unicode->remove_indexer_chars($t[1])) . '\''; + } + if(!is_null($this->source_field['lng'])) { + $q .= ' and @lng=\'' . thesaurus::xquery_escape($this->source_field['lng']) . '\''; + } + $q = '//sy[' . $q . ']/../sy'; + unset($t); + + // loop on every tbranch (one field may be linked to many branches) + // + $translations = []; // ONE translation per lng (first found in th) + /** @var DOMNode $tbranch */ + foreach ($this->tbranches as $tbranch) { + if (!($nodes = $this->job->getXpathTh()->query($q, $tbranch))) { + $this->output->writeln(sprintf("\t\t\t- \"%s\" xpath error on (%s), ignored.", $source_value, $q)); + continue; + } + + // loop on every synonym + // + /** @var DOMElement $node */ + foreach ($nodes as $node) { + $lng = $node->getAttribute('lng'); + + // ignore synonyms not in one of the "destination_field" languages + // + if (!array_key_exists($lng, $this->destination_fields)) { + continue; + } + + $translated_value = $node->getAttribute('v'); + + $destination_field_id = $this->destination_fields[$lng]['id']; + if (!array_key_exists($lng, $translations)) { + if ( + !array_key_exists($destination_field_id, $metas) + || ($destination_meta_id = array_search($translated_value, $metas[$destination_field_id])) === false + ) { + $translations[$lng] = [ + 'val' => $translated_value, + 'id' => null, + 'msg' => sprintf(" --> %s", $this->destination_fields[$lng]['name']) + ]; + $meta_to_add[$destination_field_id][] = $translated_value; + } + else { + $translations[$lng] = [ + 'val' => $translated_value, + 'id' => $destination_meta_id, + 'msg' => sprintf("already in %s", $this->destination_fields[$lng]['name']) + ]; + unset($meta_to_delete[$destination_meta_id]); + } + unset($destination_meta_id); + } + unset($lng, $destination_field_id, $translated_value); + } + unset($nodes, $node, $tbranch); + } + unset($q); + + // cleanup source + // + if (empty($translations)) { + 
if($this->reportFormat === GlobalConfiguration::REPORT_FORMAT_ALL) { + $this->output->writeln(sprintf("\t\t\t- \"%s\" : no translation found.", $source_value)); + } + $this->job->addToCondensedReport($source_value, job::CONDENSED_REPORT_NOT_TRANSLATED); + } + else if (count($translations) < count($this->destination_fields)) { + if(in_array($this->reportFormat, [GlobalConfiguration::REPORT_FORMAT_ALL, GlobalConfiguration::REPORT_FORMAT_TRANSLATED])) { + $this->output->writeln(sprintf("\t\t\t- \"%s\" : incomplete translation.", $source_value)); + } + $this->job->addToCondensedReport($source_value, job::CONDENSED_REPORT_INCOMPLETELY_TRANSLATED); + } + else { + // complete translation (all target lng) + if(in_array($this->reportFormat, [GlobalConfiguration::REPORT_FORMAT_ALL, GlobalConfiguration::REPORT_FORMAT_TRANSLATED])) { + $this->output->writeln(sprintf("\t\t\t- \"%s\" :", $source_value)); + } + $this->job->addToCondensedReport($source_value, job::CONDENSED_REPORT_FULLY_TRANSLATED); + + if ($this->cleanupSource === self::CLEANUP_SOURCE_IF_TRANSLATED) { + // do NOT delete the source term if one translation found it as already present as destination (possible if source=destination) + $used = false; + foreach($translations as $l => $t) { + if($t['id'] === $source_meta_id) { + $used = true; + break; + } + } + if(!$used) { + $meta_to_delete[$source_meta_id] = $metas[$source_field_id][$source_meta_id]; + } + } + } + + if(in_array($this->reportFormat, [GlobalConfiguration::REPORT_FORMAT_ALL, GlobalConfiguration::REPORT_FORMAT_TRANSLATED])) { + foreach ($translations as $lng => $translation) { + $this->output->writeln(sprintf("\t\t\t\t- [%s] \"%s\" %s", $lng, $translation['val'], $translation['msg'])); + } + } + + if ($this->cleanupSource === self::ALWAYS_CLEANUP_SOURCE) { + // do NOT delete the source term if one translation found it as already present as destination (possible if source=destination) + $used = false; + foreach($translations as $l => $t) { + if($t['id'] 
=== $source_meta_id) { + $used = true; + break; + } + } + if(!$used) { + $meta_to_delete[$source_meta_id] = $metas[$source_field_id][$source_meta_id]; + } + } + + unset($lng, $translations, $translation); + } + } + + private function splitTermAndContext($word) + { + $term = trim($word); + $context = ''; + if (($po = strpos($term, '(')) !== false) { + if (($pc = strpos($term, ')', $po)) !== false) { + $context = trim(substr($term, $po + 1, $pc - $po - 1)); + $term = trim(substr($term, 0, $po)); + } + else { + $context = trim(substr($term, $po + 1)); + $term = trim(substr($term, 0, $po)); + } + } + + return [$term, $context]; + } + + /** + * @return bool + */ + public function isActive(): bool + { + return $this->active; + } + + /** + * @return array + */ + public function getErrors(): array + { + return $this->errors; + } + + /** + * @return array + */ + public function getSelectRecordFieldIds(): array + { + return $this->selectRecordFieldIds; + } + +} diff --git a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/GlobalConfiguration.php b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/GlobalConfiguration.php index c097c63634..e989c69380 100644 --- a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/GlobalConfiguration.php +++ b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/GlobalConfiguration.php @@ -5,6 +5,7 @@ use appbox; use collection; use databox; +use databox_field; use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Yaml\Yaml; use Unicode; @@ -13,6 +14,8 @@ { const CONFIG_DIR = "/config/translator/"; const CONFIG_FILE = "configuration.yml"; + const REPORT_FORMAT_ALL = "all"; + const REPORT_FORMAT_TRANSLATED = "translated"; private $configuration = null; @@ -47,7 +50,8 @@ private function __construct($appBox, Unicode $unicode, $global_conf, bool $dryR $sbas_name = $databox->get_dbname(); $this->databoxes[$sbas_id] = [ 'dbox' => $databox, - 'collections' => [] + 'collections' => [], + 'fields' => [], ]; $this->databoxes[$sbas_name] = 
&$this->databoxes[$sbas_id]; // list all collections @@ -57,16 +61,44 @@ private function __construct($appBox, Unicode $unicode, $global_conf, bool $dryR $this->databoxes[$sbas_id]['collections'][$coll_id] = $collection; $this->databoxes[$sbas_id]['collections'][$coll_name] = &$this->databoxes[$sbas_id]['collections'][$coll_id]; } + // list all fields + /** @var databox_field $dbf */ + foreach($databox->get_meta_structure() as $dbf) { + $field_id = $dbf->get_id(); + $field_name = $dbf->get_name(); + $this->databoxes[$sbas_id]['fields'][$field_id] = $dbf; + $this->databoxes[$sbas_id]['fields'][$field_name] = &$this->databoxes[$sbas_id]['fields'][$field_id]; + } } foreach($global_conf['jobs'] as $job_name => $job_conf) { - $this->jobs[$job_name] = new Job($this, $job_conf, $unicode, $output); + $job = new Job($this, $job_name, $job_conf, $unicode, $output); + if($job->isActive()) { + if($job->isValid()) { + $this->jobs[$job_name] = $job; + } + else { + $output->writeln("Configuration error(s)... :"); + foreach ($job->getErrors() as $err) { + $output->writeln(sprintf(" - %s", $err)); + } + $output->writeln("...Job ignored"); + } + } + else { + unset($job); + $output->writeln(sprintf("job \"%s\" is inactive: ignored.", $job_name)); + } } } /** * @param appbox $appBox + * @param Unicode $unicode * @param string $root + * @param bool $dryRun + * @param string $reportFormat + * @param OutputInterface $output * @return GlobalConfiguration * @throws ConfigurationException */ @@ -109,6 +141,25 @@ public function getCollection($sbasIdOrName, $collIdOrName) return $this->databoxes[$sbasIdOrName]['collections'][$collIdOrName] ?? null; } + /** + * @param string|int $sbasIdOrName + * @return databox_field[]|null + */ + public function getFields($sbasIdOrName) + { + return $this->databoxes[$sbasIdOrName] ?? 
null; + } + + /** + * @param string|int $sbasIdOrName + * @param string|int $fieldIdOrName + * @return databox_field|null + */ + public function getField($sbasIdOrName, $fieldIdOrName) + { + return $this->databoxes[$sbasIdOrName]['fields'][$fieldIdOrName] ?? null; + } + /** * @return bool */ diff --git a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Job.php b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Job.php index e8d408d057..a9f76d4c9c 100644 --- a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Job.php +++ b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/Job.php @@ -2,9 +2,8 @@ namespace Alchemy\Phrasea\Command\Thesaurus\Translator; +use collection; use databox; -use DOMElement; -use DOMNode; use DOMNodeList; use DOMXpath; use PDO; @@ -14,13 +13,20 @@ class Job { - const NEVER_CLEANUP_SOURCE = 'never'; - const ALWAYS_CLEANUP_SOURCE = 'always'; - const CLEANUP_SOURCE_IF_TRANSLATED = 'if_translated'; + const CONDENSED_REPORT_NOT_TRANSLATED = 'notTranslated'; + const CONDENSED_REPORT_INCOMPLETELY_TRANSLATED = 'incompletelyTranslated'; + const CONDENSED_REPORT_FULLY_TRANSLATED = 'fullyTranslated'; private $active = true; + /** @var array[] */ + private $condensedReportCounts = [ + self::CONDENSED_REPORT_NOT_TRANSLATED => [], + self::CONDENSED_REPORT_INCOMPLETELY_TRANSLATED => [], + self::CONDENSED_REPORT_FULLY_TRANSLATED => [] + ]; + /** @var string[] */ private $errors = []; // error messages while parsing conf @@ -32,188 +38,128 @@ class Job private $selectRecordsSql = null; - /** @var array list of field ids of "source_field" (unique) and "destination_fields" (many) */ - private $selectRecordFieldIds; - - /** - * @var OutputInterface - */ + /** @var OutputInterface */ private $output; - private $source_field; // infos about the "source_field" - private $destination_fields; // infos about the "destination_fields" (key=lng) - - /** - * @var Unicode - */ - private $unicode; - /** @var DOMXpath|false|thesaurus_xpath */ private $xpathTh; - /** - * @var 
DOMNodeList - * The thesaurus branch(es) linked to the "source_field" - */ - private $tbranches; - - /** @var bool */ - private $cleanupDestination; + /** @var int flush every n records */ + private $bulk = 10; - /** @var string */ - private $cleanupSource = self::NEVER_CLEANUP_SOURCE; - /** - * @var GlobalConfiguration - */ + /** @var GlobalConfiguration */ private $globalConfiguration; - /** - * @var array - */ - private $job_conf; - /** - * @var \collection|null - */ + + /** @var collection|null */ private $setCollection = null; - /** - * @var string - */ + /** @var string */ private $setStatus = null; // format 0xx1100xx01xxxx - /** - * @var array - */ - private $notTranslated; // for condensed report - /** - * @var array - */ - private $incompletelyTranslated; // for condensed report - /** - * @var array - */ - private $fullyTranslated; // for condensed report - /** - * @var int - */ + /** @var Action[] */ + private $actions; + + /** @var array */ + private $selectRecordFieldIds = []; // ids of fields required by actions + + /** @var int */ private $recordsDone; // for condensed report + /** * @param GlobalConfiguration $globalConfiguration + * @param string $job_name * @param array $job_conf + * @param Unicode $unicode + * @param OutputInterface $output */ - public function __construct($globalConfiguration, $job_conf, Unicode $unicode, OutputInterface $output) + public function __construct(GlobalConfiguration $globalConfiguration, string $job_name, array $job_conf, Unicode $unicode, OutputInterface $output) { $this->globalConfiguration = $globalConfiguration; - $this->job_conf = $job_conf; - $this->unicode = $unicode; $this->output = $output; + $this->actions = []; + $this->errors = []; + if (array_key_exists('active', $job_conf) && $job_conf['active'] === false) { $this->active = false; - return; } - $this->errors = []; - foreach (['active', 'databox', 'source_field', 'destination_fields'] as $mandatory) { + foreach (['active', 'databox', 'actions'] as 
$mandatory) { if (!isset($job_conf[$mandatory])) { $this->errors[] = sprintf("Missing mandatory setting (%s).", $mandatory); } } - if (!empty($this->errors)) { - return; - } - if (!($this->databox = $globalConfiguration->getDatabox($job_conf['databox']))) { $this->errors[] = sprintf("unknown databox (%s).", $job_conf['databox']); - - return; } - + $ifCollection = null; + if(array_key_exists('if_collection', $job_conf)) { + if(!($ifCollection = $globalConfiguration->getCollection($this->databox->get_sbas_id(), $job_conf['if_collection']))) { + $this->errors[] = sprintf("unknown setCollection (%s).", $job_conf['if_collection']); + } + } if(array_key_exists('set_collection', $job_conf)) { if(!($this->setCollection = $globalConfiguration->getCollection($this->databox->get_sbas_id(), $job_conf['set_collection']))) { $this->errors[] = sprintf("unknown setCollection (%s).", $job_conf['set_collection']); - - return; } } - if(array_key_exists('set_status', $job_conf)) { $this->setStatus = $job_conf['set_status']; } - - - $cnx = $this->databox->get_connection(); - - // get infos about the "source_field" - // - $sql = "SELECT `id`, `tbranch` FROM `metadatas_structure` WHERE `name` = :name AND `tbranch` != ''"; - $stmt = $cnx->executeQuery($sql, [':name' => $job_conf['source_field']]); - $this->source_field = $stmt->fetch(PDO::FETCH_ASSOC); - $stmt->closeCursor(); - if (!$this->source_field) { - $this->errors[] = sprintf("field (%s) not found or not linked to thesaurus.", $job_conf['source_field']); - - return; + if(array_key_exists('bulk', $job_conf)) { + if( ($this->bulk = (int) $job_conf['bulk']) < 1) { + $this->errors[] = sprintf("bulk should be >= 1."); + } } - $this->source_field['lng'] = array_key_exists('source_lng', $job_conf) ? 
$job_conf['source_lng'] : null; - $this->selectRecordFieldIds[] = $this->source_field['id']; + $this->xpathTh = $this->databox->get_xpath_thesaurus(); - $this->tbranches = $this->xpathTh->query($this->source_field['tbranch']); - if (!$this->tbranches || $this->tbranches->length <= 0) { - $this->errors[] = sprintf("thesaurus branch(es) (%s) not found.", $this->source_field['tbranch']); - return; - } - // get infos about the "destination_fields" + + // load actions // - $this->destination_fields = []; - $sql = "SELECT `id`, `name` FROM `metadatas_structure` WHERE `name` = :name "; - $stmt = $cnx->prepare($sql); - foreach ($job_conf['destination_fields'] as $tf) { - list($lng, $fname) = explode(':', $tf); - $stmt->execute([':name' => $fname]); - if (!($row = $stmt->fetch(PDO::FETCH_ASSOC))) { - $this->output->writeln(sprintf("undefined field (%s) (ignored).", $fname)); - continue; + $this->selectRecordFieldIds = []; + foreach($job_conf['actions'] as $action_name => $action_conf) { + $action = new Action($this, $action_conf, $unicode, $this->output); + if($action->isActive()) { + $this->selectRecordFieldIds = array_merge($this->selectRecordFieldIds, $action->getSelectRecordFieldIds()); + $this->errors = array_merge($this->errors, $action->getErrors()); + $this->actions[$action_name] = $action; + } + else { + unset($action); + $output->writeln(sprintf("action \"%s\" of job \"%s\" is inactive: ignored.", $action_name, $job_name)); } - $this->destination_fields[$lng] = $row; - $stmt->closeCursor(); - - $this->selectRecordFieldIds[] = $row['id']; } + $this->selectRecordFieldIds = array_unique($this->selectRecordFieldIds); - if (empty($this->destination_fields)) { - $this->errors[] = sprintf("no \"destination_field\" found."); - + if (!empty($this->errors)) { return; } - // misc settings - $this->cleanupDestination = array_key_exists('cleanup_destination', $job_conf) && $job_conf['cleanup_destination'] === true; - $this->cleanupSource = array_key_exists('cleanup_source', 
$job_conf) ? $job_conf['cleanup_source'] : self::NEVER_CLEANUP_SOURCE; // build records select sql // - $selectRecordClauses = []; - $this->selectRecordParams = []; - if (array_key_exists('if_collection', $job_conf)) { - if (!($coll = $globalConfiguration->getCollection($job_conf['databox'], $job_conf['if_collection']))) { - $this->errors[] = sprintf("unknown collection (%s)", $job_conf['if_collection']); - - return; - } - $selectRecordClauses[] = "`coll_id` = :coll_id"; - $this->selectRecordParams[':coll_id'] = $coll->get_coll_id(); + $selectRecordsClauses = [ + '`record_id` > :minrid' + ]; + $this->selectRecordParams = [ + ':minrid' => 0 + ]; + if ($ifCollection) { + $selectRecordsClauses[] = "`coll_id` = :coll_id"; + $this->selectRecordParams[':coll_id'] = $ifCollection->get_coll_id(); } if (array_key_exists('if_status', $job_conf)) { - $selectRecordClauses[] = "`status` & b:sb_and = b:sb_equ"; + $selectRecordsClauses[] = "`status` & b:sb_and = b:sb_equ"; $this->selectRecordParams[':sb_and'] = str_replace(['0', 'x'], ['1', '0'], $job_conf['if_status']); $this->selectRecordParams[':sb_equ'] = str_replace('x', '0', $job_conf['if_status']); } - $selectRecordClauses[] = "`meta_struct_id` IN (" + $cnx = $this->databox->get_connection(); + $selectFieldsClause = "`meta_struct_id` IN (" . join( ',', array_map(function ($id) use ($cnx) { @@ -222,222 +168,109 @@ public function __construct($globalConfiguration, $job_conf, Unicode $unicode, O ) . ")"; - $sql = "SELECT `record_id`, `meta_struct_id`, `metadatas`.`id` AS meta_id, `value` FROM"; - $sql .= " `record` INNER JOIN `metadatas` USING(`record_id`)"; - $sql .= " WHERE " . join(" AND ", $selectRecordClauses); + $sql = "SELECT `r1`.`record_id`, `meta_struct_id`, `metadatas`.`id` AS meta_id, `value` FROM\n"; + $sql .= " (SELECT `record_id` FROM `record` WHERE ".join(" AND ", $selectRecordsClauses)." 
LIMIT ".$this->bulk.") AS `r1`\n"; + $sql .= " LEFT JOIN `metadatas` ON(`metadatas`.`record_id`=`r1`.`record_id`\n"; + $sql .= " AND " . $selectFieldsClause . ")\n"; $sql .= " ORDER BY `record_id` ASC"; $this->selectRecordsSql = $sql; } public function run() { - $cnx = $this->databox->get_connection(); - $stmt = $cnx->executeQuery($this->selectRecordsSql, $this->selectRecordParams); - - $currentRid = '?'; $this->recordsDone = 0; - $this->notTranslated = []; - $this->incompletelyTranslated = []; - $this->fullyTranslated = []; - - $metas = $emptyValues = array_map(function () { - return []; - }, array_flip($this->selectRecordFieldIds)); - while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) { - if ($currentRid == '?') { - $currentRid = $row['record_id']; + + $stmt = $this->databox->get_connection()->prepare($this->selectRecordsSql); + + +// $metas = $emptyValues = array_map(function () { +// return []; +// }, array_flip($this->selectRecordFieldIds)); + + $minrid = 0; + do { + $nrows = 0; + $currentRid = '?'; + $metas = []; + + $this->selectRecordParams[':minrid'] = $minrid; + $stmt->execute($this->selectRecordParams); + while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) { + $nrows++; + if ($currentRid == '?') { + $currentRid = $row['record_id']; + } + if ($row['record_id'] !== $currentRid) { + // change record + $this->doRecord($currentRid, $metas); // flush previous record + $currentRid = $row['record_id']; + // $metas = $emptyValues; + $metas = []; + } + if ($row['meta_struct_id'] !== null) { // left join : a record may not have any required field + if (!array_key_exists($row['meta_struct_id'], $metas)) { + $metas[$row['meta_struct_id']] = []; + } + $metas[$row['meta_struct_id']][$row['meta_id']] = $row['value']; + } } - if ($row['record_id'] !== $currentRid) { - // change record - $this->doRecord($currentRid, $metas); // flush previous record - $currentRid = $row['record_id']; - $metas = $emptyValues; + if ($currentRid !== '?') { + $this->doRecord($currentRid, $metas); // 
flush last record } - $metas[$row['meta_struct_id']][$row['meta_id']] = $row['value']; - } - if($currentRid !== '?') { - $this->doRecord($currentRid, $metas); // flush last record + $stmt->closeCursor(); + $minrid = $currentRid; } - - $stmt->closeCursor(); + while($nrows > 0); // condensed report // if($this->globalConfiguration->getReportFormat() === 'condensed') { $this->output->writeln(sprintf("%d records done.", $this->recordsDone)); - if(!empty($this->notTranslated)) { - ksort($this->notTranslated, SORT_STRING|SORT_FLAG_CASE); + if(!empty($this->condensedReportCounts[self::CONDENSED_REPORT_NOT_TRANSLATED])) { + ksort($this->condensedReportCounts[self::CONDENSED_REPORT_NOT_TRANSLATED], SORT_STRING|SORT_FLAG_CASE); $this->output->writeln("Not translated terms:"); - foreach ($this->notTranslated as $term => $n) { + foreach ($this->condensedReportCounts[self::CONDENSED_REPORT_NOT_TRANSLATED] as $term => $n) { $this->output->writeln(sprintf(" - \"%s\" (%d times)", $term, $n)); } } - if(!empty($this->incompletelyTranslated)) { - ksort($this->incompletelyTranslated, SORT_STRING|SORT_FLAG_CASE); + if(!empty($this->condensedReportCounts[self::CONDENSED_REPORT_INCOMPLETELY_TRANSLATED])) { + ksort($this->condensedReportCounts[self::CONDENSED_REPORT_INCOMPLETELY_TRANSLATED], SORT_STRING|SORT_FLAG_CASE); $this->output->writeln("Incompletely translated terms:"); - foreach ($this->incompletelyTranslated as $term => $n) { + foreach ($this->condensedReportCounts[self::CONDENSED_REPORT_INCOMPLETELY_TRANSLATED] as $term => $n) { $this->output->writeln(sprintf(" - \"%s\" (%d times)", $term, $n)); } } - if(!empty($this->fullyTranslated)) { - ksort($this->fullyTranslated, SORT_STRING|SORT_FLAG_CASE); + if(!empty($this->condensedReportCounts[self::CONDENSED_REPORT_FULLY_TRANSLATED])) { + ksort($this->condensedReportCounts[self::CONDENSED_REPORT_FULLY_TRANSLATED], SORT_STRING|SORT_FLAG_CASE); $this->output->writeln("Fully translated terms:"); - foreach ($this->fullyTranslated as 
$term => $n) { + foreach ($this->condensedReportCounts[self::CONDENSED_REPORT_FULLY_TRANSLATED] as $term => $n) { $this->output->writeln(sprintf(" - \"%s\" (%d times)", $term, $n)); } } } } - private function doRecord($record_id, $metas) + private function doRecord(string $record_id, array $metas) { $reportFormat = $this->globalConfiguration->getReportFormat(); - if($reportFormat !== 'condensed') { - $this->output->writeln(sprintf("record id: %s", $record_id)); + $this->output->writeln(sprintf("\trecord id: %s", $record_id)); } - $source_field_id = $this->source_field['id']; $meta_to_delete = []; // key = id, to easily keep unique $meta_to_add = []; - if ($this->cleanupDestination) { - foreach ($this->destination_fields as $lng => $destination_field) { - $destination_field_id = $destination_field['id']; - foreach ($metas[$destination_field_id] as $meta_id => $value) { - $meta_to_delete[$meta_id] = $value; - } - unset($meta_id, $value); - } - unset($lng, $destination_field, $destination_field_id); - } - - // loop on every value of the "source_field" + // play all actions // - foreach ($metas[$source_field_id] as $source_meta_id => $source_value) { - - $t = $this->splitTermAndContext($source_value); - $q = '@w=\'' . \thesaurus::xquery_escape($this->unicode->remove_indexer_chars($t[0])) . '\''; - if ($t[1]) { - $q .= ' and @k=\'' . \thesaurus::xquery_escape($this->unicode->remove_indexer_chars($t[1])) . '\''; - } - if(!is_null($this->source_field['lng'])) { - $q .= ' and @lng=\'' . \thesaurus::xquery_escape($this->source_field['lng']) . '\''; - } - $q = '//sy[' . $q . 
']/../sy'; - unset($t); - - // loop on every tbranch (one field may be linked to many branches) - // - $translations = []; // ONE translation per lng (first found in th) - /** @var DOMNode $tbranch */ - foreach ($this->tbranches as $tbranch) { - if (!($nodes = $this->xpathTh->query($q, $tbranch))) { - $this->output->writeln(sprintf(" - \"%s\" xpath error on (%s), ignored.", $source_value, $q)); - continue; - } - - // loop on every synonym - // - /** @var DOMElement $node */ - foreach ($nodes as $node) { - $lng = $node->getAttribute('lng'); - - // ignore synonyms not in one of the "destination_field" languages - // - if (!array_key_exists($lng, $this->destination_fields)) { - continue; - } - - $translated_value = $node->getAttribute('v'); - - $destination_field_id = $this->destination_fields[$lng]['id']; - if (!array_key_exists($lng, $translations)) { - if (($destination_meta_id = array_search($translated_value, $metas[$destination_field_id])) === false) { - $translations[$lng] = [ - 'val' => $translated_value, - 'id' => null, - 'msg' => sprintf(" --> %s", $this->destination_fields[$lng]['name']) - ]; - $meta_to_add[$destination_field_id][] = $translated_value; - } - else { - $translations[$lng] = [ - 'val' => $translated_value, - 'id' => $destination_meta_id, - 'msg' => sprintf("already in %s", $this->destination_fields[$lng]['name']) - ]; - unset($meta_to_delete[$destination_meta_id]); - } - unset($destination_meta_id); - } - unset($lng, $destination_field_id, $translated_value); - } - unset($nodes, $node, $tbranch); - } - unset($q); - - // cleanup source - // - if (empty($translations)) { - if($reportFormat === 'all') { - $this->output->writeln(sprintf(" - \"%s\" : no translation found.", $source_value)); - } - $this->addToCondensedReport($source_value, $this->notTranslated); + foreach($this->actions as $action_name => $action) { + if($reportFormat !== 'condensed') { + $this->output->writeln(sprintf("\t\tplaying action \"%s\"", $action_name)); } - else if 
(count($translations) < count($this->destination_fields)) { - if(in_array($reportFormat, ['all', 'translated'])) { - $this->output->writeln(sprintf(" - \"%s\" : incomplete translation.", $source_value)); - } - $this->addToCondensedReport($source_value, $this->incompletelyTranslated); - } - else { - // complete translation (all target lng) - if(in_array($reportFormat, ['all', 'translated'])) { - $this->output->writeln(sprintf(" - \"%s\" :", $source_value)); - } - $this->addToCondensedReport($source_value, $this->fullyTranslated); - - if ($this->cleanupSource === self::CLEANUP_SOURCE_IF_TRANSLATED) { - // do NOT delete the source term if one translation found it as already present as destination (possible if source=destination) - $used = false; - foreach($translations as $l => $t) { - if($t['id'] === $source_meta_id) { - $used = true; - break; - } - } - if(!$used) { - $meta_to_delete[$source_meta_id] = $metas[$source_field_id][$source_meta_id]; - } - } - } - - if(in_array($reportFormat, ['all', 'translated'])) { - foreach ($translations as $lng => $translation) { - $this->output->writeln(sprintf(" - [%s] \"%s\" %s", $lng, $translation['val'], $translation['msg'])); - } - } - - if ($this->cleanupSource === self::ALWAYS_CLEANUP_SOURCE) { - // do NOT delete the source term if one translation found it as already present as destination (possible if source=destination) - $used = false; - foreach($translations as $l => $t) { - if($t['id'] === $source_meta_id) { - $used = true; - break; - } - } - if(!$used) { - $meta_to_delete[$source_meta_id] = $metas[$source_field_id][$source_meta_id]; - } - } - - unset($lng, $translations, $translation); + $action->doAction($metas, $meta_to_delete, $meta_to_add); } - unset($metas, $source_meta_id, $source_value); + unset($metas); $actions = []; @@ -494,36 +327,29 @@ private function doRecord($record_id, $metas) $this->recordsDone++; } - private function addToCondensedReport($term, &$where) + public function addToCondensedReport(string 
$term, string $where) { if($this->globalConfiguration->getReportFormat() !== 'condensed') { return; } - if(!array_key_exists($term, $where)) { - $where[$term] = 0; + if(!array_key_exists($where, $this->condensedReportCounts)) { + $this->condensedReportCounts[$where] = []; } - $where[$term]++; + if(!array_key_exists($term, $this->condensedReportCounts[$where])) { + $this->condensedReportCounts[$where][$term] = 0; + } + $this->condensedReportCounts[$where][$term]++; } - private function splitTermAndContext($word) - { - $term = trim($word); - $context = ''; - if (($po = strpos($term, '(')) !== false) { - if (($pc = strpos($term, ')', $po)) !== false) { - $context = trim(substr($term, $po + 1, $pc - $po - 1)); - $term = trim(substr($term, 0, $po)); - } - else { - $context = trim(substr($term, $po + 1)); - $term = trim(substr($term, 0, $po)); - } - } - return [$term, $context]; + /** + * @return GlobalConfiguration + */ + public function getGlobalConfiguration(): GlobalConfiguration + { + return $this->globalConfiguration; } - /** * @return string[] */ @@ -553,5 +379,16 @@ public function isActive(): bool return $this->active; } + public function getDataboxField(string $fieldIdOrName) + { + return $this->globalConfiguration->getField($this->databox->get_sbas_id(), $fieldIdOrName); + } + /** + * @return DOMXpath|false|thesaurus_xpath + */ + public function getXpathTh() + { + return $this->xpathTh; + } } diff --git a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/TranslateCommand.php b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/TranslateCommand.php index 1758d50c82..b5a43ac9cb 100644 --- a/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/TranslateCommand.php +++ b/lib/Alchemy/Phrasea/Command/Thesaurus/Translator/TranslateCommand.php @@ -94,22 +94,7 @@ protected function doExecute(InputInterface $input, OutputInterface $output) */ foreach ($this->config->getJobs() as $jobName => $job) { $output->writeln(""); - $output->writeln(sprintf("======== Playing job %s 
========", $jobName)); - - if(!$job->isValid()) { - $output->writeln("Configuration error(s)... :"); - foreach ($job->getErrors() as $err) { - $output->writeln(sprintf(" - %s", $err)); - } - $output->writeln("...Job ignored"); - - continue; - } - - if(!$job->isActive()) { - $output->writeln(sprintf("job is inactive, skipped.")); - continue; - } + $output->writeln(sprintf("Playing job \"%s\"", $jobName)); $job->run(); }