diff --git a/src/Plugin/Filter/ConvertUrlToEmbedFilter.php b/src/Plugin/Filter/ConvertUrlToEmbedFilter.php new file mode 100644 index 0000000..8489309 --- /dev/null +++ b/src/Plugin/Filter/ConvertUrlToEmbedFilter.php @@ -0,0 +1,191 @@ +Display embedded URLs filter."), + * type = Drupal\filter\Plugin\FilterInterface::TYPE_TRANSFORM_REVERSIBLE, + * settings = { + * "url_prefix" = "", + * }, + * ) + */ +class ConvertUrlToEmbedFilter extends FilterBase { + + /** + * {@inheritdoc} + */ + public function settingsForm(array $form, FormStateInterface $form_state) { + $form['url_prefix'] = [ + '#type' => 'textfield', + '#title' => $this->t('URL prefix'), + '#default_value' => $this->settings['url_prefix'], + '#description' => $this->t('Optional prefix that will be used to indicate which URLs that apply. All URLs that are supported will be converted if empty. Example: EMBED-https://twitter.com/drupal/status/735873777683320832'), + ]; + return $form; + } + + + /** + * {@inheritdoc} + */ + public function process($text, $langcode) { + return new FilterProcessResult(static::convertUrls($text, $this->settings['url_prefix'])); + } + + /** + * Replaces appearances of supported URLs with embed elements. + * + * Logic of this function is copied from _filter_url() and slightly adopted + * for our use case. _filter_url() is unfortunately not general enough to + * re-use it. + * + * @param string $text + * Text to be processed. + * @param string $url_prefix + * (Optional) Prefix that should be used to manually choose which URLs + * should be converted. + * + * @return string + * Processed text. + */ + public static function convertUrls($text, $url_prefix = '') { + // Tags to skip and not recurse into. + $ignore_tags = 'a|script|style|code|pre'; + + // Create an array which contains the regexps for each type of link. + // The key to the regexp is the name of a function that is used as + // callback function to process matches of the regexp. The callback function + // is to return the replacement for the match. The array is used and + // matching/replacement done below inside some loops. + $tasks = []; + + // Prepare protocols pattern for absolute URLs. + // \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols() will replace + // any bad protocols with HTTP, so we need to support the identical list. + // While '//' is technically optional for MAILTO only, we cannot cleanly + // differ between protocols here without hard-coding MAILTO, so '//' is + // optional for all protocols. + // @see \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols() + $protocols = \Drupal::getContainer()->getParameter('filter_protocols'); + $protocols = implode(':(?://)?|', $protocols) . ':(?://)?'; + + $valid_url_path_characters = "[\p{L}\p{M}\p{N}!\*\';:=\+,\.\$\/%#\[\]\-_~@&]"; + + // Allow URL paths to contain balanced parens + // 1. Used in Wikipedia URLs like /Primer_(film) + // 2. Used in IIS sessions like /S(dfd346)/ + $valid_url_balanced_parens = '\('. $valid_url_path_characters . '+\)'; + + // Valid end-of-path characters (so /foo. does not gobble the period). + // 1. Allow =&# for empty URL parameters and other URL-join artifacts + $valid_url_ending_characters = '[\p{L}\p{M}\p{N}:_+~#=/]|(?:' . $valid_url_balanced_parens . ')'; + + $valid_url_query_chars = '[a-zA-Z0-9!?\*\'@\(\);:&=\+\$\/%#\[\]\-_\.,~|]'; + $valid_url_query_ending_chars = '[a-zA-Z0-9_&=#\/]'; + + //full path + //and allow @ in a url, but only in the middle. Catch things like http://example.com/@user/ + $valid_url_path = '(?:(?:'.$valid_url_path_characters . '*(?:'.$valid_url_balanced_parens .$valid_url_path_characters . '*)*'. $valid_url_ending_characters . ')|(?:@' . $valid_url_path_characters . '+\/))'; + + // Prepare domain name pattern. + // The ICANN seems to be on track towards accepting more diverse top level + // domains, so this pattern has been "future-proofed" to allow for TLDs + // of length 2-64. + $domain = '(?:[\p{L}\p{M}\p{N}._+-]+\.)?[\p{L}\p{M}]{2,64}\b'; + $ip = '(?:[0-9]{1,3}\.){3}[0-9]{1,3}'; + $auth = '[\p{L}\p{M}\p{N}:%_+*~#?&=.,/;-]+@'; + $trail = '('.$valid_url_path.'*)?(\\?'.$valid_url_query_chars .'*'.$valid_url_query_ending_chars.')?'; + + // Match absolute URLs. + $url_pattern = "(?:$auth)?(?:$domain|$ip)/?(?:$trail)?"; + $pattern = "`$url_prefix((?:$protocols)(?:$url_pattern))`u"; + $tasks['replaceFullLinks'] = $pattern; + + // HTML comments need to be handled separately, as they may contain HTML + // markup, especially a '>'. Therefore, remove all comment contents and add + // them back later. + _filter_url_escape_comments('', TRUE); + $text = preg_replace_callback('``s', '_filter_url_escape_comments', $text); + + // Split at all tags; ensures that no tags or attributes are processed. + $chunks = preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE); + // PHP ensures that the array consists of alternating delimiters and + // literals, and begins and ends with a literal (inserting NULL as + // required). Therefore, the first chunk is always text: + $chunk_type = 'text'; + // If a tag of $ignore_tags is found, it is stored in $open_tag and only + // removed when the closing tag is found. Until the closing tag is found, + // no replacements are made. + $open_tag = ''; + + for ($i = 0; $i < count($chunks); $i++) { + if ($chunk_type == 'text') { + // Only process this text if there are no unclosed $ignore_tags. + if ($open_tag == '') { + // If there is a match, inject a link into this chunk via the callback + // function contained in $task. + $chunks[$i] = preg_replace_callback( + $pattern, + function ($match) { + if (\Drupal::service('url_embed')->getEmbed(Html::decodeEntities($match[1]))) { + return ''; + } + else { + return $match[1]; + } + }, + $chunks[$i] + ); + } + // Text chunk is done, so next chunk must be a tag. + $chunk_type = 'tag'; + } + else { + // Only process this tag if there are no unclosed $ignore_tags. + if ($open_tag == '') { + // Check whether this tag is contained in $ignore_tags. + if (preg_match("`<($ignore_tags)(?:\s|>)`i", $chunks[$i], $matches)) { + $open_tag = $matches[1]; + } + } + // Otherwise, check whether this is the closing tag for $open_tag. + else { + if (preg_match("`<\/$open_tag>`i", $chunks[$i], $matches)) { + $open_tag = ''; + } + } + // Tag chunk is done, so next chunk must be text. + $chunk_type = 'text'; + } + } + + $text = implode($chunks); + // Revert to the original comment contents + _filter_url_escape_comments('', FALSE); + return preg_replace_callback('``', '_filter_url_escape_comments', $text); + } + + /** + * {@inheritdoc} + */ + public function tips($long = FALSE) { + if ($long) { + return $this->t('

You can convert plain URLs to <drupal-url> HTML elements. Those elements are later converted to embeds using "Display embedded URLs" text filter.

'); + } + else { + return $this->t('You can convert plain URLs to embed elements.'); + } + } + +} diff --git a/src/Tests/ConvertUrlToEmbedFilterTest.php b/src/Tests/ConvertUrlToEmbedFilterTest.php new file mode 100644 index 0000000..7e008d6 --- /dev/null +++ b/src/Tests/ConvertUrlToEmbedFilterTest.php @@ -0,0 +1,134 @@ +drupalCreateContentType(['type' => 'page', 'name' => 'Basic page']); + + // Create a text format and enable the url_embed filter. + $format = FilterFormat::create([ + 'format' => 'custom_format', + 'name' => 'Custom format', + 'filters' => [ + 'url_embed_convert_links' => [ + 'status' => 1, + 'settings' => ['url_prefix' => ''], + ], + ], + ]); + $format->save(); + + $editor_group = [ + 'name' => 'URL Embed', + 'items' => [ + 'url', + ], + ]; + $editor = Editor::create([ + 'format' => 'custom_format', + 'editor' => 'ckeditor', + 'settings' => [ + 'toolbar' => [ + 'rows' => [[$editor_group]], + ], + ], + ]); + $editor->save(); + + // Create a user with required permissions. + $this->webUser = $this->drupalCreateUser([ + 'access content', + 'create page content', + 'use text format custom_format', + ]); + $this->drupalLogin($this->webUser); + } + + /** + * Tests the url_embed_convert_links filter. + * + * Ensures that iframes are getting rendered when valid urls + * are passed. Also tests situations when embed fails. + */ + public function testFilter() { + $content = 'before https://twitter.com/drupal/status/735873777683320832 after'; + $settings = []; + $settings['type'] = 'page'; + $settings['title'] = 'Test convert url to embed with sample Twitter url'; + $settings['body'] = [['value' => $content, 'format' => 'custom_format']]; + $node = $this->drupalCreateNode($settings); + $this->drupalGet('node/' . $node->id()); + $this->assertRaw(''); + $this->assertNoText(strip_tags($content), 'URL does not appear in the output when embed is successful.'); + + $content = 'before /not-valid/url after'; + $settings = []; + $settings['type'] = 'page'; + $settings['title'] = 'Test convert url to embed with non valid URL'; + $settings['body'] = [['value' => $content, 'format' => 'custom_format']]; + $node = $this->drupalCreateNode($settings); + $this->drupalGet('node/' . $node->id()); + $this->assertRaw($content); + + /** @var \Drupal\filter\FilterFormatInterface $format */ + $format = FilterFormat::load('custom_format'); + $configuration = $format->filters('url_embed_convert_links')->getConfiguration(); + $configuration['settings']['url_prefix'] = 'EMBED '; + $format->setFilterConfig('url_embed_convert_links', $configuration); + $format->save(); + + $content = 'before https://twitter.com/drupal/status/735873777683320832 after'; + $settings = []; + $settings['type'] = 'page'; + $settings['title'] = 'Test convert url to embed with sample Twitter url and no prefix'; + $settings['body'] = [['value' => $content, 'format' => 'custom_format']]; + $node = $this->drupalCreateNode($settings); + $this->drupalGet('node/' . $node->id()); + $this->assertRaw(strip_tags($content)); + $this->assertNoRaw(''); + + $content = 'before EMBED https://twitter.com/drupal/status/735873777683320832 after'; + $settings = []; + $settings['type'] = 'page'; + $settings['title'] = 'Test convert url to embed with sample Twitter url with the prefix'; + $settings['body'] = [['value' => $content, 'format' => 'custom_format']]; + $node = $this->drupalCreateNode($settings); + $this->drupalGet('node/' . $node->id()); + $this->assertRaw(''); + $this->assertNoText(strip_tags($content), 'URL does not appear in the output when embed is successful.'); + + $content = 'before Embed https://twitter.com/drupal/status/735873777683320832 after'; + $settings = []; + $settings['type'] = 'page'; + $settings['title'] = 'Test convert url to embed with sample Twitter url with wrong prefix'; + $settings['body'] = [['value' => $content, 'format' => 'custom_format']]; + $node = $this->drupalCreateNode($settings); + $this->drupalGet('node/' . $node->id()); + $this->assertRaw(strip_tags($content)); + $this->assertNoRaw(''); + } + +}