Skip to content
This repository has been archived by the owner on Jan 5, 2018. It is now read-only.

Add a text filter that converts supported plain URLs to <drupal-url> elements #32

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
191 changes: 191 additions & 0 deletions src/Plugin/Filter/ConvertUrlToEmbedFilter.php
@@ -0,0 +1,191 @@
<?php

namespace Drupal\url_embed\Plugin\Filter;

use Drupal\Component\Utility\Html;
use Drupal\Core\Form\FormStateInterface;
use Drupal\filter\FilterProcessResult;
use Drupal\filter\Plugin\FilterBase;

/**
* Provides a filter that converts supported plain URLs to URL embed elements.
*
* @Filter(
* id = "url_embed_convert_links",
* title = @Translation("Convert URLs to URL embeds"),
* description = @Translation("Convert plain URLs to embed elements that can be rendered with the <em>Display embedded URLs</em> filter."),
* type = Drupal\filter\Plugin\FilterInterface::TYPE_TRANSFORM_REVERSIBLE,
* settings = {
* "url_prefix" = "",
* },
* )
*/
class ConvertUrlToEmbedFilter extends FilterBase {

  /**
   * {@inheritdoc}
   */
  public function settingsForm(array $form, FormStateInterface $form_state) {
    $form['url_prefix'] = [
      '#type' => 'textfield',
      '#title' => $this->t('URL prefix'),
      '#default_value' => $this->settings['url_prefix'],
      '#description' => $this->t('Optional prefix that will be used to indicate which URLs that apply. All URLs that are supported will be converted if empty. Example: EMBED-https://twitter.com/drupal/status/735873777683320832'),
    ];
    return $form;
  }

  /**
   * {@inheritdoc}
   */
  public function process($text, $langcode) {
    return new FilterProcessResult(static::convertUrls($text, $this->settings['url_prefix']));
  }

  /**
   * Replaces appearances of supported URLs with <drupal-url> embed elements.
   *
   * Logic of this function is copied from _filter_url() and slightly adapted
   * for our use case. _filter_url() is unfortunately not general enough to
   * re-use it.
   *
   * URLs inside HTML comments, inside tags/attributes, and inside the elements
   * a, script, style, code and pre are left untouched. A matched URL is only
   * replaced when the 'url_embed' service returns an embed for it; otherwise
   * the URL itself is returned in place of the match.
   *
   * @param string $text
   *   Text to be processed.
   * @param string $url_prefix
   *   (Optional) Prefix that should be used to manually choose which URLs
   *   should be converted. The prefix is matched literally; it is not
   *   interpreted as a regular expression.
   *
   * @return string
   *   Processed text.
   */
  public static function convertUrls($text, $url_prefix = '') {
    // Tags to skip and not recurse into.
    $ignore_tags = 'a|script|style|code|pre';

    // Prepare protocols pattern for absolute URLs.
    // \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols() will replace
    // any bad protocols with HTTP, so we need to support the identical list.
    // While '//' is technically optional for MAILTO only, we cannot cleanly
    // differ between protocols here without hard-coding MAILTO, so '//' is
    // optional for all protocols.
    // @see \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols()
    $protocols = \Drupal::getContainer()->getParameter('filter_protocols');
    $protocols = implode(':(?://)?|', $protocols) . ':(?://)?';

    $valid_url_path_characters = "[\p{L}\p{M}\p{N}!\*\';:=\+,\.\$\/%#\[\]\-_~@&]";

    // Allow URL paths to contain balanced parens
    // 1. Used in Wikipedia URLs like /Primer_(film)
    // 2. Used in IIS sessions like /S(dfd346)/
    $valid_url_balanced_parens = '\('. $valid_url_path_characters . '+\)';

    // Valid end-of-path characters (so /foo. does not gobble the period).
    // 1. Allow =&# for empty URL parameters and other URL-join artifacts
    $valid_url_ending_characters = '[\p{L}\p{M}\p{N}:_+~#=/]|(?:' . $valid_url_balanced_parens . ')';

    $valid_url_query_chars = '[a-zA-Z0-9!?\*\'@\(\);:&=\+\$\/%#\[\]\-_\.,~|]';
    $valid_url_query_ending_chars = '[a-zA-Z0-9_&=#\/]';

    // Full path. Also allow @ in a URL, but only in the middle, to catch
    // things like http://example.com/@user/.
    $valid_url_path = '(?:(?:'.$valid_url_path_characters . '*(?:'.$valid_url_balanced_parens .$valid_url_path_characters . '*)*'. $valid_url_ending_characters . ')|(?:@' . $valid_url_path_characters . '+\/))';

    // Prepare domain name pattern.
    // The ICANN seems to be on track towards accepting more diverse top level
    // domains, so this pattern has been "future-proofed" to allow for TLDs
    // of length 2-64.
    $domain = '(?:[\p{L}\p{M}\p{N}._+-]+\.)?[\p{L}\p{M}]{2,64}\b';
    $ip = '(?:[0-9]{1,3}\.){3}[0-9]{1,3}';
    $auth = '[\p{L}\p{M}\p{N}:%_+*~#?&=.,/;-]+@';
    $trail = '('.$valid_url_path.'*)?(\\?'.$valid_url_query_chars .'*'.$valid_url_query_ending_chars.')?';

    // Match absolute URLs. The prefix is user-supplied configuration, so it is
    // quoted (including the backtick delimiter) to keep it literal. Without
    // this a prefix such as "[embed]" or "(e)" would alter the pattern and, by
    // adding a capture group, shift the URL away from $match[1].
    $url_pattern = "(?:$auth)?(?:$domain|$ip)/?(?:$trail)?";
    $quoted_prefix = preg_quote((string) $url_prefix, '`');
    $pattern = "`$quoted_prefix((?:$protocols)(?:$url_pattern))`u";

    // Replacement callback: $match[1] is the URL without the prefix. The chunk
    // is HTML, so entities are decoded before asking the embed service, while
    // the (still encoded) URL is what ends up in the attribute.
    $replace_url = function ($match) {
      if (\Drupal::service('url_embed')->getEmbed(Html::decodeEntities($match[1]))) {
        return '<drupal-url data-embed-url="' . $match[1] . '"></drupal-url>';
      }
      return $match[1];
    };

    // HTML comments need to be handled separately, as they may contain HTML
    // markup, especially a '>'. Therefore, remove all comment contents and add
    // them back later.
    _filter_url_escape_comments('', TRUE);
    $text = preg_replace_callback('`<!--(.*?)-->`s', '_filter_url_escape_comments', $text);

    // Split at all tags; ensures that no tags or attributes are processed.
    $chunks = preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

    // preg_split() returns FALSE on a PCRE error. In that case no conversion
    // is attempted and the text is passed through unchanged.
    if ($chunks !== FALSE) {
      // PHP ensures that the array consists of alternating delimiters and
      // literals, and begins and ends with a literal (inserting NULL as
      // required). Therefore, the first chunk is always text:
      $chunk_type = 'text';
      // If a tag of $ignore_tags is found, it is stored in $open_tag and only
      // removed when the closing tag is found. Until the closing tag is found,
      // no replacements are made.
      $open_tag = '';

      foreach ($chunks as $i => $chunk) {
        if ($chunk_type === 'text') {
          // Only process this text if there are no unclosed $ignore_tags.
          if ($open_tag === '') {
            $converted = preg_replace_callback($pattern, $replace_url, $chunk);
            // preg_replace_callback() returns NULL on a PCRE error; keep the
            // original chunk rather than silently dropping its content.
            if ($converted !== NULL) {
              $chunks[$i] = $converted;
            }
          }
          // Text chunk is done, so next chunk must be a tag.
          $chunk_type = 'tag';
        }
        else {
          // Only process this tag if there are no unclosed $ignore_tags.
          if ($open_tag === '') {
            // Check whether this tag is contained in $ignore_tags.
            if (preg_match("`<($ignore_tags)(?:\s|>)`i", $chunk, $matches)) {
              $open_tag = $matches[1];
            }
          }
          // Otherwise, check whether this is the closing tag for $open_tag.
          elseif (preg_match("`<\/$open_tag>`i", $chunk)) {
            $open_tag = '';
          }
          // Tag chunk is done, so next chunk must be text.
          $chunk_type = 'text';
        }
      }

      $text = implode($chunks);
    }

    // Revert to the original comment contents.
    _filter_url_escape_comments('', FALSE);
    return preg_replace_callback('`<!--(.*?)-->`', '_filter_url_escape_comments', $text);
  }

  /**
   * {@inheritdoc}
   */
  public function tips($long = FALSE) {
    if ($long) {
      return $this->t('<p>You can convert plain URLs to &lt;drupal-url&gt; HTML elements. Those elements are later converted to embeds using "Display embedded URLs" text filter.</p>');
    }
    else {
      return $this->t('You can convert plain URLs to embed elements.');
    }
  }

}
134 changes: 134 additions & 0 deletions src/Tests/ConvertUrlToEmbedFilterTest.php
@@ -0,0 +1,134 @@
<?php

namespace Drupal\url_embed\Tests;

use Drupal\editor\Entity\Editor;
use Drupal\filter\Entity\FilterFormat;
use Drupal\simpletest\WebTestBase;

/**
* Tests the url_embed_convert_links filter.
*
* @group url_embed
*/
class ConvertUrlToEmbedFilterTest extends WebTestBase {

  /**
   * Modules to enable.
   *
   * @var array
   */
  public static $modules = ['url_embed', 'node', 'ckeditor'];

  /**
   * The account created and logged in during setUp().
   *
   * Holds whatever drupalCreateUser() returned. Declared explicitly so the
   * assignment in setUp() does not create a dynamic property.
   *
   * @var \Drupal\user\UserInterface|false
   */
  protected $webUser;

  /**
   * Set the configuration up.
   */
  protected function setUp() {
    parent::setUp();
    // Create a page content type.
    $this->drupalCreateContentType(['type' => 'page', 'name' => 'Basic page']);

    // Create a text format and enable the url_embed_convert_links filter
    // without a URL prefix.
    $format = FilterFormat::create([
      'format' => 'custom_format',
      'name' => 'Custom format',
      'filters' => [
        'url_embed_convert_links' => [
          'status' => 1,
          'settings' => ['url_prefix' => ''],
        ],
      ],
    ]);
    $format->save();

    $editor_group = [
      'name' => 'URL Embed',
      'items' => [
        'url',
      ],
    ];
    $editor = Editor::create([
      'format' => 'custom_format',
      'editor' => 'ckeditor',
      'settings' => [
        'toolbar' => [
          'rows' => [[$editor_group]],
        ],
      ],
    ]);
    $editor->save();

    // Create a user with required permissions.
    $this->webUser = $this->drupalCreateUser([
      'access content',
      'create page content',
      'use text format custom_format',
    ]);
    $this->drupalLogin($this->webUser);
  }

  /**
   * Creates a page node using the custom text format and requests its page.
   *
   * @param string $title
   *   Title of the node to create.
   * @param string $content
   *   Body value of the node; it is rendered with the 'custom_format' format.
   */
  protected function createAndViewPage($title, $content) {
    $node = $this->drupalCreateNode([
      'type' => 'page',
      'title' => $title,
      'body' => [['value' => $content, 'format' => 'custom_format']],
    ]);
    $this->drupalGet('node/' . $node->id());
  }

  /**
   * Tests the url_embed_convert_links filter.
   *
   * Ensures that a supported URL is converted to a <drupal-url> element, that
   * unsupported text is left alone, and that a configured URL prefix limits
   * conversion to URLs carrying exactly that prefix.
   */
  public function testFilter() {
    $embed_markup = '<drupal-url data-embed-url="https://twitter.com/drupal/status/735873777683320832"></drupal-url>';

    // Without a prefix, a supported URL is converted.
    $content = 'before https://twitter.com/drupal/status/735873777683320832 after';
    $this->createAndViewPage('Test convert url to embed with sample Twitter url', $content);
    $this->assertRaw($embed_markup);
    $this->assertNoText(strip_tags($content), 'URL does not appear in the output when embed is successful.');

    // Text that is not an absolute URL is left as is.
    $content = 'before /not-valid/url after';
    $this->createAndViewPage('Test convert url to embed with non valid URL', $content);
    $this->assertRaw($content);

    // Configure a prefix for the remaining scenarios.
    /** @var \Drupal\filter\FilterFormatInterface $format */
    $format = FilterFormat::load('custom_format');
    $configuration = $format->filters('url_embed_convert_links')->getConfiguration();
    $configuration['settings']['url_prefix'] = 'EMBED ';
    $format->setFilterConfig('url_embed_convert_links', $configuration);
    $format->save();

    // A URL without the configured prefix is not converted.
    $content = 'before https://twitter.com/drupal/status/735873777683320832 after';
    $this->createAndViewPage('Test convert url to embed with sample Twitter url and no prefix', $content);
    $this->assertRaw(strip_tags($content));
    $this->assertNoRaw($embed_markup);

    // A URL with the configured prefix is converted.
    $content = 'before EMBED https://twitter.com/drupal/status/735873777683320832 after';
    $this->createAndViewPage('Test convert url to embed with sample Twitter url with the prefix', $content);
    $this->assertRaw($embed_markup);
    $this->assertNoText(strip_tags($content), 'URL does not appear in the output when embed is successful.');

    // The prefix is case sensitive: a differently cased prefix does not match.
    $content = 'before Embed https://twitter.com/drupal/status/735873777683320832 after';
    $this->createAndViewPage('Test convert url to embed with sample Twitter url with wrong prefix', $content);
    $this->assertRaw(strip_tags($content));
    $this->assertNoRaw($embed_markup);
  }

}