From 5d34b9321e2764c6e974cf3edf19e6e2153b466b Mon Sep 17 00:00:00 2001 From: Afuna Date: Fri, 20 Feb 2015 17:09:34 -0800 Subject: [PATCH] [#1213] Rewrites http:// video embeds to use protocol relative URLs * ... only if they support them (all the big sites do, smaller ones may or may not). We don't want to force to https if it isn't supported because the behavior is unreliable. An http embed on https may throw up a browser warning. A non-working https embed may, e.g., show a cryptic error message about internal server error, a blank page, or (in one case) an unrelated internal service * added arguments so that we only change the URL if we're going to display the embed (as opposed to editing the embed, or cleaning for saving in the database). Fixes #1213. --- .../DW/Controller/Journal/EmbeddedContent.pm | 1 + cgi-bin/DW/Hooks/EmbedWhitelist.pm | 67 ++++++++++--------- cgi-bin/LJ/CleanHTML.pm | 8 ++- cgi-bin/LJ/EmbedModule.pm | 5 +- 4 files changed, 46 insertions(+), 35 deletions(-) diff --git a/cgi-bin/DW/Controller/Journal/EmbeddedContent.pm b/cgi-bin/DW/Controller/Journal/EmbeddedContent.pm index cd9876492d..ad254801a8 100644 --- a/cgi-bin/DW/Controller/Journal/EmbeddedContent.pm +++ b/cgi-bin/DW/Controller/Journal/EmbeddedContent.pm @@ -73,6 +73,7 @@ sub embedcontent_handler { journalid => $journalid, moduleid => $moduleid, preview => $preview, + display_as_content => 1, )->{content}; $r->print(qq{$content}); diff --git a/cgi-bin/DW/Hooks/EmbedWhitelist.pm b/cgi-bin/DW/Hooks/EmbedWhitelist.pm index dc16315019..b9718a92d6 100755 --- a/cgi-bin/DW/Hooks/EmbedWhitelist.pm +++ b/cgi-bin/DW/Hooks/EmbedWhitelist.pm @@ -47,54 +47,55 @@ sub match_full_path { } my %host_path_match = ( - "8tracks.com" => qr!^/mixes/!, - "bandcamp.com" => qr!^/EmbeddedPlayer/!, - "blip.tv" => qr!^/play/!, + # regex, whether this supports https or not + "8tracks.com" => [ qr!^/mixes/!, 0 ], + "bandcamp.com" => [ qr!^/EmbeddedPlayer/!, 1 ], + "blip.tv" => [ qr!^/play/!, 1 ], - "www.dailymotion.com" => qr!^/embed/video/!, - "dotsub.com" => qr!^/media/!, + "www.dailymotion.com" => [ qr!^/embed/video/!, 1 ], + "dotsub.com" => [ qr!^/media/!, 1 ], - "www.goodreads.com" => qr!^/widgets/!, + "www.goodreads.com" => [ qr!^/widgets/!, 1 ], - "maps.google.com" => qr!^/maps!, - "www.google.com" => qr!^/calendar/!, + "maps.google.com" => [ qr!^/maps!, 1 ], + "www.google.com" => [ qr!^/calendar/!, 1 ], # drawings do not need to be whitelisted as they are images. # forms arent being allowed for security concerns. - "docs.google.com" => qr!^/(document|spreadsheet|presentation)/!, + "docs.google.com" => [ qr!^/(document|spreadsheet|presentation)/!, 1 ], - "www.kickstarter.com" => qr!/widget/[a-zA-Z]+\.html$!, + "www.kickstarter.com" => [ qr!/widget/[a-zA-Z]+\.html$!, 1 ], - "ext.nicovideo.jp" => qr!^/thumb/!, + "ext.nicovideo.jp" => [ qr!^/thumb/!, 0 ], - "www.sbs.com.au" => qr!/player/embed/!, # best guess; language parameter before /player may vary - "www.scribd.com" => qr!^/embeds/!, - "www.slideshare.net" => qr!^/slideshow/embed_code/!, - "w.soundcloud.com" => qr!^/player/!, - "embed.spotify.com" => qr!^/$!, + "www.sbs.com.au" => [ qr!/player/embed/!, 0 ], # best guess; language parameter before /player may vary + "www.scribd.com" => [ qr!^/embeds/!, 1 ], + "www.slideshare.net" => [ qr!^/slideshow/embed_code/!, 1 ], + "w.soundcloud.com" => [ qr!^/player/!, 1 ], + "embed.spotify.com" => [ qr!^/$!, 1 ], - "www.twitvid.com" => qr!^/embed.php$!, + "www.twitvid.com" => [ qr!^/embed.php$!, 0 ], - "player.vimeo.com" => qr!^/video/\d+$!, + "player.vimeo.com" => [ qr!^/video/\d+$!, 1 ], - "www.plurk.com" => qr!^/getWidget$!, + "www.plurk.com" => [ qr!^/getWidget$!, 1 ], - "instagram.com" => qr!^/p/.*/embed/$!, + "instagram.com" => [ qr!^/p/.*/embed/$!, 1 ], - "www.criticalcommons.org" => qr!/embed_view$!, + "www.criticalcommons.org" => [ qr!/embed_view$!, 0 ], - "embed.ted.com" => qr!^/talks/!, + "embed.ted.com" => [ qr!^/talks/!, 1 ], - "archive.org" => qr!^/embed/!, + "archive.org" => [ qr!^/embed/!, 1 ], - "video.yandex.ru" => qr!^/iframe/[\-\w]+/[a-z0-9]+\.\d{4}/?$!, #don't think the last part can include caps; amend if necessary + "video.yandex.ru" => [ qr!^/iframe/[\-\w]+/[a-z0-9]+\.\d{4}/?$!, 1 ], #don't think the last part can include caps; amend if necessary - "episodecalendar.com" => qr!^/icalendar/!, + "episodecalendar.com" => [ qr!^/icalendar/!, 0 ], - "www.flickr.com" => qr!/player/$!, + "www.flickr.com" => [ qr!/player/$!, 1 ], - "www.npr.org" => qr!^/templates/event/embeddedVideo\.php!, + "www.npr.org" => [ qr!^/templates/event/embeddedVideo\.php!, 1 ], - "imgur.com" => qr!^/a/.+?/embed!, + "imgur.com" => [ qr!^/a/.+?/embed!, 1 ], ); @@ -116,16 +117,18 @@ LJ::Hooks::register_hook( 'allow_iframe_embeds', sub { my $uri_host = $parsed_uri->host; my $uri_path = $parsed_uri->path; # not including query - my $path_regex = $host_path_match{$uri_host}; - return 1 if $path_regex && ( $uri_path =~ $path_regex ); + my $host_details = $host_path_match{$uri_host}; + my $path_regex = $host_details->[0]; + + return ( 1, $host_details->[1] ) if $path_regex && ( $uri_path =~ $path_regex); ## YouTube (http://apiblog.youtube.com/2010/07/new-way-to-embed-youtube-videos.html) if ( match_subdomain( "youtube.com", $uri_host ) || match_subdomain( "youtube-nocookie.com", $uri_host ) ) { - return 1 if match_full_path( qr!/embed/[-_a-zA-Z0-9]{11,}!, $uri_path ); + return ( 1, 1 ) if match_full_path( qr!/embed/[-_a-zA-Z0-9]{11,}!, $uri_path ); } if ( $uri_host eq "commons.wikimedia.org" ) { - return 1 if $uri_path =~ m!^/wiki/File:! && $parsed_uri->query =~ m/embedplayer=yes/; + return ( 1, 1 ) if $uri_path =~ m!^/wiki/File:! && $parsed_uri->query =~ m/embedplayer=yes/; } return 0; diff --git a/cgi-bin/LJ/CleanHTML.pm b/cgi-bin/LJ/CleanHTML.pm index 8acd1e1e2d..a037d3ed5f 100644 --- a/cgi-bin/LJ/CleanHTML.pm +++ b/cgi-bin/LJ/CleanHTML.pm @@ -433,9 +433,12 @@ sub clean # force this specific instance of the tag to be allowed (for conditional) my $force_allow = 0; + if (defined $action{$tag} and $action{$tag} eq "conditional") { if ( $tag eq "iframe" ) { - $force_allow = LJ::Hooks::run_hook( 'allow_iframe_embeds', $attr->{src} ); + my $can_https; + ( $force_allow, $can_https ) = LJ::Hooks::run_hook( 'allow_iframe_embeds', $attr->{src} ); + $attr->{src} =~ s!^https?:!! if $opts->{force_https_embed} && $can_https; # convert to protocol-relative URL unless ( $force_allow ) { ## eat this tag if (!$attr->{'/'}) { @@ -1519,7 +1522,7 @@ sub clean_event # clean JS out of embed module sub clean_embed { - my ( $ref ) = @_; + my ( $ref, $opts ) = @_; return unless $$ref; return unless LJ::is_enabled( 'embedmodule-cleancontent' ); @@ -1538,6 +1541,7 @@ sub clean_embed { noexpandembedded => 1, transform_embed_nocheck => 1, rewrite_embed_param => 1, + force_https_embed => $opts->{display_as_content} && $LJ::IS_SSL, }); } diff --git a/cgi-bin/LJ/EmbedModule.pm b/cgi-bin/LJ/EmbedModule.pm index c14272a92f..e7b9bd1902 100644 --- a/cgi-bin/LJ/EmbedModule.pm +++ b/cgi-bin/LJ/EmbedModule.pm @@ -646,6 +646,9 @@ sub module_content { my $preview = $opts{preview}; + # are we displaying the content? (as opposed to processing the text for other reasons) + my $display = $opts{display_as_content}; + # try memcache my $memkey = $class->memkey($journalid, $moduleid, $preview); my ($content, $linktext, $url); # for direct linking @@ -669,7 +672,7 @@ sub module_content { LJ::text_uncompress(\$content) if $content =~ s/^C-//; # clean js out of content - LJ::CleanHTML::clean_embed( \$content ); + LJ::CleanHTML::clean_embed( \$content, { display_as_content => $display }); my $return_content;