Skip to content

Commit

Permalink
[#1213] Rewrites http:// video embeds to use protocol relative URLs
Browse files Browse the repository at this point in the history
* ... only if they support them (all the big sites do, smaller ones may
  or may not). We don't want to force to https if it isn't supported
  because the behavior is unreliable.

  An http embed on https may throw up a browser warning.

  A non-working https embed may, e.g., show a cryptic error message
  about internal server error, a blank page, or (in one case) an
  unrelated internal service

* added arguments so that we only change the URL if we're going to
  display the embed (as opposed to editing the embed, or cleaning for
  saving in the database).

Fixes #1213.
  • Loading branch information
afuna committed Feb 21, 2015
1 parent bc76e0c commit 5d34b93
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 35 deletions.
1 change: 1 addition & 0 deletions cgi-bin/DW/Controller/Journal/EmbeddedContent.pm
Expand Up @@ -73,6 +73,7 @@ sub embedcontent_handler {
journalid => $journalid,
moduleid => $moduleid,
preview => $preview,
display_as_content => 1,
)->{content};

$r->print(qq{<html><head><style type="text/css">html, body { background-color:transparent; padding:0; margin:0; border:0; overflow:hidden; } iframe, object, embed { width: 100%; height: 100%;}</style></head><body>$content</body></html>});
Expand Down
67 changes: 35 additions & 32 deletions cgi-bin/DW/Hooks/EmbedWhitelist.pm
Expand Up @@ -47,54 +47,55 @@ sub match_full_path {
}

my %host_path_match = (
"8tracks.com" => qr!^/mixes/!,
"bandcamp.com" => qr!^/EmbeddedPlayer/!,
"blip.tv" => qr!^/play/!,
# regex, whether this supports https or not
"8tracks.com" => [ qr!^/mixes/!, 0 ],
"bandcamp.com" => [ qr!^/EmbeddedPlayer/!, 1 ],
"blip.tv" => [ qr!^/play/!, 1 ],

"www.dailymotion.com" => qr!^/embed/video/!,
"dotsub.com" => qr!^/media/!,
"www.dailymotion.com" => [ qr!^/embed/video/!, 1 ],
"dotsub.com" => [ qr!^/media/!, 1 ],

"www.goodreads.com" => qr!^/widgets/!,
"www.goodreads.com" => [ qr!^/widgets/!, 1 ],

"maps.google.com" => qr!^/maps!,
"www.google.com" => qr!^/calendar/!,
"maps.google.com" => [ qr!^/maps!, 1 ],
"www.google.com" => [ qr!^/calendar/!, 1 ],
# drawings do not need to be whitelisted as they are images.
# forms arent being allowed for security concerns.
"docs.google.com" => qr!^/(document|spreadsheet|presentation)/!,
"docs.google.com" => [ qr!^/(document|spreadsheet|presentation)/!, 1 ],

"www.kickstarter.com" => qr!/widget/[a-zA-Z]+\.html$!,
"www.kickstarter.com" => [ qr!/widget/[a-zA-Z]+\.html$!, 1 ],

"ext.nicovideo.jp" => qr!^/thumb/!,
"ext.nicovideo.jp" => [ qr!^/thumb/!, 0 ],

"www.sbs.com.au" => qr!/player/embed/!, # best guess; language parameter before /player may vary
"www.scribd.com" => qr!^/embeds/!,
"www.slideshare.net" => qr!^/slideshow/embed_code/!,
"w.soundcloud.com" => qr!^/player/!,
"embed.spotify.com" => qr!^/$!,
"www.sbs.com.au" => [ qr!/player/embed/!, 0 ], # best guess; language parameter before /player may vary
"www.scribd.com" => [ qr!^/embeds/!, 1 ],
"www.slideshare.net" => [ qr!^/slideshow/embed_code/!, 1 ],
"w.soundcloud.com" => [ qr!^/player/!, 1 ],
"embed.spotify.com" => [ qr!^/$!, 1 ],

"www.twitvid.com" => qr!^/embed.php$!,
"www.twitvid.com" => [ qr!^/embed.php$!, 0 ],

"player.vimeo.com" => qr!^/video/\d+$!,
"player.vimeo.com" => [ qr!^/video/\d+$!, 1 ],

"www.plurk.com" => qr!^/getWidget$!,
"www.plurk.com" => [ qr!^/getWidget$!, 1 ],

"instagram.com" => qr!^/p/.*/embed/$!,
"instagram.com" => [ qr!^/p/.*/embed/$!, 1 ],

"www.criticalcommons.org" => qr!/embed_view$!,
"www.criticalcommons.org" => [ qr!/embed_view$!, 0 ],

"embed.ted.com" => qr!^/talks/!,
"embed.ted.com" => [ qr!^/talks/!, 1 ],

"archive.org" => qr!^/embed/!,
"archive.org" => [ qr!^/embed/!, 1 ],

"video.yandex.ru" => qr!^/iframe/[\-\w]+/[a-z0-9]+\.\d{4}/?$!, #don't think the last part can include caps; amend if necessary
"video.yandex.ru" => [ qr!^/iframe/[\-\w]+/[a-z0-9]+\.\d{4}/?$!, 1 ], #don't think the last part can include caps; amend if necessary

"episodecalendar.com" => qr!^/icalendar/!,
"episodecalendar.com" => [ qr!^/icalendar/!, 0 ],

"www.flickr.com" => qr!/player/$!,
"www.flickr.com" => [ qr!/player/$!, 1 ],

"www.npr.org" => qr!^/templates/event/embeddedVideo\.php!,
"www.npr.org" => [ qr!^/templates/event/embeddedVideo\.php!, 1 ],

"imgur.com" => qr!^/a/.+?/embed!,
"imgur.com" => [ qr!^/a/.+?/embed!, 1 ],

);

Expand All @@ -116,16 +117,18 @@ LJ::Hooks::register_hook( 'allow_iframe_embeds', sub {
my $uri_host = $parsed_uri->host;
my $uri_path = $parsed_uri->path; # not including query

my $path_regex = $host_path_match{$uri_host};
return 1 if $path_regex && ( $uri_path =~ $path_regex );
my $host_details = $host_path_match{$uri_host};
my $path_regex = $host_details->[0];

return ( 1, $host_details->[1] ) if $path_regex && ( $uri_path =~ $path_regex);

## YouTube (http://apiblog.youtube.com/2010/07/new-way-to-embed-youtube-videos.html)
if ( match_subdomain( "youtube.com", $uri_host ) || match_subdomain( "youtube-nocookie.com", $uri_host ) ) {
return 1 if match_full_path( qr!/embed/[-_a-zA-Z0-9]{11,}!, $uri_path );
return ( 1, 1 ) if match_full_path( qr!/embed/[-_a-zA-Z0-9]{11,}!, $uri_path );
}

if ( $uri_host eq "commons.wikimedia.org" ) {
return 1 if $uri_path =~ m!^/wiki/File:! && $parsed_uri->query =~ m/embedplayer=yes/;
return ( 1, 1 ) if $uri_path =~ m!^/wiki/File:! && $parsed_uri->query =~ m/embedplayer=yes/;
}

return 0;
Expand Down
8 changes: 6 additions & 2 deletions cgi-bin/LJ/CleanHTML.pm
Expand Up @@ -433,9 +433,12 @@ sub clean

# force this specific instance of the tag to be allowed (for conditional)
my $force_allow = 0;

if (defined $action{$tag} and $action{$tag} eq "conditional") {
if ( $tag eq "iframe" ) {
$force_allow = LJ::Hooks::run_hook( 'allow_iframe_embeds', $attr->{src} );
my $can_https;
( $force_allow, $can_https ) = LJ::Hooks::run_hook( 'allow_iframe_embeds', $attr->{src} );
$attr->{src} =~ s!^https?:!! if $opts->{force_https_embed} && $can_https; # convert to protocol-relative URL
unless ( $force_allow ) {
## eat this tag
if (!$attr->{'/'}) {
Expand Down Expand Up @@ -1519,7 +1522,7 @@ sub clean_event

# clean JS out of embed module
sub clean_embed {
my ( $ref ) = @_;
my ( $ref, $opts ) = @_;
return unless $$ref;
return unless LJ::is_enabled( 'embedmodule-cleancontent' );

Expand All @@ -1538,6 +1541,7 @@ sub clean_embed {
noexpandembedded => 1,
transform_embed_nocheck => 1,
rewrite_embed_param => 1,
force_https_embed => $opts->{display_as_content} && $LJ::IS_SSL,
});
}

Expand Down
5 changes: 4 additions & 1 deletion cgi-bin/LJ/EmbedModule.pm
Expand Up @@ -646,6 +646,9 @@ sub module_content {

my $preview = $opts{preview};

# are we displaying the content? (as opposed to processing the text for other reasons)
my $display = $opts{display_as_content};

# try memcache
my $memkey = $class->memkey($journalid, $moduleid, $preview);
my ($content, $linktext, $url); # for direct linking
Expand All @@ -669,7 +672,7 @@ sub module_content {
LJ::text_uncompress(\$content) if $content =~ s/^C-//;

# clean js out of content
LJ::CleanHTML::clean_embed( \$content );
LJ::CleanHTML::clean_embed( \$content, { display_as_content => $display });

my $return_content;

Expand Down

0 comments on commit 5d34b93

Please sign in to comment.