Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
259 lines (219 sloc) 8.07 KB
#!/usr/bin/perl -w
#
# This is VERY experimental code; use at your own risk.
#
# WARNING:
# Because of XML::RSS brokenness, this code won't work as expected with
# character codes >127. You might try to work around it by adding
# binmode(OUT, "encoding($self->{'encoding'})"); to XML/RSS.pm:save(),
# but even then, UTF-8 data will break your script.
# Somebody should really fix XML::RSS.
#
# TODO:
# - make urlfeed_title, urlfeed_link, urlfeed_description work for
# already-created feeds, not only the new ones
# - some exclude-list would be useful I guess
# - enhance urlfeed_find_url() maybe
# - TEST IT! it's not idiot-proof at the moment
#
use strict;
use vars qw($VERSION %IRSSI);
use POSIX qw(strftime);
use Irssi;
use Irssi::Irc;
use XML::RSS;
# Script version and descriptive metadata.
# NOTE(review): presumably read by irssi's script loader -- confirm.
$VERSION = '1.28';
%IRSSI = (
authors => 'Jakub Jankowski',
contact => 'shasta@toxcorp.com',
name => 'URLfeed',
description => 'Provides RSS feeds with URLs pasted on your channels.',
license => 'GNU GPLv2 or later',
url => 'http://toxcorp.com/irc/irssi/urlfeed/',
changed => '$Date: 2008-06-16 20:10:41 +0200 (pon, 16 cze 2008) $'
);
# Default values for the urlfeed_* settings registered at the bottom of
# this file. The strings below are single-quoted on purpose: $tag, $chan
# and $stripchan stay literal here and are expanded per message by
# urlfeed_build_path() / urlfeed_replace().
#
# $stripchan is replaced with the channel name WITHOUT its leading !#&+
# $chan is replaced with the full channel name
# $tag is replaced with the server tag
my $rss_title = 'URLs on $chan';
my $rss_link = 'http://toxcorp.com/irc/irssi/';
my $rss_description = 'List of URLs recently pasted on $chan $tag channel';
# Default location of the per-channel feed (default for urlfeed_path).
my $rss_path = $ENV{HOME}.'/public_html/rss/$tag/$stripchan.rdf';
# Default location of the combined feed (default for urlfeed_bundle_path).
my $rss_bundle_path = $ENV{HOME}.'/public_html/rss/all.rdf';
# Default item limits for the per-channel feed and the bundle feed.
my $max_items = 15;
my $bundle_max_items = 40;
# Default for urlfeed_debug: 1 makes the script print a notice per URL.
my $debug = 1;
# Default for urlfeed_provide_bundle: 0 means no combined feed is written.
my $provide_bundle = 0;
# Build the on-disk path of the RSS file for one channel.
#
# Expands the placeholders of the 'urlfeed_path' setting (matched
# case-insensitively, in a single pass so inserted values are never
# re-scanned for further placeholders):
#   $tag       - server tag
#   $chan      - channel name, including its !#&+ prefix
#   $stripchan - channel name without the leading !#&+ character
# If the expanded path ends in "/", "<channel>.rdf" is appended.
#
# The channel name is supplied by the IRC network and may contain "/".
# Every "/" in a substituted value is replaced with "_", and a value that
# is exactly "." or ".." becomes "_", so that a name such as "#../../x"
# cannot move the feed outside the configured directory.
#
# Params:  $tag  - server tag
#          $chan - channel name
# Returns: the expanded path as a string.
sub urlfeed_build_path {
    my ($tag, $chan) = @_;

    # Make a value safe to use as (part of) a single path component.
    my $sanitize = sub {
        my ($value) = @_;
        $value = '' unless (defined $value);
        $value =~ tr{/}{_};
        $value =~ s/\A\.\.?\z/_/;
        return $value;
    };

    my $safe_chan = $sanitize->($chan);
    my ($stripchan) = $safe_chan =~ /^[\!\#\&\+](.+)/;
    my %value_for = (
        tag       => $sanitize->($tag),
        chan      => $safe_chan,
        # undefined when the name has no prefix or nothing follows it
        stripchan => $sanitize->($stripchan),
    );

    my $str = Irssi::settings_get_str('urlfeed_path');
    $str =~ s/\$(stripchan|chan|tag)/$value_for{lc $1}/gie;

    # A path that names a directory gets a per-channel file name appended.
    $str .= $safe_chan . ".rdf" if ($str =~ /\/$/);
    return $str;
}
# Expand $tag, $chan and $stripchan placeholders in a template string.
#
# Placeholders are matched case-insensitively and replaced in a single
# pass, so text inserted for one placeholder is never re-scanned for
# another (a channel literally named "#a$stripchan" stays as typed).
#
# Params:  $str  - template text
#          $tag  - server tag
#          $chan - channel name, including its !#&+ prefix
# Returns: the expanded string. $stripchan expands to the empty string
#          when the channel has no prefix or nothing after the prefix.
sub urlfeed_replace ($$$) {
    my ($str, $tag, $chan) = @_;
    my ($stripchan) = $chan =~ /^[\!\#\&\+](.+)/;

    my %value_for = (
        tag       => $tag,
        chan      => $chan,
        stripchan => (defined $stripchan ? $stripchan : ''),
    );

    $str =~ s/\$(stripchan|chan|tag)/$value_for{lc $1}/gie;
    return $str;
}
# Make sure the feed file $f can be written.
#
# Creates every missing directory leading to $f (absolute or relative
# path), checks that the containing directory is writable and, when $f
# already exists, that it can be opened for reading and writing. The file
# itself is never created or modified here.
#
# mkdir() and open() report failure through their return value and $!,
# they do not die, so both are checked directly.
#
# Params:  $f - path of the feed file
# Returns: 1 when the file can be written, 0 otherwise (after printing an
#          explanatory message through Irssi::print).
sub urlfeed_touch_file ($) {
    my ($f) = @_;
    return 0 unless (defined $f && length $f);

    # Directory part of $f. A bare file name lives in the current
    # directory; a file directly under the root has "/" as its directory.
    my ($basedir) = $f =~ m{\A(.*)/[^/]*\z}s;
    $basedir = '.' unless (defined $basedir);
    $basedir = '/' if ($basedir eq '');

    # Walk the directory components from the top, creating what is missing.
    my $path = ($basedir =~ m{\A/}) ? '/' : '';
    foreach my $dir (grep { length } split(/\/+/, $basedir)) {
        $path .= $dir;
        if (! -d $path) {
            Irssi::print("URLfeed warning: $path is not a dir, trying to mkdir");
            unless (mkdir($path)) {
                my $reason = "$!";
                # Somebody else may have created it in the meantime.
                unless (-d $path) {
                    Irssi::print("URLfeed error: couldn't mkdir($path): $reason");
                    return 0;
                }
            }
        }
        $path .= '/';
    }

    if (! -w $basedir) {
        Irssi::print("URLfeed error: $basedir isn't writable");
        return 0;
    }

    # A feed that does not exist yet only needs the writable directory
    # verified above.
    return 1 unless (-e $f);

    my $fh;
    unless (open($fh, "+<", $f)) {
        Irssi::print("URLfeed error: couldn't open $f for writing: $!");
        return 0;
    }
    close($fh);
    return 1;
}
# Render an epoch timestamp as local date and time followed by the
# numeric UTC offset with a colon between hours and minutes,
# e.g. "2008-06-16T20:10:41+02:00".
#
# Params:  epoch seconds
# Returns: the formatted string.
sub urlfeed_format_time ($) {
    my ($epoch) = @_;
    my @fields = localtime($epoch);

    # strftime's %z yields "+hhmm"; split it into "+hh" and "mm".
    my $offset  = strftime("%z", @fields);
    my $hours   = substr($offset, 0, 3);
    my $minutes = substr($offset, 3);

    my $stamp = strftime("%Y-%m-%dT%H:%M:%S", @fields);
    return $stamp . $hours . ":" . $minutes;
}
# Store one item at the top of one RSS file.
#
# The file is parsed when possible; when parsing fails (missing or broken
# file) a fresh feed is started with the channel metadata in $channel.
# An item whose link equals (case-insensitively) a link already in the
# feed is rejected. The oldest items are dropped so that the feed holds
# at most $max_items entries after the new one is inserted (always at
# least the new item itself).
#
# Params:  $filename  - path of the RSS file
#          $max_items - maximum number of items to keep
#          $channel   - hashref: title, link, description for a new feed
#          $item      - hashref: title, link, dc for the new item
# Returns: 1 when the item was saved, 0 on a duplicate or any failure.
sub _urlfeed_store_item {
    my ($filename, $max_items, $channel, $item) = @_;

    if (!urlfeed_touch_file($filename)) {
        Irssi::print("URLfeed error: Couldn't touch $filename");
        return 0;
    }

    my $rss = XML::RSS->new(version => '1.0', encoding => 'ISO-8859-1');
    eval { $rss->parsefile($filename); };
    if ($@) {
        Irssi::print("URLfeed notice: rss->parsefile($filename) failed. Creating new RSS") if (Irssi::settings_get_bool('urlfeed_debug'));
        $rss->channel(%{$channel});
    }

    my $items = ($rss->{'items'} ||= []);

    # tiny spam protection
    foreach my $existing (@{$items}) {
        my $link = $existing->{'link'};
        return 0 if (defined $link && lc($item->{'link'}) eq lc($link));
    }

    # New items are inserted at the front, so the oldest sit at the end.
    # Leave room for the item that is about to be added.
    my $keep = ($max_items > 1) ? $max_items - 1 : 0;
    splice(@{$items}, $keep) if (@{$items} > $keep);

    $rss->add_item(%{$item}, mode => 'insert');

    # save() dies when the file cannot be written; report instead of
    # letting the exception escape into the signal handler.
    eval { $rss->save($filename); };
    if ($@) {
        my $reason = $@;
        chomp($reason);
        Irssi::print("URLfeed error: couldn't save $filename: $reason");
        return 0;
    }
    return 1;
}

# Add one pasted URL to the channel's feed and, when the
# urlfeed_provide_bundle setting is on, to the bundle feed as well.
#
# Params:  $timestamp - epoch seconds, stored as the item's dc:date
#          $tag       - server tag (required)
#          $chan      - channel name (required)
#          $nickname  - who pasted the URL; "guest" when undefined
#          $text      - item title; the URL itself when undefined
#          $url       - the URL (required)
# Returns: 1 on success. 0 when a required argument is missing, a file
#          cannot be prepared or saved, or the URL is already present in
#          the feed being updated. Note that a duplicate in the bundle
#          feed yields 0 even though the channel feed was updated.
sub urlfeed_rss_add {
    my ($timestamp, $tag, $chan, $nickname, $text, $url) = @_;
    return 0 unless (defined $url && defined $tag && defined $chan);
    $nickname = "guest" unless (defined $nickname);
    $text = $url unless (defined $text);

    my $date = urlfeed_format_time($timestamp);

    my $stored = _urlfeed_store_item(
        urlfeed_build_path($tag, $chan),
        Irssi::settings_get_int('urlfeed_max_items'),
        {
            title       => urlfeed_replace(Irssi::settings_get_str('urlfeed_title'), $tag, $chan),
            link        => urlfeed_replace(Irssi::settings_get_str('urlfeed_link'), $tag, $chan),
            description => urlfeed_replace(Irssi::settings_get_str('urlfeed_description'), $tag, $chan),
        },
        {
            title => $text,
            link  => $url,
            dc    => { creator => $nickname, date => $date },
        }
    );
    return 0 unless ($stored);
    return 1 unless (Irssi::settings_get_bool('urlfeed_provide_bundle'));

    # now do the bundle part
    return _urlfeed_store_item(
        Irssi::settings_get_str('urlfeed_bundle_path'),
        Irssi::settings_get_int('urlfeed_bundle_max_items'),
        {
            title       => 'URL feed from IRC',
            link        => 'http://toxcorp.com/irc/irssi/',
            description => 'RSS feed with URLs pasted on IRC networks',
        },
        {
            title => $text,
            link  => $url,
            dc    => { creator => $nickname . " on " . $tag, date => $date },
        }
    );
}
# based on urlgrab.pl by David Leadbeater
#
# Extract URLs from a line of chat text.
#
# The text is split on spaces and tabs; each chunk contributes at most
# one URL: either an ftp/http/https URL, or a "www." host which gets
# "http://" prepended. Sentence punctuation directly after a URL
# ("see http://example.com/a, ...") is not part of the link and is
# stripped, which also keeps the feeds' duplicate check from treating
# "http://x/" and "http://x/." as different links.
#
# Params:  $text - message text (undefined is treated as no text)
# Returns: list of URLs in order of appearance, possibly empty.
sub urlfeed_find_urls {
    my ($text) = @_;
    return () unless (defined $text);

    my $url_chars = qr/[a-zA-Z0-9\/\\\:\?\%\.\&\;=#\-\_\!\+\~\,]+/;
    my @urls = ();

    foreach my $chunk (split(/[ \t]+/, $text)) {
        my $url;
        if ($chunk =~ /((?:ftp|http|https):\/\/$url_chars)/i) {
            $url = $1;
        } elsif ($chunk =~ /(www\.$url_chars)/i) {
            $url = "http://" . $1;
        } else {
            next;
        }

        # Drop trailing sentence punctuation; skip what is left if it is
        # only a scheme.
        $url =~ s/[.,;:!?]+\z//;
        next unless ($url =~ m{://.});

        push(@urls, $url);
    }
    return @urls;
}
# Feed every URL found in one channel message into the RSS files and,
# when the urlfeed_debug setting is on, print the outcome for each URL.
#
# Params:  $time   - epoch seconds of the message
#          $tag    - server tag
#          $target - channel name
#          $nick   - nick of the sender
#          $text   - message text
# Returns: nothing meaningful.
sub urlfeed_process {
    my ($time, $tag, $target, $nick, $text) = @_;
    my $verbose = Irssi::settings_get_bool('urlfeed_debug');

    foreach my $url (urlfeed_find_urls($text)) {
        my $retval = urlfeed_rss_add($time, $tag, $target, $nick, $text, $url);
        next unless ($verbose);

        # Irssi::print() interprets "%" sequences, so escape every value
        # that comes from the network (on copies, not the originals).
        my ($shown_url, $shown_nick, $shown_target, $shown_tag) = map {
            my $value = defined $_ ? $_ : '';
            $value =~ s/\%/\%\%/g;
            $value;
        } ($url, $nick, $target, $tag);

        if ($retval) {
            Irssi::print("URLfeed notice: URL $shown_url (pasted by $shown_nick on $shown_target/$shown_tag) successfully added to RSS feed.");
        } else {
            Irssi::print("URLfeed notice: Adding URL $shown_url (pasted by $shown_nick on $shown_target/$shown_tag) to RSS failed.");
        }
    }
    return;
}
# Handler for our own messages sent to a channel.
#
# Targets that do not start with one of !#&+ are ignored. For a target
# starting with "!", everything after the sixth character is kept and
# prefixed with "!" again.
# NOTE(review): presumably this drops the 5-character channel ID of
# "!" channels -- confirm against what irssi passes as the target.
sub urlfeed_message_own_public {
    my ($server, $text, $target) = @_;

    if ($target !~ /^[\!\#\&\+]/) {
        return;
    }
    if ($target =~ /^\!/) {
        $target = '!' . substr($target, 6);
    }

    urlfeed_process(time, $server->{tag}, lc($target), $server->{nick}, $text);
}
# Handler for messages other people send to a channel.
#
# Targets that do not start with one of !#&+ are ignored; otherwise the
# message is processed under the lower-cased channel name, with the
# current time as its timestamp.
sub urlfeed_message_public {
    my ($server, $text, $nick, $hostmask, $target) = @_;

    if ($target !~ /^[\!\#\&\+]/) {
        return;
    }

    urlfeed_process(time, $server->{tag}, lc($target), $nick, $text);
}
# Register the script's settings in the 'urlfeed' section, each with the
# default defined near the top of the file: booleans first, then
# integers, then strings.
foreach my $entry (
    ['urlfeed_debug',          $debug],
    ['urlfeed_provide_bundle', $provide_bundle],
) {
    my ($name, $default) = @{$entry};
    Irssi::settings_add_bool('urlfeed', $name, $default);
}

foreach my $entry (
    ['urlfeed_max_items',        $max_items],
    ['urlfeed_bundle_max_items', $bundle_max_items],
) {
    my ($name, $default) = @{$entry};
    Irssi::settings_add_int('urlfeed', $name, $default);
}

foreach my $entry (
    ['urlfeed_title',       $rss_title],
    ['urlfeed_link',        $rss_link],
    ['urlfeed_description', $rss_description],
    ['urlfeed_path',        $rss_path],
    ['urlfeed_bundle_path', $rss_bundle_path],
) {
    my ($name, $default) = @{$entry};
    Irssi::settings_add_str('urlfeed', $name, $default);
}

# Hook the channel-message signals.
foreach my $hook (
    ['message public',     'urlfeed_message_public'],
    ['message own_public', 'urlfeed_message_own_public'],
) {
    my ($signal, $handler) = @{$hook};
    Irssi::signal_add_last($signal, $handler);
}