Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

use sqlite's fulltext search

  • Loading branch information...
commit 93d317cdb495050f9c0e499e61580d00c7662b3b 1 parent 9a147a4
@datamuc authored
Showing with 121 additions and 26 deletions.
  1. +25 −6 polizei.pl
  2. +93 −9 polizei.psgi
  3. +3 −11 schema.sql
View
31 polizei.pl
@@ -16,18 +16,21 @@
use Digest::SHA;
use XML::Feed;
use Devel::Peek;
-use Carp::Always;
+#use Carp::Always;
use URI;
use Encode;
use DBI;
use utf8;
use feature 'unicode_strings';
-my $db = DBI->connect("dbi:SQLite:/home/danielt/log/polizei.db")
+my $db = DBI->connect("dbi:SQLite:/home/danielt/log/polizei2.db", undef, undef, { RaiseError => 1, PrintError => 0 })
or die($DBI::errstr);
my $insert = $db->prepare(q{
- insert or ignore into meldungen
- (id, title, meldung, ts) values (?,?,?,datetime('now'))
+ insert into meldungen_idx
+ (id, ts, meldung_docid) values (?,datetime('now'), ?)
+});
+my $docins = $db->prepare(q{
+ insert into meldungen_fts (meldung, title) values (?,?)
});
my $feed = "http://www.polizei.bayern.de/muenchen/polizei.rss";
my $myfeed = XML::Feed->new( 'RSS' );
@@ -49,7 +52,7 @@ sub buildItems {
unless($stop) {
for my $l ($dom->find('a[class="verweiseLinks"]')->each) {
- if ($l->text =~ /Wiesn-Report/i) {
+ if ($l->text =~ /Wiesn.*Report/i) {
buildItems(URI->new_abs($l->attr("href"),$link)->as_string,1);
}
}
@@ -72,7 +75,10 @@ sub buildItems {
}
for my $i (0..$#titles) {
+ #Dump($contents[$i]);
my $guid = Digest::SHA::sha1_hex($contents[$i]);
+ #$contents[$i] = Encode::encode('utf-8', $contents[$i]);
+ #say STDERR "|$contents[$i]|\n\n";
my $item = XML::Feed::Entry->new('RSS');
next unless (length($titles[$i]) or length($contents[$i]));
$item->title($titles[$i]);
@@ -80,7 +86,20 @@ sub buildItems {
$item->link('http://data.rbfh.de/p.cgi/'.substr($guid, 0, 10));
$item->id($guid);
$myfeed->add_entry( $item );
- $insert->execute($guid, $titles[$i], $contents[$i]) if (! -t STDOUT);
+
+ eval {
+ $db->begin_work;
+ $docins->execute($contents[$i], $titles[$i]);
+ my $docid = $db->sqlite_last_insert_rowid;
+ $insert->execute($guid, $docid);
+ };
+ my $err = $@;
+ unless($err) {
+ $db->commit;
+ next;
+ }
+ say $err if $err !~ /column id is not unique/;
+ $db->rollback;
}
View
102 polizei.psgi
@@ -8,20 +8,44 @@
# Danijel Tasov
# ----------------------------------------------------------------------
+use 5.010;
+
package Polizei {
use Web::Simple;
use DBI;
use Template::Mustache;
use Data::Section::Simple;
-
- my $db = DBI->connect("dbi:SQLite:/home/danielt/log/polizei.db");
+ use Time::Piece;
+ use IO::File;
+ Time::Piece::mon_list(qw(
+ Januar Februar März April Mai Juni
+ Juli August September Oktober November Dezember
+ ));
+
+ my $db = DBI->connect("dbi:SQLite:/home/danielt/log/polizei2.db");
my $ds = Data::Section::Simple->new('main');
- my $get = $db->prepare("select * from meldungen where id like ? || '%'");
- my $page = $db->prepare("select id,title from meldungen order by ts desc limit 10 offset 10 * ?");
+ my $get = $db->prepare(q{
+ select *,strftime('%s', ts) epoch
+ from meldungen_idx i join meldungen_fts f on (i.meldung_docid = f.docid)
+ where id like ? || '%'
+ });
+ my $page = $db->prepare(q{
+ select id,title
+ from meldungen_idx i join meldungen_fts f on (i.meldung_docid = f.docid)
+ order by ts desc limit 10 offset 10 * ?
+ });
+ my $search = $db->prepare(q{
+ select id,title,snippet(meldungen_fts) snip
+ from meldungen_idx i join meldungen_fts f on (i.meldung_docid = f.docid)
+ where meldungen_fts match ?
+ order by ts desc
+ limit 200
+ });
my $partials = { head => scalar $ds->get_data_section('head') };
my $index = $ds->get_data_section('index');
my $meldung = $ds->get_data_section('meldung');
+ my $nothing = $ds->get_data_section('nothing');
sub dispatch_request {
my ($self, $env) = @_;
@@ -29,6 +53,10 @@ package Polizei {
my ($self) = @_;
$self->render_page(0, $env);
},
+ sub ((GET|HEAD) + /search/ + ?needle=) {
+ my ($self, $needle) = @_;
+ $self->render_search($needle,$env);
+ },
sub ((GET|HEAD) + /p/*) {
my ($self, $id) = @_;
$self->render_page($id, $env);
@@ -39,6 +67,40 @@ package Polizei {
}
}
+ # Run a full-text search and render the hits with the index template.
+ #
+ # $needle - raw "needle" query parameter; bound as the FTS MATCH expression
+ # $env    - PSGI environment; only SCRIPT_NAME is read
+ #
+ # Returns a PSGI response array ref. The "nothing found" page is returned
+ # when the needle is blank, when SQLite rejects the query, or when no row
+ # matches.
+ sub render_search {
+     my ($self, $needle, $env) = @_;
+
+     # A blank needle contains no search term, so skip the database.
+     return $self->render_nothing_found($env, $needle)
+         unless defined $needle and $needle =~ /\S/;
+
+     # The needle is user input in FTS query syntax, so execute() can fail
+     # on a malformed expression (e.g. an unbalanced quote). Cover both
+     # DBI failure styles: a false return value and, under RaiseError, an
+     # exception.
+     my $meldungen = eval {
+         $search->execute($needle)
+             ? $search->fetchall_arrayref({})
+             : undef;
+     };
+     warn "fulltext search failed: $@" if $@;
+     return $self->render_nothing_found($env, $needle)
+         unless $meldungen and @$meldungen;
+
+     for my $m (@$meldungen) {
+         # Articles are addressed by the first 10 characters of their id.
+         $m->{link} = "$env->{SCRIPT_NAME}/" . substr($m->{id}, 0, 10);
+         $m->{title} = "[no title]"
+             unless length($m->{title});
+     }
+
+     my $vars = {
+         meldungen   => $meldungen,
+         script_name => $env->{SCRIPT_NAME},
+         needle      => $needle,
+     };
+     return [
+         200,
+         [ 'content-type', 'text/html; charset=utf-8' ],
+         [ Template::Mustache->render($index, $vars, $partials) ],
+     ];
+ }
+
+ # Render the "nothing" template for a search that produced no hits.
+ #
+ # Called as $self->render_nothing_found($env, $needle); SCRIPT_NAME from
+ # $env and the needle are handed to the template.
+ # Returns a PSGI response array ref with status 200.
+ sub render_nothing_found {
+     my (undef, $env, $needle) = @_;
+
+     my %vars = (
+         script_name => $env->{SCRIPT_NAME},
+         needle      => $needle,
+     );
+
+     return [
+         200,
+         [ 'content-type', 'text/html; charset=utf-8' ],
+         [ Template::Mustache->render($nothing, \%vars, $partials) ],
+     ];
+ }
+
sub render_article {
my ($self, $id, $env) = @_;
$get->execute($id);
@@ -55,8 +117,12 @@ package Polizei {
$row->{script_name} = $env->{SCRIPT_NAME};
+ my $tp = localtime($row->{epoch});
+ $row->{datum} = sprintf "%d. %s %d",
+ $tp->mday, $tp->fullmonth, $tp->year;
+
[ 200,
- ['Content-Type', 'text/html; charset=utf-8'],
+ ['Content-Type', 'text/html'],
[ Template::Mustache->render($meldung, $row, $partials) ]
];
}
@@ -88,8 +154,8 @@ package Polizei {
sub render_404 {
+ # Serve the static 404 page as the response body. IO::File->new returns
+ # undef when the file cannot be opened, which is not a valid PSGI body,
+ # so fall back to a plain-text response in that case.
+ my $page = IO::File->new("/home/danielt/public_html/404.html")
+     or return [ 404, ['content-type', 'text/plain'], [ '404 Not Found' ] ];
[ 404,
- ['content-type', 'text/plain'],
- [ '404 Not Found' ]
+ ['content-type', 'text/html'],
+ $page
]
}
@@ -101,23 +167,38 @@ Polizei->run_if_script;
__DATA__
@@ index
{{>head}}
+{{#needle}}
+<h2><a href="{{script_name}}/">Polizei München Presseberichte</a></h2>
+{{/needle}}
+{{^needle}}
<h2><a href="/polizei.rss">Polizei München Presseberichte</a></h2>
+{{/needle}}
<ul>
{{#meldungen}}
-<li><a href="{{link}}">{{title}}</a>
+<li><a href="{{link}}">{{title}}</a>{{#snip}}
+<br />{{{snip}}}{{/snip}}
{{/meldungen}}
</ul>
<hr>
+{{#next}}
<a href="{{next}}">weiter…</a>
+{{/next}}
@@ meldung
{{>head}}
<h2><a href="{{script_name}}/">Polizei München Presseberichte</a></h2>
-<h3>{{title}}</h3> {{{meldung}}}
+<h3>{{title}}</h3>
+<p style="text-align: right">{{ datum }}</p>
+{{{meldung}}}
+
+@@ nothing
+{{>head}}
+Nothing found
@@ head
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
+<meta http-equiv="content-type" content="text/html; charset=UTF-8">
{{#title}}
<title>{{title}} - Polizeiberichte München</title>
{{/title}}
@@ -140,3 +221,6 @@ a { color: #2c2; text-decoration: none; }
a:visited { color: #0a0; }
</style>
<link rel="alternate" type="application/rss+xml" title="Pressemeldungen der Polizei München" href="/polizei.rss">
+<form method="GET" action="{{script_name}}/search/">
+<input placeholder="suche..." id="search" value="{{needle}}" name="needle" /> <a href="https://www.sqlite.org/fts3.html#section_3">?</a>
+</form>
View
14 schema.sql
@@ -1,11 +1,3 @@
-/*
- * -----------------------------------------------------------------------
- * "THE BEER-WARE LICENSE" (Revision 42):
- * <m@rbfh.de> wrote this file. As long as you retain this notice you can
- * do whatever you want with this stuff. If we meet some day, and you
- * think this stuff is worth it, you can buy me a beer in return
- * Danijel Tasov
- * -----------------------------------------------------------------------
-*/
-CREATE TABLE meldungen (id char(40), title text, meldung text, ts datetime);
-CREATE UNIQUE INDEX m_id on meldungen (id);
+CREATE VIRTUAL TABLE meldungen_fts using fts4(meldung,title); -- FTS4 full-text table holding each report's body (meldung) and title
+CREATE TABLE meldungen_idx (id char(40), ts datetime, meldung_docid integer); -- per-report metadata: id, insertion timestamp, and the docid of the matching meldungen_fts row
+CREATE UNIQUE INDEX midx_id on meldungen_idx(id); -- at most one meldungen_idx row per id; inserting a duplicate id fails
Please sign in to comment.
Something went wrong with that request. Please try again.