/
expressen.php
executable file
·66 lines (51 loc) · 2.92 KB
/
expressen.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
<?php
/* ---------------------------------------------------------------------------*
* *
* This file is part of Beaneditor *
* *
* _/ *
* /o| *
* |o|| *
* |o|| *
* v\| *
* *
* *
* Beaneditor is free software: you can redistribute it and / or modify it *
* under the terms of the GNU Affero General Public License as published by *
* the Free Software Foundation, either version three of the License or (at *
* your option) any later version. *
* *
* Beaneditor is distributed in the hope that it will be useful but WITHOUT ANY *
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS *
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for *
* more details. *
* *
* You should have received a copy of the GNU Affero General Public License *
* along with Beaneditor. If not, see <http://www.gnu.org/licenses/>. *
* *
*----------------------------------------------------------------------------*/
// PARSER FOR EXPRESSEN
// (1) Domain match: host names this parser is selected for.
$identify_by_domain = array('expressen.se');

// (2) For URLs that matched no domain, signs to look for in the HTML source
//     (none are defined for this parser).
$identify_by_source = array();
// ---------------------------------------------------------------
// ---------------------------------------------------------------
// PARSER
// function must be named "parse_" + (filename - ".php")
// 1. receives the URL and the already fetched page source
// 2. returns the parsed HTML as a string
/**
 * Build a simplified article from the HTML of an Expressen page.
 *
 * Extracts the headline (first <h1>), the lead (first
 * ".b-text_article-preamble") and the body (second ".b-text_article" inside
 * the first ".b-article__content"), then wraps them in a
 * <div class="article"> followed by a link back to the source URL.
 *
 * Any part that cannot be located is left empty instead of triggering a
 * fatal "member function on null" error.
 *
 * @param string $url         Address of the original article. It is
 *                            HTML-escaped before being placed in the href.
 * @param string $page_source Raw HTML of the page.
 * @return string Article HTML, or '' when $page_source could not be parsed.
 */
function parse_expressen($url, $page_source) {
    $html = str_get_html($page_source);

    // str_get_html() does not hand back a DOM object for every input
    // (e.g. an empty source), so bail out before calling methods on it.
    if (!is_object($html)) {
        return '';
    }

    // find($selector, $index) yields null when nothing matches; guard each
    // lookup so a layout change degrades to an empty part, not a crash.
    $title_node = $html->find('h1', 0);
    $title = $title_node ? $title_node->innertext : '';

    $lead_node = $html->find('.b-text_article-preamble', 0);
    $lead = $lead_node ? $lead_node->innertext : '';

    // The body is the second ".b-text_article" inside the content container.
    $content_node = $html->find('.b-article__content', 0);
    $body_node = $content_node ? $content_node->find('.b-text_article', 1) : null;
    $postbody = $body_node ? $body_node->innertext : '';

    // The URL comes from outside; escape it so it cannot break out of the
    // href attribute.
    $safe_url = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');

    // wrap in article structure
    $content = '<div class="article"><h1>'.$title.'</h1>'
        .'<div class="lead">'.$lead.'</div>'
        .$postbody
        .'<address><a href="'.$safe_url.'">Expressen | '.$title.'</a></address></div>';

    return $content;
}
?>