This repository has been archived by the owner on Jun 8, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
class.php
718 lines (635 loc) · 23.5 KB
/
class.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
<?php
/**
* WebArchiveBOT: botclases.php based MediaWiki script for archiving external links to Internet Archive Wayback Machine.
*
* @copyright (c) 2015-2018 Davod - https://commons.wikimedia.org/wiki/User:Amitie_10g
*
* Contains parts of the Chris G's Bot classes library - https://www.mediawiki.org/wiki/Manual:Chris_G%27s_botclasses
*
* (c) 2008-2012 Chris G http://en.wikipedia.org/wiki/User:Chris_G
* (c) 2009-2010 Fale http://en.wikipedia.org/wiki/User:Fale
* (c) 2010 Kaldari http://en.wikipedia.org/wiki/User:Kaldari
* (c) 2011 Gutza http://en.wikipedia.org/wiki/User:Gutza
* (c) 2012 Sean http://en.wikipedia.org/wiki/User:SColombo
* (c) 2012 Brain http://en.wikipedia.org/wiki/User:Brian_McNeil
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Developers (add yourself here if you worked on the code):
* Cobi - [[User:Cobi]] - Wrote the http class and some of the wikipedia class
* Chris - [[User:Chris_G]] - Wrote the most of the wikipedia class
* Fale - [[User:Fale]] - Polish, wrote the extended and some of the wikipedia class
* Kaldari - [[User:Kaldari]] - Submitted a patch for the imagematches function
* Gutza - [[User:Gutza]] - Submitted a patch for http->setHTTPcreds(), and http->quiet
* Sean - [[User:SColombo]] - Wrote the lyricwiki class (now moved to lyricswiki.php)
* Brain - [[User:Brian_McNeil]] - Wrote wikipedia->getfileuploader() and wikipedia->getfilelocation
* Davod - [[User:Amitie_10g]] - See bellow:
**/
/**
* This class is designed to provide a simplified interface to cURL which maintains cookies.
* @author Cobi
* @property $ch The cURL class reference
* @property $uid The cURL UID object
* @property $cookie_jar The cURL cookie_jar object
* @property $postfollowredirs
* @property $getfollowredirs
* @property $quiet
* @property $userAgent The default User agent
* @property $httpHeader
* @property $defaultHttpHeader
**/
class http {
private $ch;
private $uid;
public $cookie_jar;
public $postfollowredirs;
public $getfollowredirs;
public $quiet=false;
public $userAgent = 'php wikibot classes';
public $httpHeader = array('Expect:');
public $defaultHttpHeader = array('Expect:');
/**
* This is the Construct.
* @return void
**/
public function __construct(){
$this->ch = curl_init();
$this->uid = dechex(rand(0,99999999));
curl_setopt($this->ch,CURLOPT_COOKIEJAR,TEMP_PATH.'/cluewikibot.cookies.'.$this->uid.'.dat');
curl_setopt($this->ch,CURLOPT_COOKIEFILE,TEMP_PATH.'/cluewikibot.cookies.'.$this->uid.'.dat');
curl_setopt($this->ch,CURLOPT_MAXCONNECTS,100);
$this->postfollowredirs = 0;
$this->getfollowredirs = 1;
$this->cookie_jar = array();
}
/**
* Get the HTTP code from cURL.
* @return array
**/
public function http_code(){
return curl_getinfo($this->ch, CURLINFO_HTTP_CODE);
}
/**
* Encode the HTTP data.
* @param $data
* @param $keyprefix
* @param $keypost
* @return array
**/
public function data_encode($data,$keyprefix = "",$keypostfix = ""){
assert(is_array($data));
$vars=null;
foreach($data as $key=>$value){
if(is_array($value)) $vars .= $this->data_encode($value, $keyprefix.$key.$keypostfix.urlencode("["), urlencode("]"));
else $vars .= $keyprefix.$key.$keypostfix."=".urlencode($value)."&";
}
return $vars;
}
/**
* Send data through HTTP POST.
* @param $url The target URL.
* @param $data The POST data.
* @return mixed The response from Server.
**/
public function post($url,$data){
$time = microtime(1);
curl_setopt($this->ch,CURLOPT_URL,$url);
curl_setopt($this->ch,CURLOPT_USERAGENT,$this->userAgent);
/* Crappy hack to add extra cookies, should be cleaned up */
foreach ($this->cookie_jar as $name => $value){
if (empty($cookies)) $cookies = "$name=$value";
else $cookies .= "; $name=$value";
}
if ($cookies != null)
curl_setopt($this->ch,CURLOPT_COOKIE,$cookies);
curl_setopt($this->ch,CURLOPT_FOLLOWLOCATION,$this->postfollowredirs);
curl_setopt($this->ch,CURLOPT_MAXREDIRS,10);
curl_setopt( $this->ch, CURLOPT_HTTPHEADER, $this->httpHeader );
curl_setopt($this->ch,CURLOPT_RETURNTRANSFER,1);
curl_setopt($this->ch,CURLOPT_TIMEOUT,30);
curl_setopt($this->ch,CURLOPT_CONNECTTIMEOUT,10);
curl_setopt($this->ch,CURLOPT_POST,1);
curl_setopt($this->ch,CURLOPT_POSTFIELDS, $data);
$data = curl_exec($this->ch);
return $data;
}
/**
* Send data through HTTP GET.
* @param $url The target URL.
* @return mixed The response from Server.
**/
public function get($url){
$time = microtime(1);
curl_setopt($this->ch,CURLOPT_URL,$url);
curl_setopt($this->ch,CURLOPT_USERAGENT,$this->userAgent);
/* Crappy hack to add extra cookies, should be cleaned up */
$cookies = null;
foreach ($this->cookie_jar as $name => $value){
if (empty($cookies)) $cookies = "$name=$value";
else $cookies .= "; $name=$value";
}
if ($cookies != null)
curl_setopt($this->ch,CURLOPT_COOKIE,$cookies);
curl_setopt($this->ch,CURLOPT_FOLLOWLOCATION,$this->getfollowredirs);
curl_setopt($this->ch,CURLOPT_MAXREDIRS,10);
curl_setopt($this->ch,CURLOPT_HEADER,0);
curl_setopt($this->ch,CURLOPT_RETURNTRANSFER,1);
curl_setopt($this->ch,CURLOPT_TIMEOUT,30);
curl_setopt($this->ch,CURLOPT_CONNECTTIMEOUT,10);
curl_setopt($this->ch,CURLOPT_HTTPGET,1);
//curl_setopt($this->ch,CURLOPT_FAILONERROR,1);
$data = curl_exec($this->ch);
return $data;
}
/**
* Set the HTTP credentials.
* @param $uname
* @param $pwd
* @return void
**/
public function setHTTPcreds($uname,$pwd){
curl_setopt($this->ch, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
curl_setopt($this->ch, CURLOPT_USERPWD, $uname.":".$pwd);
}
/**
* This is the destruct.
* @return void
**/
public function __destruct (){
curl_close($this->ch);
@unlink(TEMP_PATH.'/cluewikibot.cookies.'.$this->uid.'.dat');
}
}
/**
* This class interacts with the Wiki using api.php.
* This is a modified version of the original class.
* @author Chris G and Cobi.
* @property string $url The Project URL (API path).
**/
class Wiki {
private $http;
private $token;
private $ecTimestamp;
public $url;
public $echoRet = false; // For debugging unserialize errors
/**
* This is the constructor.
* @param $url The project and API URL
* @param $hu
* @param $hp
* @return void
**/
public function __construct ($url,$hu=null,$hp=null){
$this->http = new http();
$this->token = null;
$this->url = $url;
$this->ecTimestamp = null;
if ($hu!==null) $this->http->setHTTPcreds($hu,$hp);
}
/**
* The set.
* @param $var
* @param $val
* @return void
**/
public function __set($var,$val){
switch($var){
case 'quiet':
$this->http->quiet=$val;
break;
default:
echo "WARNING: Unknown variable ($var)!\n";
}
}
/**
* Set/change the user agent.
* @param $userAgent The user agent string.
**/
public function setUserAgent($userAgent){
$this->http->userAgent = $userAgent;
}
/**
* Set/change the http header.
* @param $httpHeader The http header.
**/
public function setHttpHeader ( $httpHeader ){
$this->http->httpHeader = $httpHeader;
}
/**
* Set/change the http headers.
* @param $httpHeader The http header.
**/
public function useDefaultHttpHeader (){
$this->http->httpHeader = $this->http->defaultHttpHeader;
}
/**
* Sends a query to the API.
* @param $query The query string.
* @param $post POST data if its a post request (optional).
* @param $repeat How many times the request will be repeated.
* @param $url The URL where we want to work (for external services API).
* @return mixed The response from server (API result).
**/
public function query($query,$post=null,$repeat=null,$url=null){
if(empty($url)) $url = $this->url;
if($post==null) $ret = $this->http->get($url.$query);
else $ret = $this->http->post($url.$query,$post);
if($this->http->http_code() != "200"){
if($repeat < 10) return $this->query($query,$post,++$repeat);
else throw new Exception("HTTP Error " . $this->http->http_code() . " - $url$query" );
}
if($this->echoRet){
if( @unserialize( $ret ) === false ){
return array( 'errors' => array("The API query result can't be unserialized. Raw text is as follows: $ret\n" ) );
}
}
return unserialize( $ret );
}
/**
* Gets the content of a page. Returns false on error.
* Use getPageContents() as alternative to get the page in multiple formats
* @param $page The wikipedia page to fetch.
* @param $revid The revision id to fetch (optional).
* @param $$detectEditConflict
* @return string The wikitext for the given page.
**/
public function getpage($page,$revid=null,$detectEditConflict=false){
$append = '';
if ($revid!=null)
$append = '&rvstartid='.$revid;
$x = $this->query('?action=query&format=php&prop=revisions&titles='.urlencode($page).'&rvlimit=1&rvprop=content|timestamp'.$append);
foreach ($x['query']['pages'] as $ret){
if (isset($ret['revisions'][0]['*'])){
if ($detectEditConflict)
$this->ecTimestamp = $ret['revisions'][0]['timestamp'];
return $ret['revisions'][0]['*'];
}else return false;
}
}
/**
* This function takes a username and password and logs you into Wiki.
* @param $user Username to login as.
* @param $pass Password that belongs to the username.
* @return array The API result
**/
public function login($user,$pass){
$post = array('lgname' => $user, 'lgpassword' => $pass);
$ret = $this->query('?action=login&format=php',$post);
/* This is now required - see https://phabricator.wikimedia.org/T25076 */
if ($ret['login']['result'] == 'NeedToken'){
$post['lgtoken'] = $ret['login']['token'];
$ret = $this->query( '?action=login&format=php', $post );
}
if ($ret['login']['result'] != 'Success'){
echo "Login error: \n";
print_r($ret);
die();
} else {
return $ret;
}
}
/**
* crappy hack to allow users to use cookies from old sessions.
* @param $data The data to be parsed.
* @return void
**/
public function setLogin($data){
$this->http->cookie_jar = array(
$data['cookieprefix'].'UserName' => $data['lgusername'],
$data['cookieprefix'].'UserID' => $data['lguserid'],
$data['cookieprefix'].'Token' => $data['lgtoken'],
$data['cookieprefix'].'_session' => $data['sessionid'],
);
}
}
/**
* This class is intended to do the archiving.
* @author Davod.
* @property string $api_url MediaWiki API URL
* @property string $wiki_url MediaWiki Wiki URL
* @property string $sitename MediaWiki site name
* @property string $email_operator The emailaddress of the operator,to be used to send mails to him/her in case of error.
* @property array $extlinks_bl The blacklisted URLs to exclude for archiving.
* @property int $pages_per_query The maximum pages retrived per query (iteration) (250 by default).
* @property string $db_server The database server address (absolute path for SQLite).
* @property string $db_name The database name.
* @property string $db_user The database access username.
* @property string $db_password The database access password.
* @property string $db_table The database access password.
* @property string $tool_url The tool URL.
**/
class WebArchiveBOT extends Wiki {
public $api_url;
private $wiki_url;
private $sitename;
private $email_operator;
private $extlinks_bl;
private $pages_per_query;
private $db_server;
private $db_name;
private $db_user;
private $db_password;
private $db_table;
private $tool_url;
/**
* This is the constructor.
* @param string $api_url MediaWiki API URL
* @param string $wiki_url MediaWiki Wiki URL
* @param string $sitename MediaWiki site name
* @param string $email_operator The emailaddress of the operator,to be used to send mails to him/her in case of error.
* @param array $extlinks_bl The blacklisted URLs to exclude for archiving.
* @param int $pages_per_query The maximum pages retrived per query (iteration) (250 by default).
* @param string $db_server The database server address (absolute path for SQLite).
* @param string $db_name The database name.
* @param string $db_user The database access username.
* @param string $db_password The database access password.
* @param string $db_table The database access password.
* @param string $tool_url The tool URL.
**/
public function __construct($api_url,$wiki_url,$sitename,$email_operator,$extlinks_bl,$pages_per_query,$db_server,$db_name,$db_user,$db_password,$db_table){
if(!is_array($extlinks_bl)) $extlinks_bl = null;
Wiki::__construct($api_url); // Pass main parameter to parent Class' __construct()
$this->api_url = $api_url;
$this->wiki_url = $wiki_url;
$this->tool_url = dirname($_SERVER['SCRIPT_NAME']);
$this->sitename = $sitename;
$this->email_operator = $email_operator;
$this->extlinks_bl = '/('.implode('|',$extlinks_bl).')/';
$this->pages_per_query = $pages_per_query;
$this->db_server = $db_server;
$this->db_name = $db_name;
$this->db_user = $db_user;
$this->db_password = $db_password;
$this->db_table = $db_table;
}
/**
* Get the contents from the Wiki page. This is an alternative for getpage() with more features.
* @param $page The page that we're working.
* @param $props The properties that we want to obtain from the query (string or array).
* @return array The API result (page contents and metadata in the desired format).
**/
public function getPageContents($page,$props=null){
if(is_array($props)) $props = implode('|',$props);
if(!empty($_SESSION['wiki_page_contents'][$page][$props])) $contents = $_SESSION['wiki_page_contents'][$page][$props];
else{
$contents = $this->query("?action=parse&format=php&prop=$props&disabletoc=&mobileformat=&noimages=&page=".urlencode($page));
$contents['parse']['text']['*'] = str_replace('<a','<a target="'.urlencode($page).'"',$contents['parse']['text']['*']);
$contents['parse']['text']['*'] = str_replace('href="/wiki/','href="'.$this->site_url,$contents['parse']['text']['*']);
$_SESSION['wiki_page_contents'][$page][$props] = $contents;
}
return $contents;
}
/**
* Get a list of the latest files uploaded to Commons.
* @param void
* @return array The API result (the list of the latest files uploaded).
**/
public function getLatestFiles(){
$query = "?action=query&list=allimages&format=php&aisort=timestamp&aidir=older&aiprop=timestamp%7Ccanonicaltitle&ailimit=$this->pages_per_query";
$query = $this->query($query);
return $query['query']['allimages'];
}
/**
* Wraper for in_array() that also parse regex.
* @param mixed $needle The value to find.
* @param mixed $haystack The string/array where find in.
* @param bool $regex To allow or not regex (contents in $haystack should be string and valid regex).
* If false, then, in_array() will be used. Used only for regex, does not matter for non-regex search.
* @param bool $inverse To match or not the regex (no match is done with '?!').
* @return bool true if value were found in the array, false if not.
**/
public function inArray($needle,$haystack,$regex=false,$inverse=false){
if(empty($needle)) return false;
if($regex === true){
$regex = implode('|',$haystack);
if($inverse === true) $regex = "/^(?!$regex)/";
else $regex = "/^($regex)/";
if(preg_match($regex,$needle) >= 1) $found = true;
else $found = false;
}else $found = in_array($needle,$haystack);
return $found;
}
/**
* Query the Internet Archive API to get the latest archived version (or archive if not available yet).
* @param array $urls the URLs to be parsed.
* @return array the Wayback Machine URLs retrived.
**/
public function urls2archive_urls($urls,$json=false){
foreach($urls as $url){
if(preg_match($this->extlinks_bl,$url)) continue;
// Get the latest archive, if available
$archive_g = file_get_contents('http://archive.org/wayback/available?url='.urlencode($url));
if($archive_g != '{"archived_snapshots":{}}'){
$latest_archive = json_decode($archive_g,true);
$archive_timestamp = strtotime($latest_archive['archived_snapshots']['closest']['timestamp']);
}
$timestamp = time();
if(!is_int($archive_timestamp)) $archive_timestamp = 0;
$window_time = $timestamp-$archive_timestamp;
// Do the archive, if last archive has been created >2 days
if($window_time >= 172800){
$headers = @get_headers("https://web.archive.org/save/$url",1);
if($headers[0] == "HTTP/1.1 403 FORBIDDEN") continue;
$location = $headers['Content-Location'];
if(!empty($location)){
if(is_array($location)) $location = end($location);
if(preg_match("/^\/web\/[0-9]{14}\/[\p{L}\p{N}\p{S}\p{P}\p{M}\p{Zs}]+$/",$location) === 1) $archive_urls[] = "https://web.archive.org$location";
else echo "Wrong location: $location\n";
}
}else{
$archive_urls[] = $latest_archive['archived_snapshots']['closest']['url'];
}
}
if(!empty(array_filter($archive_urls))){
if($json === true) $archive_urls = json_encode(array_unique(array_filter($archive_urls)));
else $archive_urls = array_unique(array_filter($archive_urls));
return $archive_urls;
}else{
return false;
}
}
/**
* Do the archive process: Query to Internet Archive and store the results in a local DB
* @param array $pages The pages retrived by getLatestFiles()
* @return bool The final results.
**/
public function archive($pages){
if(!is_array($pages) || empty($pages)) return false;
try{
$dsn = "mysql:dbname=$this->db_name;host=$this->db_server";
$db = new PDO($dsn,$this->db_user,$this->db_password);
}catch (PDOException $e){
$message = 'Connection to the DB failed';
echo "$message: " . $e->getMessage();
$this->sendMail("$message: " . $e->getMessage());
echo "\n";
die;
}
$db->exec("CREATE TABLE IF NOT EXISTS `data`( `id` INT NOT NULL AUTO_INCREMENT, `pageid` INT NOT NULL, `title` VARCHAR(256) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, `timestamp` TIMESTAMP NOT NULL, `urls` TEXT CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, UNIQUE KEY `id` (`id`) USING BTREE, UNIQUE KEY `pageid` (`pageid`) USING BTREE, PRIMARY KEY (`id`)) ENGINE = InnoDB;");
foreach($pages as $page){
$title = $page['canonicaltitle'];
$timestamp = $page['timestamp'];
$metadata = $this->GetPageContents($title,'externallinks');
$pageid = $metadata['parse']['pageid'];
$urls = $this->urls2archive_urls($metadata['parse']['externallinks'],true);
if(empty($urls)) continue;
$sql = "INSERT INTO data(`pageid`,`title`,`timestamp`,`urls`) VALUES ('$pageid','$title','$timestamp','$urls');";
$stmt = $db->prepare($sql);
$stmt->execute();
}
unset($db);
return true;
}
/**
* Send email (mostly for errors).
* @param string $message the message.
* @param string $subject the subject ("Errors with WebArchiveBOT" by default).
* @return void
**/
public function sendMail($message,$subject=null){
if($subject == null) $subject = "Errors with WebArchiveBOT";
$from = "webarchivebot-noreply@wmflabs.org";
$to = $this->email_operator;
$headers = "From: WebArchiveBOT <$from>\r\n";
mail($to,$subject,$message,$headers);
}
/**
* Get the page ID from MediaWiki, by the title
* @param string $title the title where the ID is needed.
* @return int
**/
public function getPageid($title){
if(empty($title)) return false;
$query = '?action=query&format=php&titles='.urlencode($title);
$query = $this->query($query);
$query = $query['query']['pages'];
foreach($query as $key=>$value){
$pageid = $key;
}
if(is_int($pageid)) return $pageid;
else return false;
}
/**
* Retrive the data.
* @param int $limit The maximum results queried to the DB.
* @param string $title The filename to search.
* @return array
**/
public function getArchive($limit=100,$title){
// Max limit is hardcoded to 100.000 to prevent memory exhaustion
if(!is_int($limit) || $limit > 10000) $limit = 100;
$dsn = "mysql:dbname=$this->db_name;host=$this->db_server";
try{
$db = new PDO($dsn,$this->db_user,$this->db_password);
}catch (PDOException $e){
die("Connection to the DB failed: " . $e->getMessage());
}
// Get the page ID for faster search in the DB
if($pageid = $this->getPageid($title)) $sql = "SELECT * FROM `$this->db_table` WHERE `pageid` = $pageid LIMIT 1;";
else $sql = "SELECT * FROM `$this->db_table` ORDER BY `id` DESC LIMIT $limit";
$stmt = $db->prepare($sql);
if($stmt->execute() !== false){
$result = $stmt->fetchAll();
foreach($result as $row){
$title = $row['title'];
$timestamp = $row['timestamp'];
$urls = json_decode($row['urls']);
$data[$title] = array('timestamp'=>$timestamp,'urls'=>$urls);
}
if(empty($data)) $data = false;
}else $data = false;
return $data;
}
/**
* Prints the main page to the browser.
* @param string $data The data to be parsed
* @return void
**/
public function printMain($data){
echo <<<EOC
<html>
<head>
<meta charset="UTF-8">
<title>WebArchiveBOT, archived items</title>
<style type="text/css">
body{
font-family:Roboto,droid-sans,Arial,sans-serif;
font-size:1vw;
}
h1{
font-size:2vw;
}
h2{
font-size:1.5vw;
}
a {
overflow: hidden;
white-space: nowrap;
text-overflow: ellipsis;
}
input {
font-size: 1.5vw;
}
@media screen and (max-width: 1024px) {
font-size: 6vw;
}
</style>
</head>
<body>
<div>
<h1><a href="$this->tool_url">WebArchiveBOT, archived items</a></h1>
<h2>This page lists the last 100 files uploaded to $this->sitename and their links archived at Internet Archive by Wayback Machine. Just press F5 for refresh.</h2>
<p>You can download the latest [<a href="?json_output=100">100</a>] [<a href="?json_output=1000">1.000</a>] [<a href="?json_output=10000">10.000</a>] files list in JSON format.</p>
<p>For more information, see the <a href="$this->tool_url/doc/index.html" target="blank">Documentation</a>.
<a href="https://github.com/Amitie10g/WebArchiveBOT" target="blank">Source code</a> is available at GitHub under the GNU Affero General Public License v3.</p>
<div>
Find a file (with the prefix <code>File:</code>)
<form method="get" action="$this->tool_url">
<input type="text" name="file">
<input type="submit">
</form>
</div>
</div>
<div>
EOC;
if(!empty($data)){
foreach($data as $title=>$item){
$url = $this->wiki_url . str_replace(array('%3A','%2F','%3F','%26','%3D','%23','%20',' '),array(':','/','?','&','=','#','_','_'),rawurlencode($title));
$date = $item['timestamp'];
echo <<<EOC
<h2><a href="$url" target="blank">$title</a></h2>
<b>Uploaded: </b>$date (UTC)
<ul>
EOC;
foreach($item['urls'] as $link){
$escaped_link = str_replace(array('%3A','%2F','%3F','%26','%3D','%23','%20',' '),array(':','/','?','&','=','#','_','_'),rawurlencode($link));
echo <<<EOC
<li><a href="$escaped_link" target="blank">$link</a></li>
EOC;
}
echo <<<EOC
</ul>
EOC;
}
}else{
echo "<i>No data available</i>";
}
echo <<<EOC
</div>
</body>
</html>
EOC;
}
}