Skip to content
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 80 lines (68 sloc) 3.45 KB
# generate some unchanging stats (blog rankings, journal paper shares etc.)
# there's no point in working these out dynamically as they only change when there's been a pipeline update
use lib (".");
use strict;
use DBI;
use config qw(%config log log_error urldecode $DEBUG parse_post_xml url_breakdown);
use helper qw(download_url);
use Digest::MD5 qw(md5_hex);
my $connection_string = sprintf("dbi:mysql:%s:%s", $config{"db_name"}, $config{"db_host"});
my $db = DBI->connect($connection_string, $config{"db_user"}, $config{"db_password"}) or log_error("Couldn't connect to the database.\n");
# let's only summarize links from the past x days.
my $age_limit = 30;
# we have two "summary" tables, for links and posts
my $sql = $db->prepare("DELETE FROM links_summary");
# repopulate the tables with data
my $sql = $db->prepare("SET group_concat_max_len = 4096");
my $sql = $db->prepare("
INSERT INTO links_summary
(categories, url_hash, post_titles, linked_by, url, domain, titles, page_title, last_linked_on)
GROUP_CONCAT(DISTINCT t2.tag) AS categories,
links.url_hash, GROUP_CONCAT(DISTINCT CONCAT(p2.title,'===',p2.url,'===',p2.post_id,'===',blogs.title,'===',blogs.url,'===',blogs.blog_id) SEPARATOR '|||') AS post_titles, COUNT(DISTINCT links.blog_id) AS linked_by, links.url, domain, GROUP_CONCAT(DISTINCT TRIM(links.title) SEPARATOR ', ') AS title, links.page_title AS page_title, MAX(p2.pubdate) AS last_linked_on FROM links LEFT JOIN posts ON posts.url_hash = links.url_hash, posts AS p2, blogs, tags AS t2 WHERE blogs.blog_id = p2.blog_id AND p2.post_id = links.post_id AND ISNULL(posts.post_id) AND links.type = 'link' AND ISNULL(links.paper_id) AND length(trim(links.title)) >= 3 AND t2.blog_id = p2.blog_id AND DATEDIFF(CURRENT_TIMESTAMP(), p2.pubdate) <= ? GROUP BY links.url_hash
print "Done repopulation links...\n";
my $sql = $db->prepare("DELETE FROM posts_summary");
my $sql = $db->prepare("
INSERT INTO posts_summary
(post_id, blog_id, title, url, url_hash, summary, filename, author, pubdate, added_on, blog_name, blog_image, linked_by)
SELECT posts.post_id, posts.blog_id, posts.title, posts.url, posts.url_hash, posts.summary, posts.filename,, posts.pubdate, posts.added_on, blogs.title AS blog_name, blogs.image AS blog_image, COUNT(DISTINCT IF(ISNULL(links.blog_id),NULL,IF(links.blog_id = posts.blog_id,NULL,links.blog_id))) AS linked_by FROM blogs, posts LEFT JOIN links ON links.url_hash = posts.url_hash WHERE blogs.blog_id = posts.blog_id GROUP BY posts.url_hash
print "Done repopulation posts...\n";
my $sql = $db->prepare("DELETE FROM papers_summary");
my $sql = $db->prepare("
INSERT INTO papers_summary
(paper_id, isbn_id, image, doi_id, pubmed_id, arxiv_id, pii_id, journal, title, abstract, authors, pubdate, added_on, url, cited_by, reviewed, blog_ids)
links.url AS url,
COUNT(DISTINCT links.blog_id) AS cited_by,
SUM(IF(links.type = 'review', 1, 0)) AS reviewed,
GROUP_CONCAT(DISTINCT links.blog_id SEPARATOR ',') AS blog_ids
FROM papers LEFT JOIN links ON links.paper_id = papers.paper_id GROUP BY papers.paper_id;
print "Done repopulation papers...\n";
Jump to Line
Something went wrong with that request. Please try again.