From cb57e36a1045482dc784e54b1770caf8b110a65e Mon Sep 17 00:00:00 2001
From: grammarware
Date: Sun, 27 Jan 2013 18:15:25 +0100
Subject: [PATCH] Calculate community sizes; Helper scripts for reporting useful statistics

---
 dblp/community.py | 38 ++++++++++++++++++++++++++++++++++++++
 dblp/reportx.py   | 26 ++++++++++++++++++++++++++
 dblp/reporty.py   | 26 ++++++++++++++++++++++++++
 3 files changed, 90 insertions(+)
 create mode 100755 dblp/community.py
 create mode 100755 dblp/reportx.py
 create mode 100755 dblp/reporty.py

diff --git a/dblp/community.py b/dblp/community.py
new file mode 100755
index 0000000..7c34e49
--- /dev/null
+++ b/dblp/community.py
@@ -0,0 +1,38 @@
+#! /usr/bin/env python
+# This script computes the community size per year: for each year key it
+# prints how many distinct publishedAt subjects point at a part of MoDELS.
+import sys
+
+# Maps every x with "x" partOf "MoDELS" to the set of distinct first fields
+# of the publishedAt lines whose second quoted string is that x.
+names = {}
+
+with open('rdf/partOf.curated.txt', 'r') as po:
+    for line in po:
+        # Splitting on double quotes: fields 1..3 are the first quoted
+        # string, the text between the quotes, and the second quoted string.
+        x, r, y = line.strip().split('"')[1:4]
+        if r == ' partOf ' and y == "MoDELS":
+            names[x] = set()
+
+# Iterating over the file object streams it line by line, so the whole
+# publishedAt file never has to be held in memory at once.
+with open('rdf/publishedAt.txt', 'r') as pa:
+    for line in pa:
+        x, r, y = line.strip().split('"')[1:4]
+        # Dict membership and set insertion are O(1); duplicates collapse.
+        if r == ' publishedAt ' and y in names:
+            names[y].add(x)
+
+# Maps a year key to the set of distinct subjects over all matching names.
+years = {}
+for n in names:
+    # The year key is what remains of the name after dropping '/', '-' and
+    # lowercase ASCII letters, truncated to four characters.
+    # NOTE(review): assumes the first four remaining characters are the
+    # year; digits or capitals occurring before the year would break this.
+    y = ''.join(c for c in n if c not in '/-' and not 'a' <= c <= 'z')[:4]
+    years.setdefault(y, set()).update(names[n])
+
+for y in sorted(years):
+    print y, ' ', len(years[y])
diff --git a/dblp/reportx.py b/dblp/reportx.py
new file mode 100755
index 0000000..11e17ab
--- /dev/null
+++ b/dblp/reportx.py
@@ -0,0 +1,26 @@
+#! /usr/bin/env python
+# This script reports how many distinct first quoted strings (subjects)
+# occur in the lines of the relation given as the first command line
+# argument. Any second argument reads the ".really.curated" file instead.
+import sys
+
+if len(sys.argv) < 2:
+    sys.exit('usage: %s RELATION [REALLY]' % sys.argv[0])
+
+rel = sys.argv[1]
+if len(sys.argv) > 2:
+    path = 'rdf/%s.really.curated.txt' % rel
+else:
+    path = 'rdf/%s.curated.txt' % rel
+print 'Using', path
+
+marker = ' %s ' % rel
+venues = set()
+with open(path, 'r') as sa:
+    for line in sa:
+        x, r, y = line.strip().split('"')[1:4]
+        if r == marker:
+            # A set keeps membership O(1) and drops duplicates for free.
+            venues.add(x)
+
+print len(venues)
diff --git a/dblp/reporty.py b/dblp/reporty.py
new file mode 100755
index 0000000..989d2b1
--- /dev/null
+++ b/dblp/reporty.py
@@ -0,0 +1,26 @@
+#! /usr/bin/env python
+# This script reports how many distinct second quoted strings (objects)
+# occur in the lines of the relation given as the first command line
+# argument. Any second argument reads the ".really.curated" file instead.
+import sys
+
+if len(sys.argv) < 2:
+    sys.exit('usage: %s RELATION [REALLY]' % sys.argv[0])
+
+rel = sys.argv[1]
+if len(sys.argv) > 2:
+    path = 'rdf/%s.really.curated.txt' % rel
+else:
+    path = 'rdf/%s.curated.txt' % rel
+print 'Using', path
+
+marker = ' %s ' % rel
+venues = set()
+with open(path, 'r') as sa:
+    for line in sa:
+        x, r, y = line.strip().split('"')[1:4]
+        if r == marker:
+            # A set keeps membership O(1) and drops duplicates for free.
+            venues.add(y)
+
+print len(venues)