From cb57e36a1045482dc784e54b1770caf8b110a65e Mon Sep 17 00:00:00 2001
From: grammarware <vadim@grammarware.net>
Date: Sun, 27 Jan 2013 18:15:25 +0100
Subject: [PATCH] Calculate community sizes; Helper scripts for reporting
 useful statistics

---
 dblp/community.py | 40 ++++++++++++++++++++++++++++++++++++++++
 dblp/reportx.py   | 25 +++++++++++++++++++++++++
 dblp/reporty.py   | 25 +++++++++++++++++++++++++
 3 files changed, 90 insertions(+)
 create mode 100755 dblp/community.py
 create mode 100755 dblp/reportx.py
 create mode 100755 dblp/reporty.py

diff --git a/dblp/community.py b/dblp/community.py
new file mode 100755
index 0000000..7c34e49
--- /dev/null
+++ b/dblp/community.py
@@ -0,0 +1,40 @@
+#! /usr/bin/env python
+# Computes the MoDELS community size per year: for every x that is partOf
+# "MoDELS", collect the distinct left-hand sides published at x, grouped by
+# the year found in x.  Input lines look like: "x" relation "y".
+import sys
+
+
+def triple(line):
+	# Return (x, relation, y) split on '"', or None for a blank/malformed line.
+	parts = line.strip().split('"')
+	return tuple(parts[1:4]) if len(parts) >= 4 else None
+
+
+# x that is partOf MoDELS -> set of names published at x (a set dedups in O(1))
+names = {}
+with open('rdf/partOf.curated.txt', 'r') as po:
+	for line in po:
+		t = triple(line)
+		if t and t[1] == ' partOf ' and t[2] == "MoDELS":
+			names[t[0]] = set()
+
+# presumably a large file (it used to be read in 1 MB chunks): stream it
+with open('rdf/publishedAt.txt', 'r') as pa:
+	for line in pa:
+		t = triple(line)
+		# 'in names' is a hash lookup; names.keys() was a linear list scan
+		if t and t[1] == ' publishedAt ' and t[2] in names:
+			names[t[2]].add(t[0])
+
+# year -> set of names, merging all keys that map to the same year
+years = {}
+for n in names:
+	# drop '/', '-' and lowercase letters; the first four characters that
+	# remain are taken as the year
+	y = ''.join(c for c in n if c not in '/-' and not 'a' <= c <= 'z')[:4]
+	years.setdefault(y, set()).update(names[n])
+
+for y in sorted(years):
+	# single string: same bytes as the old "print y,'   ',len(...)" statement
+	print('%s     %d' % (y, len(years[y])))
diff --git a/dblp/reportx.py b/dblp/reportx.py
new file mode 100755
index 0000000..11e17ab
--- /dev/null
+++ b/dblp/reportx.py
@@ -0,0 +1,25 @@
+#! /usr/bin/env python
+# Counts the distinct left-hand sides (x in: "x" rel "y") of relation <rel>.
+# Usage: reportx.py <rel> [really] -- any second argument selects
+# rdf/<rel>.really.curated.txt instead of rdf/<rel>.curated.txt.
+import sys
+
+if len(sys.argv) < 2:
+	sys.exit('Usage: %s <rel> [really]' % sys.argv[0])
+rel = sys.argv[1]
+suffix = 'really.curated' if len(sys.argv) > 2 else 'curated'
+path = 'rdf/%s.%s.txt' % (rel, suffix)
+# one string, so the output matches the old "print 'Using', ..." statement
+print('Using %s' % path)
+
+wanted = ' %s ' % rel  # the relation field keeps its surrounding spaces
+venues = set()  # a set dedups in O(1); the former list scan was O(n)
+with open(path, 'r') as sa:
+	# iterating the file streams it, so the input is never fully loaded
+	for line in sa:
+		parts = line.strip().split('"')
+		# skip blank/malformed lines instead of crashing on the unpack
+		if len(parts) >= 4 and parts[2] == wanted:
+			venues.add(parts[1])
+
+print(len(venues))
diff --git a/dblp/reporty.py b/dblp/reporty.py
new file mode 100755
index 0000000..989d2b1
--- /dev/null
+++ b/dblp/reporty.py
@@ -0,0 +1,25 @@
+#! /usr/bin/env python
+# Counts the distinct right-hand sides (y in: "x" rel "y") of relation <rel>.
+# Usage: reporty.py <rel> [really] -- any second argument selects
+# rdf/<rel>.really.curated.txt instead of rdf/<rel>.curated.txt.
+import sys
+
+if len(sys.argv) < 2:
+	sys.exit('Usage: %s <rel> [really]' % sys.argv[0])
+rel = sys.argv[1]
+suffix = 'really.curated' if len(sys.argv) > 2 else 'curated'
+path = 'rdf/%s.%s.txt' % (rel, suffix)
+# one string, so the output matches the old "print 'Using', ..." statement
+print('Using %s' % path)
+
+wanted = ' %s ' % rel  # the relation field keeps its surrounding spaces
+venues = set()  # a set dedups in O(1); the former list scan was O(n)
+with open(path, 'r') as sa:
+	# iterating the file streams it, so the input is never fully loaded
+	for line in sa:
+		parts = line.strip().split('"')
+		# skip blank/malformed lines instead of crashing on the unpack
+		if len(parts) >= 4 and parts[3] == wanted.join(['', ''])[0:0] + parts[3] and parts[2] == wanted:
+			venues.add(parts[3])
+
+print(len(venues))