Find the most frequent participants

In [1]:
import os
import glob
import logging
from collections import Counter
import liegecolloquium

Prepare logger:

In [7]:
# Notebook-wide logger used for progress and summary messages.
logger = logging.getLogger('counting-participants')
logger.setLevel(logging.DEBUG)
# With no handler anywhere in the logger hierarchy, the logging module falls
# back to its last-resort handler, which only emits WARNING and above, so the
# info/debug messages of this notebook would be dropped silently.
# The hasHandlers() guard also covers handlers installed on ancestor loggers,
# which keeps re-running this cell from duplicating every message.
if not logger.hasHandlers():
    logger.addHandler(logging.StreamHandler())

# Files and directories

In [3]:
# Input and output both live in the same data directory.
datadir = "../data/"
outputdir = "../data/"

# Collect the participant files matching the pattern, in sorted order.
participantpattern = os.path.join(datadir, "ParticipantList-19*.tsv")
participantfilelist = glob.glob(participantpattern)
participantfilelist.sort()

# Path of the GeoJSON file associated with this analysis.
outputfile = os.path.join(outputdir, "ParticipantCountry.geojson")

nfiles = len(participantfilelist)
logger.info("Working on {0} participant files".format(nfiles))

# Read data
We provide a list of files to be read in order to create a list of participants for the period of interest (full period, decades, ...)

## Participant list

In [11]:
# Read every participant file (tab-separated, one participant per line) and
# build the list of Participant objects plus three lists of name keys.
participantlist = []
namelist = []            # name keys of all participants
namelist_noliege = []    # country is "Belgium" but city is not "Liège"
namelist_nobelgium = []  # country is not "Belgium"
nptotal = 0  # not used in the cells visible here; kept in case other cells rely on it
nfields = 5  # number of leading columns handed to liegecolloquium.Participant
for participantfile in participantfilelist:
    logger.debug("Working on file {0}".format(os.path.basename(participantfile)))
    # Explicit encoding: the data is compared with the non-ASCII literal "Liège",
    # so decoding must not depend on the platform's default encoding.
    # NOTE(review): assumes the .tsv files are UTF-8 encoded; confirm.
    with open(participantfile, "r", encoding="utf-8") as f:
        for linenumber, line in enumerate(f, start=1):
            fields = line.rstrip().split('\t')
            if len(fields) < nfields:
                # Blank or truncated lines used to raise IndexError; skip them,
                # and report the ones that actually contain something.
                if line.strip():
                    logger.warning("Skipping line {0} of {1}: expected {2} fields, got {3}".format(
                        linenumber, os.path.basename(participantfile), nfields, len(fields)))
                continue
            participant = liegecolloquium.Participant(*fields[:nfields])
            # presumably normalises the country name; see the liegecolloquium module
            participant.replace_country()
            participantlist.append(participant)
            # Key used for counting: second column followed by first column
            # (looks like initials + surname, judging from the counters' output).
            namekey = "".join((fields[1].rstrip(), fields[0].rstrip()))
            namelist.append(namekey)
            if participant.country != "Belgium":
                namelist_nobelgium.append(namekey)
            elif participant.city != "Liège":
                namelist_noliege.append(namekey)
logger.info("Total participants: {}".format(len(participantlist)))
logger.info("Participant outside Belgium: {}".format(len(namelist_nobelgium)))
# This figure only covers participants from Belgium whose city is not Liège.
logger.info("Participant outside Liège: {}".format(len(namelist_noliege)))

In [12]:
# Count the occurrences of each name key: all participants (c1),
# the namelist_nobelgium entries (c2) and the namelist_noliege entries (c3).
c1, c2, c3 = (
    Counter(names)
    for names in (namelist, namelist_nobelgium, namelist_noliege)
)

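Find the 10 most frequent participants: over all participants (`c1`), among participants from outside Belgium (`c2`), and among Belgian participants who are not from Liège (`c3`):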

In [6]:
# Ten most frequent name keys over all participants
c1.most_common(10)

[('J.C.J.Nihoul', 11),
 ('A.Disteche', 10),
 ('G.Lebon', 10),
 ('Y.Adam', 9),
 ('G.Pichot', 9),
 ('Y.Runfola', 9),
 ('J.Smitz', 8),
 ("G.Chabert d'Hières", 7),
 ('A.Bah', 7),
 ('C.J.Frankignoul', 6)]

In [13]:
# Ten most frequent name keys among participants whose country is not Belgium
c2.most_common(10)

[("G.Chabert d'Hières", 7),
 ('H.G.Ramming', 5),
 ('C.J.Frankignoul', 4),
 ('C.Le Provost', 4),
 ('A.Bah', 4),
 ('A.Edwards', 3),
 ('A.J.Elliott', 3),
 ('L.Magaard', 3),
 ('J.D.Woods', 3),
 ('M.Crepon', 3)]

In [14]:
# Ten most frequent name keys among participants from Belgium whose city is not Liège
c3.most_common(10)

[('J.L.Van Hamme', 5),
 ('G.Pichot', 4),
 ('W.Bayens', 3),
 ('G.Schayes', 2),
 ('G.Billen', 2),
 ('J.J.Peters', 2),
 ('A.Berger', 2),
 ('Y.Adam', 2),
 ('A.L.Berger', 1),
 ('P.Glansdorff', 1)]