Create a `geoJSON` file storing the number of participants per country.

In [1]:
import os
import glob
import logging
from collections import Counter
import geojson
import json
import liegecolloquium

Prepare logger:

In [2]:
# Make sure a handler exists so that the records emitted through `logger`
# are displayed from a fresh kernel. basicConfig() does nothing when the root
# logger already has a handler, so re-running this cell is safe.
logging.basicConfig()

# Notebook-specific logger; DEBUG level so that per-file messages are emitted.
logger = logging.getLogger('counting')
logger.setLevel(logging.DEBUG)

# Files and directories

We need:
1. a geoJSON file with the world countries
2. a list of TSV (tab-separated) files storing the list of participants.

In [3]:
# Input and output directories (relative paths).
datadir = "../data/"
outputdir = "../data/"

# Input: country boundaries; output: same geoJSON with the participant counts.
countryfile = os.path.join(datadir, "countries.geo.json")
outputfile = os.path.join(outputdir, "ParticipantCountry.geojson")

# Participant files matching the pattern, sorted by file name.
participantfilelist = glob.glob(os.path.join(datadir, "ParticipantList-19*.tsv"))
participantfilelist.sort()
logger.info("Working on {0} participant files".format(len(participantfilelist)))

# Read data
We provide a list of files to be read in order to create a list of participants for the period of interest (full period, decades, ...)

## Participant list

In [4]:
# countrylist receives the country of every participant, one entry per
# non-blank line of the participant files.
# lonlist, latlist and nptotal are only initialised here; this cell does not
# fill them.
lonlist, latlist, countrylist = [], [], []
nptotal = 0
for participantfile in participantfilelist:
    logger.debug("Working on file {0}".format(os.path.basename(participantfile)))
    with open(participantfile, "r") as f:
        for line in f:
            # Skip blank lines (e.g. an empty line at the end of a file):
            # they would raise an IndexError when the columns are indexed.
            if not line.strip():
                continue
            fields = line.rstrip().split('\t')
            # The first five tab-separated columns are passed to Participant.
            participant = liegecolloquium.Participant(
                fields[0], fields[1], fields[2], fields[3], fields[4])
            # NOTE(review): replace_country() presumably normalises the country
            # name before it is read -- confirm in liegecolloquium.
            participant.replace_country()
            countrylist.append(participant.country)
    # Use the notebook logger, not the root logger (`logging.info`): the root
    # logger defaults to the WARNING level, so the message would be dropped.
    logger.info("Cumulated number of participants: {0}".format(len(countrylist)))

DEBUG:counting:Working on file ParticipantList-1973.tsv
DEBUG:counting:Working on file ParticipantList-1974.tsv
DEBUG:counting:Working on file ParticipantList-1975.tsv
DEBUG:counting:Working on file ParticipantList-1976.tsv
DEBUG:counting:Working on file ParticipantList-1977.tsv
DEBUG:counting:Working on file ParticipantList-1978.tsv
DEBUG:counting:Working on file ParticipantList-1979.tsv
DEBUG:counting:Working on file ParticipantList-1980.tsv
DEBUG:counting:Working on file ParticipantList-1981.tsv
DEBUG:counting:Working on file ParticipantList-1982.tsv
DEBUG:counting:Working on file ParticipantList-1983.tsv
DEBUG:counting:Working on file ParticipantList-1984.tsv
DEBUG:counting:Working on file ParticipantList-1985.tsv
DEBUG:counting:Working on file ParticipantList-1986.tsv
DEBUG:counting:Working on file ParticipantList-1987.tsv
DEBUG:counting:Working on file ParticipantList-1988.tsv


## Count participants per country
Use `Counter` to count the occurrences of each country, then sort the country names.

In [5]:
# Mapping country name -> number of occurrences in countrylist.
ParticipantPerCountry = Counter(countrylist)
# Country names in sorted order (the names are sorted, not the counts).
SortedParticipant = list(ParticipantPerCountry)
SortedParticipant.sort()

# Add the *Participants* property to the country geoJSON file

The geoJSON file contains the boundaries of the countries.    
We only have to add a new property to each feature, the number of participants, so that they can be read by Leaflet.

First we read the geoJSON file, which returns a dictionary.    
Then we will add the property *Participants* for the countries that are in our list.

In [6]:
# Load the country geoJSON into a Python object.
with open(countryfile) as countrystream:
    CountryJson = json.load(countrystream)
# Display the type of the loaded object as the cell output.
type(CountryJson)

dict

In [7]:
# Loop on the countries (features) of the geoJSON and attach the number of
# participants to those that appear in the participant list.
nc = 0  # number of geoJSON countries with at least one participant
np = 0  # number of participants assigned to a geoJSON country
countrylistref = []  # names of all the countries found in the geoJSON
for feature in CountryJson['features']:
    # Extract the country name
    countryName = feature['properties']['name']
    countrylistref.append(countryName)
    # Test membership on the Counter (hash lookup) rather than scanning the
    # full participant list for every country; the Counter keys are exactly
    # the distinct values of countrylist.
    if countryName in ParticipantPerCountry:
        nparticipants = ParticipantPerCountry[countryName]
        nc += 1
        logger.info("{0} --> {1}".format(countryName, nparticipants))
        # Property added to the feature; countries without participants are
        # left unchanged.
        feature['properties']['Participants'] = nparticipants
        np += nparticipants

# np differs from len(countrylist) when some participant countries have no
# feature with the same name in the geoJSON.
logger.info("{0} countries and {1} participants (should be {2}) ".format(nc, np, len(countrylist)))

INFO:counting:Australia --> 11
INFO:counting:Burundi --> 2
INFO:counting:Belgium --> 316
INFO:counting:Bulgaria --> 3
INFO:counting:Brazil --> 1
INFO:counting:Canada --> 57
INFO:counting:Switzerland --> 4
INFO:counting:China --> 8
INFO:counting:Ivory Coast --> 1
INFO:counting:Cameroon --> 1
INFO:counting:Germany --> 68
INFO:counting:Denmark --> 10
INFO:counting:Algeria --> 3
INFO:counting:Spain --> 12
INFO:counting:Estonia --> 15
INFO:counting:Finland --> 3
INFO:counting:France --> 150
INFO:counting:United Kingdom --> 106
INFO:counting:Guinea --> 4
INFO:counting:Greece --> 2
INFO:counting:India --> 2
INFO:counting:Ireland --> 3
INFO:counting:Italy --> 34
INFO:counting:Japan --> 9
INFO:counting:Kenya --> 1
INFO:counting:Kuwait --> 1
INFO:counting:Morocco --> 1
INFO:counting:Mexico --> 16
INFO:counting:New Caledonia --> 1
INFO:counting:Niger --> 6
INFO:counting:Netherlands --> 50
INFO:counting:Norway --> 24
INFO:counting:Peru --> 1
INFO:counting:Poland --> 1
INFO:counting:Portugal --> 3


In [13]:
# Find countries in our list that are not in the reference list
# (official names)
if np != len(countrylist):
    s1 = set(countrylistref)
    s2 = set(countrylist)
    # A bare expression inside an `if` block is not displayed by the notebook,
    # so the result is stored and reported explicitly.
    # key=str keeps the sort working even if a country value is not a string.
    missingcountries = sorted(s2.difference(s1), key=str)
    logger.warning("Countries not found in the reference list: {0}".format(missingcountries))

## Create a new geoJSON with the additional properties
Now we can write the new geoJSON that `Leaflet` will ingest to produce a nice choropleth map.

In [14]:
# The content is written as a JavaScript assignment ("var participants = ...")
# followed by the JSON document, not as a bare geoJSON object.
with open(outputfile, 'w') as fp:
    fp.write("var participants = ")
    # Stream the JSON directly to the file instead of building the whole
    # string in memory first; the text produced is the same.
    json.dump(CountryJson, fp, indent=4, separators=(',', ': '))
    fp.write('\n')
# Use the notebook logger, not the root logger (`logging.info`), whose default
# WARNING level would drop this message.
logger.info("Output file created in directory {0}".format(outputdir))