Create a `geoJSON` file storing the number of participants per country.

In [1]:
import os
import glob
import logging
from collections import Counter
import geojson
import json
import liegecolloquium

Prepare logger:

In [2]:
# Configure the root logger so that DEBUG (and higher) messages are reported.
# basicConfig() installs a stream handler when the root logger has none.
# Without it, records emitted through `logger.info(...)` are only displayed
# once some module-level `logging.info(...)` call has implicitly configured
# the logging system, which makes the output depend on cell order.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()
# basicConfig() is a no-op if handlers already exist, so set the level explicitly
logger.setLevel(logging.DEBUG)

# Files and directories

We need:
1. a geoJSON file with the world countries
2. a list of TSV (tab-separated values) files storing the lists of participants.

In [3]:
# Input and output directories (relative to the notebook location)
datadir = "../data/tsv/"
outputdir = "../data/"

# World countries (geoJSON), stored one level above the TSV directory
countryfile = os.path.join(datadir, "../countries.geo.json")
# File that will receive the countries along with the participant counts
outputfile = os.path.join(outputdir, "ParticipantCountry1970-2017.geojson")

# Participant lists, processed in alphabetical order of the file names
participantfilelist = glob.glob(os.path.join(datadir, "ParticipantList-*.tsv"))
participantfilelist.sort()

logging.info("Working on {0} participant files".format(len(participantfilelist)))

INFO:root:Working on 39 participant files


# Read data
We provide a list of files to be read in order to create a list of participants for the period of interest (full period, decades, ...)

## Participant list

In [4]:
# Build the list of countries, one entry per participant, over all the files.
# Each line of a participant file is split on tabs and its first five fields
# are passed to liegecolloquium.Participant.
countrylist = []
for participantfile in participantfilelist:
    with open(participantfile, "r") as f:
        for line in f:
            # A blank line would split into a single empty field and make the
            # field indexing below fail with IndexError: skip it
            if not line.strip():
                continue
            # NOTE(review): rstrip() without argument also removes trailing
            # tabs, so a line whose last fields are empty ends up with fewer
            # than five fields -- confirm this cannot happen in the data
            fields = line.rstrip().split('\t')
            participant = liegecolloquium.Participant(
                fields[0], fields[1], fields[2], fields[3], fields[4]
            )
            # presumably normalises the country name (method defined in
            # liegecolloquium, not visible here) -- TODO confirm
            participant.replace_country()
            countrylist.append(participant.country)

## Count participants per country
Use `Counter` (from the `collections` module) to count the participants per country, then sort the result.

In [5]:
# Number of occurrences of each country in the participant list
ParticipantPerCountry = Counter(countrylist)
# Country names in alphabetical order (iterating a Counter yields its keys)
SortedParticipant = sorted(ParticipantPerCountry)

In [6]:
# (country, count) pairs ordered by decreasing number of participants
ParticipantPerCountry.most_common()

[('Belgium', 730),
 ('United States of America', 500),
 ('France', 441),
 ('United Kingdom', 309),
 ('Germany', 268),
 ('Canada', 132),
 ('Italy', 117),
 ('Russian Federation', 116),
 ('Spain', 114),
 ('Netherlands', 113),
 ('Norway', 50),
 ('Japan', 45),
 ('Australia', 39),
 ('Denmark', 34),
 ('Portugal', 34),
 ('Sweden', 33),
 ('China', 29),
 ('Estonia', 27),
 ('Ukraine', 27),
 ('Uzbekistan', 23),
 ('South Africa', 21),
 ('Switzerland', 20),
 ('Mexico', 20),
 ('Greece', 16),
 ('Finland', 13),
 ('Korea, Republic of', 13),
 ('Bulgaria', 9),
 ('India', 9),
 ('Turkey', 9),
 ('Morocco', 9),
 ('Israel', 9),
 ('Chile', 9),
 ('Algeria', 7),
 ('Ireland', 6),
 ('Brazil', 6),
 ('Niger', 6),
 ('Tunisia', 6),
 ('Romania', 6),
 ('Guinea', 4),
 ('Poland', 4),
 ('Saudi Arabia', 4),
 ('Republic of Serbia', 2),
 ('Burundi', 2),
 ('Jordan', 2),
 ('Kazakhstan', 2),
 ('Iran, Islamic Republic of', 2),
 ('Iceland', 2),
 ('Argentina', 2),
 ('Oman', 2),
 ('Czech Republic', 2),
 ('Qatar', 2),
 ('Croatia', 2),

# Add *participant* property to the country JSON file 

The geoJSON file contains the boundaries of the countries.    
We only have to add a new property to each feature, the number of participants, so that they can be read by Leaflet.

First we read the geoJSON file, which returns a dictionary.    
Then we will add the property *Participants* to every country: the number of participants for the countries that are in our list, and zero for the others.

In [35]:
# Parse the world-countries geoJSON file into Python objects
with open(countryfile, 'r') as countrystream:
    CountryJson = json.loads(countrystream.read())
# Display the type of the parsed object
type(CountryJson)

dict

In [41]:
# Loop on the countries of the geoJSON and attach to each of them the
# number of participants as the property 'Participants'
nc = 0  # geoJSON countries with at least one participant
np = 0  # participants assigned to a geoJSON country (read by the check that follows)
countrylistref = []  # all the country names found in the geoJSON
for feature in CountryJson['features']:
    # Extract the country name
    countryName = feature['properties']['name']
    countrylistref.append(countryName)

    # Membership is tested on the Counter (hash lookup) rather than by
    # scanning the full participant list for every country
    if countryName in ParticipantPerCountry:
        nparticipants = ParticipantPerCountry[countryName]
        nc += 1
        logger.info("{0} --> {1}".format(countryName, nparticipants))
        feature['properties']['Participants'] = nparticipants
        np += nparticipants
    else:
        # Write an explicit zero, otherwise we get "undefined" in leaflet
        feature['properties']['Participants'] = 0

logger.info("{0} countries and {1} participants (should be {2}) ".format(nc, np, len(countrylist)))

INFO:root:United Arab Emirates --> 1
INFO:root:Argentina --> 2
INFO:root:Australia --> 39
INFO:root:Austria --> 1
INFO:root:Burundi --> 2
INFO:root:Belgium --> 715
INFO:root:Bulgaria --> 8
INFO:root:Brazil --> 6
INFO:root:Canada --> 127
INFO:root:Switzerland --> 15
INFO:root:Chile --> 9
INFO:root:China --> 27
INFO:root:Ivory Coast --> 1
INFO:root:Cameroon --> 1
INFO:root:Czech Republic --> 2
INFO:root:Germany --> 262
INFO:root:Denmark --> 31
INFO:root:Algeria --> 7
INFO:root:Egypt --> 1
INFO:root:Spain --> 111
INFO:root:Estonia --> 26
INFO:root:Finland --> 12
INFO:root:France --> 436
INFO:root:United Kingdom --> 294
INFO:root:Guinea --> 4
INFO:root:Greece --> 16
INFO:root:Greenland --> 1
INFO:root:Croatia --> 2
INFO:root:Hungary --> 1
INFO:root:Indonesia --> 1
INFO:root:India --> 9
INFO:root:Ireland --> 6
INFO:root:Iran --> 2
INFO:root:Iceland --> 2
INFO:root:Israel --> 7
INFO:root:Italy --> 114
INFO:root:Jordan --> 2
INFO:root:Japan --> 41
INFO:root:Kazakhstan --> 2
INFO:root:Kenya --

In [42]:
# When the participants assigned to geoJSON countries do not add up to the
# total, show the country names of our list that are missing from the
# reference list (names found in the geoJSON)
if np != len(countrylist):
    unmatched = set(countrylist) - set(countrylistref)
    print(unmatched)

## Create a new geoJSON with the additional properties
Now we can write the new geoJSON that `Leaflet` will ingest to produce a nice choropleth map.

In [43]:
# Serialise the updated countries with a 4-space indentation.
# The content is prefixed with "var participants = ", so the file holds a
# JavaScript assignment rather than bare geoJSON.
with open(outputfile, 'w') as fp:
    serialised = json.dumps(CountryJson, indent=4, separators=(',', ': '))
    fp.writelines(("var participants = ", serialised, '\n'))
logging.info("Output file created in directory {0}".format(outputdir))

INFO:root:Output file created in directory ../data/
