Create a `geoJSON` file storing the number of participants per country.

In [1]:
import os
import glob
import logging
from collections import Counter
import geojson
import json
import liegecolloquium

Prepare logger:

In [2]:
# Work with the root logger (no name given) and let every message through,
# including the DEBUG ones emitted while the participant files are read.
logger = logging.getLogger()
logger.setLevel(level=logging.DEBUG)

# Files and directories

We need:
1. a geoJSON file with the world countries
2. a list of tab-separated (TSV) files storing the list of participants.

In [9]:
# Inputs: the world-countries geoJSON and the participant lists both live
# in the processed-data directory.
datadir = "../data/processed/"
countryfile = os.path.join(datadir, "countries.geo.json")

# glob only returns paths that actually exist; sorting makes the processing
# order deterministic when the pattern matches several files.
participantfilelist = glob.glob(os.path.join(datadir, "ParticipantList-2014.tsv"))
participantfilelist.sort()

# Output: the enriched country file read by the map.
outputdir = "../data/"
outputfile = os.path.join(outputdir, "ParticipantCountry.geojson")

logging.info("Working on {0} participant files".format(len(participantfilelist)))

INFO:root:Working on 1 participant files


# Read data
We provide a list of files to be read in order to create a list of participants for the period of interest (full period, decades, ...)

## Participant list

In [10]:
# Build the list of countries, one entry per participant, over all the
# participant files.
# lonlist, latlist and nptotal are initialised here but not filled by this
# cell; they are kept so that any code relying on them still finds them.
lonlist, latlist, countrylist = [], [], []
nptotal = 0
for participantfile in participantfilelist:
    logging.debug("Working on file {0}".format(os.path.basename(participantfile)))
    with open(participantfile, "r") as f:
        for line in f:
            # A blank line (typically a trailing empty line at the end of the
            # file) holds no participant: splitting it yields a single empty
            # field and the column access below would raise IndexError.
            if not line.strip():
                continue
            fields = line.rstrip().split('\t')
            # The first five tab-separated columns are passed, in order, to
            # the Participant constructor.
            participant = liegecolloquium.Participant(
                fields[0], fields[1], fields[2], fields[3], fields[4]
            )
            # NOTE(review): presumably normalises the country name so that it
            # matches the geoJSON reference names -- confirm in liegecolloquium.
            participant.replace_country()
            countrylist.append(participant.country)
    # Report after each file: the figures are cumulative over the files read so far
    logging.info("Number of countries: {0}".format(len(set(countrylist))))
    print(set(countrylist))

DEBUG:root:Working on file ParticipantList-2014.tsv
INFO:root:Number of countries: 33


{'South Korea', 'Netherlands', 'Italy', 'Canada', 'United Kingdom', 'Russia', 'Turkey', 'Sweden', 'Poland', 'United States of America', 'Austria', 'Lithuania', 'Switzerland', 'Greece', 'Qatar', 'Namibia', 'South Africa', 'Denmark', 'Chile', 'Norway', 'France', 'Oman', 'Australia', 'India', 'Germany', 'Belgium', 'Usa', 'Mexico', 'UK', 'Morocco', 'Finland', 'Spain', 'Croatia'}


## Count participants per country
Use the `Counter` function and then sort.

In [18]:
# Tally how many times each country occurs in the participant list
ParticipantPerCountry = Counter()
ParticipantPerCountry.update(countrylist)

# Country names in alphabetical order (iterating a Counter yields its keys)
SortedParticipant = list(ParticipantPerCountry)
SortedParticipant.sort()

# Add *participant* property to the country JSON file 

The geoJSON file contains the boundaries of the countries.    
We only have to add a new property to each feature, the number of participants, so that they can be read by Leaflet.

First we read the geoJSON file, which returns a dictionary.    
Then we will add the property *participants* for the countries that are in our list.

In [6]:
# Parse the world-countries geoJSON into plain Python containers
with open(countryfile, 'r') as geojsonfile:
    CountryJson = json.loads(geojsonfile.read())

# Last expression of the cell: shows the type of the parsed document
type(CountryJson)

dict

In [7]:
# Loop on the countries of the geoJSON and attach the number of participants
# to each country that appears in our participant list.
nc = 0  # number of geoJSON countries that have participants
np = 0  # number of participants attached to a geoJSON country
        # (name kept as is: the following cell compares it to len(countrylist))
countrylistref = []  # every country name found in the geoJSON (reference names)
for feature in CountryJson['features']:
    # Extract the country name
    countryName = feature['properties']['name']
    countrylistref.append(countryName)
    # If country name in the initial list, write number of participants.
    # Membership is tested on the Counter (same keys as the distinct entries
    # of countrylist) to avoid scanning the whole participant list for every
    # country of the geoJSON.
    if countryName in ParticipantPerCountry:
        nparticipants = ParticipantPerCountry[countryName]
        nc += 1
        logger.info("{0} --> {1}".format(countryName, nparticipants))
        feature['properties']['Participants'] = nparticipants
        np += nparticipants

# If np differs from the expected total, some participant countries were not
# matched by any reference name.
logger.info("{0} countries and {1} participants (should be {2}) ".format(nc, np, len(countrylist)))

In [8]:
# Find countries in our list that are not in the reference list
# (official names)
if np != len(countrylist):
    s1 = set(countrylistref)
    s2 = set(countrylist)
    unmatched = s2.difference(s1)
    # An expression nested in an `if` block is not displayed by the notebook,
    # so the result has to be printed explicitly to be visible.
    print(unmatched)

## Create a new geoJSON with the additional properties
Now we can write the new geoJSON that `Leaflet` will ingest to produce a nice choropleth map.

In [14]:
# Write the enriched country collection to disk.
# The JSON is prefixed with a JavaScript assignment, so the resulting file
# defines a `participants` variable rather than being a bare geoJSON document.
with open(outputfile, 'w') as fp:
    serialised = json.dumps(CountryJson, indent=4, separators=(',', ': '))
    fp.write("var participants = " + serialised + '\n')
logging.info("Output file created in directory {0}".format(outputdir))

# Print country names
Useful for the proper naming in the raw files.

In [6]:
# List the reference country names, one per line, in the order of the file
for feature in CountryJson['features']:
    properties = feature['properties']
    print(properties['name'])

Afghanistan
Angola
Albania
United Arab Emirates
Argentina
Armenia
Antarctica
French Southern and Antarctic Lands
Australia
Austria
Azerbaijan
Burundi
Belgium
Benin
Burkina Faso
Bangladesh
Bulgaria
The Bahamas
Bosnia and Herzegovina
Belarus
Belize
Bermuda
Bolivia
Brazil
Brunei
Bhutan
Botswana
Central African Republic
Canada
Switzerland
Chile
China
Ivory Coast
Cameroon
Democratic Republic of the Congo
Republic of the Congo
Colombia
Costa Rica
Cuba
Northern Cyprus
Cyprus
Czech Republic
Germany
Djibouti
Denmark
Dominican Republic
Algeria
Ecuador
Egypt
Eritrea
Spain
Estonia
Ethiopia
Finland
Fiji
Falkland Islands
France
Gabon
United Kingdom
Georgia
Ghana
Guinea
Gambia
Guinea Bissau
Equatorial Guinea
Greece
Greenland
Guatemala
French Guiana
Guyana
Honduras
Croatia
Haiti
Hungary
Indonesia
India
Ireland
Iran
Iraq
Iceland
Israel
Italy
Jamaica
Jordan
Japan
Kazakhstan
Kenya
Kyrgyzstan
Cambodia
South Korea
Kosovo
Kuwait
Laos
Lebanon
Liberia
Libya
Sri Lanka
Lesotho
Lithuania
Luxembourg
Latvia
Morocc