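Dependencies: the `ogr2ogr` command-line tool (part of GDAL) and the `topojson` command-line tool. Both must be available on the PATH.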

Note: This script only updates the existing ddf--concepts.csv and ddf--entities--municipality.csv files in the output directory. It does not create them, so both files must exist before the script is executed.

In [32]:
import pandas as pd
import numpy as np
import os
from ddf_utils.str import to_concept_id
from ddf_utils.index import create_index_file

import subprocess
import zipfile
import shutil
import urllib

### Helpers

In [33]:
def shape2json(src, trg):
    """Convert a shapefile to GeoJSON by running the external ``ogr2ogr`` tool.

    Parameters
    ----------
    src : str
        Path of the input shapefile.
    trg : str
        Path of the GeoJSON file to write.

    Returns
    -------
    int
        Exit status of the command (0 means success). 127 is returned when
        the executable could not be started at all.
    """
    # Hand the arguments over as a list and bypass the shell, so that paths
    # containing spaces or shell metacharacters reach ogr2ogr unchanged.
    cmd = ["ogr2ogr", "-f", "GeoJSON", trg, src]
    print("Running command: " + " ".join(cmd))
    try:
        return subprocess.call(cmd)
    except OSError as err:
        # Without a shell a missing executable raises instead of returning a
        # status; report it and keep the non-raising behaviour of this helper.
        print("Error: could not run ogr2ogr (" + str(err) + ")")
        return 127

In [34]:
def geo2topo(src, trg):
    """Convert a GeoJSON file to TopoJSON by running the external ``topojson`` tool.

    The ``HASC_2`` property is used as the feature id and ``NAME_2`` is kept
    as a property renamed to ``municipality``.

    Parameters
    ----------
    src : str
        Path of the input GeoJSON file.
    trg : str
        Path of the TopoJSON file to write.

    Returns
    -------
    int
        Exit status of the command (0 means success). 127 is returned when
        the executable could not be started at all.
    """
    # Hand the arguments over as a list and bypass the shell, so that paths
    # containing spaces or shell metacharacters reach topojson unchanged.
    cmd = [
        "topojson",
        "-o", trg,
        "--id-property", "HASC_2",
        "-p", "municipality=NAME_2",
        "--",  # everything after this marker is an input file
        src,
    ]
    print("Running command: " + " ".join(cmd))
    try:
        return subprocess.call(cmd)
    except OSError as err:
        # Without a shell a missing executable raises instead of returning a
        # status; report it and keep the non-raising behaviour of this helper.
        print("Error: could not run topojson (" + str(err) + ")")
        return 127

### Extract map IDs

In [35]:
def extract_map_ids(data):
    """Build the map-ID lookup table from the shapefile attribute table.

    Parameters
    ----------
    data : pandas.DataFrame
        Attribute table; must contain the columns ``HASC_2`` and ``NAME_2``
        and at least 269 rows (two rows are patched by position).

    Returns
    -------
    pandas.DataFrame
        New frame with the columns ``map_id`` (from ``HASC_2``) and ``name``
        (from ``NAME_2``). ``data`` itself is left untouched.

    Raises
    ------
    IndexError
        If ``data`` has fewer rows than the patched positions require.
    """
    ids = data[["HASC_2", "NAME_2"]].rename(
        columns={"HASC_2": "map_id", "NAME_2": "name"}
    )

    # Fix irregularities manually: (row position, corrected map ID).
    # NOTE(review): the positions presumably refer to the row order of the
    # GADM 2.8 SWE_adm2.csv file -- confirm whenever the source data changes.
    manual_fixes = ((268, "SE.VG.ML"), (187, "SE.VG.KN"))

    # Assign through one .iloc[row, column] call. The chained form
    # ids["map_id"].iloc[row] = value may write to a temporary copy and leave
    # the frame unchanged (SettingWithCopyWarning / copy-on-write).
    map_id_col = ids.columns.get_loc("map_id")
    for row, map_id in manual_fixes:
        ids.iloc[row, map_id_col] = map_id

    return ids

### Update concepts

In [36]:
def update_concepts(out_dir, map_type, map_file, map_object):
    """Add the map properties to the ``municipality`` concept in ddf--concepts.csv.

    The file is only read here; writing the result back is left to the caller.

    Parameters
    ----------
    out_dir : str
        Directory that contains ``ddf--concepts.csv``.
    map_type : str
        Value stored in the ``map_type`` column of the municipality concept.
    map_file : str
        Value stored in the ``map_file`` column of the municipality concept.
    map_object : str
        Value stored in the ``map_object`` column of the municipality concept.

    Returns
    -------
    pandas.DataFrame or bool
        The updated concept table with missing values replaced by empty
        strings, or ``False`` when the concepts file does not exist.
    """
    concept_file = os.path.join(out_dir, "ddf--concepts.csv")

    if not os.path.isfile(concept_file):
        print("Error: File not found " + concept_file)
        return False

    print("Adding new map info...")
    data = pd.read_csv(concept_file, encoding="utf-8")

    map_properties = (
        ("map_type", map_type),
        ("map_file", map_file),
        ("map_object", map_object),
    )
    is_municipality = data["concept"] == "municipality"
    for column, value in map_properties:
        # Create a missing column up front so that the masked assignment
        # below never has to write a string into a float (all-NaN) column.
        if column not in data.columns:
            data[column] = None
        data.loc[is_municipality, column] = value

    # Add the new concepts if they do not exist already
    if "map_id" not in data["concept"].values:
        new_concepts = pd.DataFrame([
            {"concept": "map_id", "name": "Map ID", "concept_type": "string"},
            {"concept": "map_type", "name": "Map format", "concept_type": "string"},
            {"concept": "map_file", "name": "Path to map file", "concept_type": "string"},
            {"concept": "map_object",
             "name": "Name of JSON object with geo shapes (map_type specific)",
             "concept_type": "string"},
        ])
        # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
        data = pd.concat([data, new_concepts], ignore_index=True)

    # Reorder columns: the known ones first, then any other column the file
    # already had, so that nothing is dropped from the concepts table.
    leading = ["concept", "name", "concept_type", "map_file", "map_object", "map_type"]
    remaining = [column for column in data.columns if column not in leading]

    # fillna returns a new frame, so its result has to be kept.
    return data[leading + remaining].fillna("")

### Update entities

In [37]:
def update_entities(out_dir, ids):
    """Attach the map IDs to the municipality entities.

    The file is only read here; writing the result back is left to the caller.

    Parameters
    ----------
    out_dir : str
        Directory that contains ``ddf--entities--municipality.csv``.
    ids : pandas.DataFrame
        Lookup table with the columns ``name`` and ``map_id``. It is not
        modified.

    Returns
    -------
    pandas.DataFrame or bool
        The entity table sorted by ``municipality`` with a ``map_id`` column,
        the table exactly as read when it already has a ``map_id`` column, or
        ``False`` when the entities file does not exist.
    """
    entities_file = os.path.join(out_dir, "ddf--entities--municipality.csv")

    if not os.path.isfile(entities_file):
        print("Error: File not found " + entities_file)
        return False

    # Read the two code columns through str so they stay text instead of
    # being parsed as numbers.
    entities = pd.read_csv(entities_file, encoding="utf-8",
                           converters={"municipality": str, "county": str})

    if "map_id" in entities.columns:
        # Map IDs were added by an earlier run; nothing to merge.
        return entities

    # Hack needed due to encoding issues: the names are not compared directly
    # but through a temporary key derived with to_concept_id. The key is put
    # on copies (assign), so the caller's ``ids`` frame is not touched.
    entities_by_key = entities.assign(
        tmp=entities["name"].map(to_concept_id)
    ).set_index("tmp")
    map_ids = ids.assign(tmp=ids["name"].map(to_concept_id)).set_index("tmp")["map_id"]

    # Left join: keep exactly the existing entities. A map ID without a
    # matching entity must not add a row that has no municipality, and an
    # entity without a match simply gets an empty map_id.
    new_entities = entities_by_key.join(map_ids, how="left")

    return new_entities.sort_values(by="municipality")

### Settings

In [38]:
# Directories, both located one level above the working directory
src = os.path.join(os.pardir, "src")         # holds the zipped shapefiles
out_dir = os.path.join(os.pardir, "output")  # holds the ddf files and the map

# Raw geo data: remote location and the local name of the archive
geo_url = "http://biogeo.ucdavis.edu/data/gadm2.8/shp/SWE_adm_shp.zip"
zip_file = "SWE_adm_shp.zip"

# Set to True to fetch the archive from geo_url; otherwise the copy that is
# already stored in src is used
download = False

# Intermediate GeoJSON file; its name without the extension is stored as
# map_object in the concepts file
topojson_object = "municipalities.json"

# Name of the resulting TopoJSON file inside out_dir
topojson_file = "shapes_municipalities.json"

### Main script

Produces a topojson file with the following structure:

```
topojson_file = 

{
    type: "Topology",
    transform: {},
    arcs: [],
    objects: {
                topojson_object: {}
    }
}
```

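Here **topojson_object** stands for the object name, i.e. the `topojson_object` setting without its `.json` extension (`municipalities`). That object is a geometry collection containing all the municipality polygons. For more info, see the [TopoJSON wiki](https://github.com/mbostock/topojson/wiki).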

In [40]:
if __name__ == "__main__":

    # The archive is always read from src; downloading only refreshes it.
    zip_name = os.path.join(src, zip_file)
    if download:
        # urlretrieve lives in urllib.request on Python 3 and directly in
        # urllib on Python 2.
        try:
            from urllib.request import urlretrieve
        except ImportError:
            from urllib import urlretrieve
        urlretrieve(geo_url, zip_name)

    zip_dir = os.path.join(src, zip_file.split(".")[0])
    map_path = os.path.join(out_dir, topojson_file)

    try:
        # Extract all
        with zipfile.ZipFile(zip_name, "r") as z:
            z.extractall(zip_dir)

        # Convert shape files to GeoJSON, then GeoJSON to TopoJSON.
        # A GeoJSON file left behind by an aborted run is removed first, so
        # that the existence check below reflects this run only.
        shape_file = os.path.join(zip_dir, "SWE_adm2.shp")
        if os.path.isfile(topojson_object):
            os.remove(topojson_object)
        shape2json(shape_file, topojson_object)
        if not os.path.isfile(topojson_object):
            raise RuntimeError("ogr2ogr did not produce " + topojson_object)
        geo2topo(topojson_object, map_path)
        if not os.path.isfile(map_path):
            raise RuntimeError("topojson did not produce " + map_path)

        # Add concept properties
        concepts = update_concepts(out_dir, "topojson", topojson_file,
                                   topojson_object.split(".")[0])
        concepts_path = os.path.join(out_dir, "ddf--concepts.csv")
        if concepts is False:
            # update_concepts signals a missing file with False; stop with a
            # clear error instead of failing later on False.to_csv.
            raise IOError("Cannot update missing file " + concepts_path)
        print("Printing " + concepts_path)
        concepts.to_csv(concepts_path, index=False, encoding="utf-8")

        # Extract map IDs
        shape_attrs = pd.read_csv(os.path.join(zip_dir, "SWE_adm2.csv"),
                                  encoding="utf-8")
        ids = extract_map_ids(shape_attrs)

        # Add map IDs to entities
        entities = update_entities(out_dir, ids)
        entities_path = os.path.join(out_dir, "ddf--entities--municipality.csv")
        if entities is False:
            raise IOError("Cannot update missing file " + entities_path)
        print("Printing " + entities_path)
        entities.to_csv(entities_path, index=False, encoding="utf-8")
    finally:
        # Cleanup, also when a step above failed: the extracted archive and
        # the intermediate GeoJSON are temporary files.
        shutil.rmtree(zip_dir, ignore_errors=True)
        if os.path.isfile(topojson_object):
            os.remove(topojson_object)

    # Create index file
    print("Creating index files...")
    create_index_file(out_dir)

    # Free memory
    del concepts, shape_attrs, ids, entities

Running command: ogr2ogr -f GeoJSON municipalities.json ../src/SWE_adm_shp/SWE_adm2.shp
Running command: topojson -o ../output/shapes_municipalities.json --id-property HASC_2 -p municipality=NAME_2 -- municipalities.json
Adding new map info...
Printing ../output/ddf--concepts.csv
Printing ../output/ddf--entities--municipality.csv
Creating index files...
