In [1]:
import pandas as pd
import json
import csv
import glob
import os

def list_columns(year, location):
    """
    Returns the alphabetically sorted column names of the CSV file for one year.

    year: int or string
    location: string, prefix that is put directly in front of "<year>.csv"

    The first CSV column is used as the index and is therefore not part of
    the returned list.
    """
    path = location + str(year) + ".csv"
    # DataFrame.from_csv was removed in pandas 1.0; read_csv(index_col=0) is its
    # replacement. nrows=0 parses only the header, which is all that is needed here.
    header = pd.read_csv(path, index_col=0, nrows=0)
    return sorted(header.columns)

def get_vars(csvfile):
    """
    Returns the vars used by the CBS as a dict.

    csvfile: string, path to a ';'-delimited file

    Every non-empty row contributes one entry: first field -> second field.
    Blank lines are skipped. A non-empty row with fewer than two fields
    raises IndexError.
    """
    import sys  # local import: only needed to choose the file mode below

    if sys.version_info[0] < 3:
        # Python 2's csv module reads bytes; keep the mode the notebook used.
        handle = open(csvfile, "rbU")
    else:
        # Python 3's csv module needs text mode and does its own newline handling.
        handle = open(csvfile, "r", newline="")

    with handle as f:
        return {row[0]: row[1] for row in csv.reader(f, delimiter=';') if row}

# One entry per file in data/csv/vars/: the file name minus its last four
# characters maps to the dict that get_vars() reads from that file.
# Paths ending in "~" are left out.
variables = dict(
    (os.path.basename(path)[:-4], get_vars(path))
    for path in glob.glob('data/csv/vars/*')
    if not path.endswith("~")
)

In [2]:
def get_json(var, variables, years, location = "data/csv/"):
    """
    Returns a JSON-serialisable dict with the municipality data of one variable
    for the given years (a dict, not a JSON string).

    var: string, key into `variables` naming the item to export
    variables: nested dictionary, variables[name][year] is the CSV column used
               for `name` in that year; must also contain the entries
               "regioaanduiding" and "regionaam"
    years: tuple -> ints, (first year, last year), both inclusive
    location: string, prefix that is put directly in front of "<year>.csv"

    The result has the form {"<year>": {<region name>: <value>, ...}, ...}.
    Raises KeyError when `variables` has no column for a requested year.

    E.g.

    get_json("aantal_mannen", variables, (2003, 2015))
    """

    result = {}

    # range() instead of xrange() so the loop runs on both Python 2 and Python 3.
    for year in range(years[0], years[1] + 1):
        year = str(year)

        # Lower all input and all columns (CBS uses lower/upper case for its columns throughout the years).
        item = variables[var][year].lower()
        areades = variables["regioaanduiding"][year].lower()
        index = variables["regionaam"][year].lower()

        # DataFrame.from_csv was removed in pandas 1.0; read_csv(index_col=0) is its replacement.
        data = pd.read_csv(location + year + ".csv", index_col=0)
        # A real list rather than map(): map() is lazy on Python 3 and
        # str.lower rejects unicode labels on Python 2.
        data.columns = [column.lower() for column in data.columns]

        # First select only all gemeentes, then filter based on given item and index. Set given index.
        is_gemeente = data[areades].isin(['Gemeente', 'G'])
        df = data[is_gemeente].filter([item, index]).set_index(index)

        # Round-trip through pandas' JSON writer so the values become plain Python types.
        result[year] = json.loads(df[item].to_json())

    return result

def write_json(variables, years = (2006, 2014), location = "data/csv/", out_dir = "data/json/"):
    """
    Writes one "<var>.json" file per exportable variable.

    variables: nested dictionary, as passed on to get_json
    years: tuple -> ints, (first year, last year), both inclusive
    location: string, prefix of the yearly CSV files (passed on to get_json)
    out_dir: string, prefix that is put directly in front of "<var>.json"

    The variables named in `skipped` below are never exported.
    """
    skipped = ["aantal_ao_uitkering", "aantal_geboorte", "meest_voorkomende_postcode",
               "personen_ao_uitkering_totaal", "woningvoorraad_aantal", "regionaam",
               "buurtcode", "regioaanduiding", "gemeentecode"]

    for var in variables:
        if var in skipped:
            continue
        # Parenthesised single argument: prints the same text on Python 2 and Python 3.
        print("Currently writing %s.json." % var)
        with open(out_dir + var + ".json", 'w') as f:
            json.dump(get_json(var, variables, years, location = location), f)

In [3]:
# Export every variable that write_json does not skip to its own JSON file.
write_json(variables)

  data = self._reader.read(nrows)
  data = self._reader.read(nrows)


Currently writing personenautos_naar_oppervlakte.json.
Currently writing motortweewielers.json.
Currently writing personenautos_totaal.json.
Currently writing niet_westers_totaal_percentage.json.
Currently writing aandeel_allochtonen_westers_percentage.json.
Currently writing bedrijsmotorvoertuigen.json.
Currently writing 15_tot_25_jaar.json.
Currently writing aandeel_allochtonen_marokko_percentages.json.
Currently writing bevolkingsdichtheid.json.
Currently writing 65_of_ouder.json.
Currently writing personenautos_per_huishouden.json.
Currently writing aantal_huishoudens.json.
Currently writing aandeel_allochtonen_turkije_percentage.json.
Currently writing 45_tot_64_jaar.json.
Currently writing oppervlakte_water.json.
Currently writing overig_niet_westers_percentage.json.
Currently writing voormalige_nederlandse_antillen_en_aruba.json.
Currently writing 0_tot_15_jaar.json.
Currently writing eenpersoonshuishoudens.json.
Currently writing omgevingsadressendichtheid.json.
Currently writi

  data = self._reader.read(nrows)
