In [4]:
import pandas as pd
import json
import csv
import glob
import os

def get_vars(csvfile):
    """
    Returns the vars used by the CBS as a dict.

    The file is parsed as ';'-delimited CSV. For every row the first field
    becomes the key and the second field the value; when a key occurs more
    than once the last row wins. Rows with fewer than two fields (for
    example blank lines) are skipped instead of raising an IndexError.

    csvfile: string -> path of the file to read
    """
    import sys

    # The csv module wants a text stream opened with newline='' on Python 3,
    # whereas on Python 2 it wants the byte stream the notebook originally used.
    if sys.version_info[0] >= 3:
        handle = open(csvfile, newline='')
    else:
        handle = open(csvfile, "rbU")

    with handle as f:
        reader = csv.reader(f, delimiter=';')
        return {row[0]: row[1] for row in reader if len(row) >= 2}

# One entry per file in data/csv/vars/: the key is the file's base name minus
# its last four characters, the value is the mapping parsed by get_vars.
# Paths ending in "~" are ignored.
variables = dict(
    (os.path.basename(path)[:-4], get_vars(path))
    for path in glob.glob('data/csv/vars/*')
    if not path.endswith("~")
)

In [2]:
def get_json(var, variables, years, location = "data/csv/"):
    """
    Returns a nested dict (ready for json.dump) in which the data of one
    variable is included for every given year.

    For each year in the inclusive range the file `location + <year> + ".csv"`
    is read, the rows whose "regioaanduiding" column equals 'Gemeente' or 'G'
    are kept, and the values of the requested column are stored per region
    name.

    var: string -> key in `variables` of the variable to extract
    variables: nested dictionary -> {variable: {year as string: column name}};
               must also contain the "regioaanduiding" and "regionaam" entries
    years: tuple -> ints, (first year, last year), both inclusive
    location: string -> prefix put verbatim in front of "<year>.csv"

    Returns {var: {year as string: {region name: value}}}.

    Raises KeyError when a year is missing from `variables` or when a
    configured column is not present in that year's CSV file.

    E.g.

    get_json("aantal_mannen", variables, (2003, 2014))
    """

    result = {var: {}}

    # range() instead of xrange() so the loop runs on both Python 2 and 3.
    for year in range(years[0], years[1] + 1):
        year = str(year)

        # Lower all input and all columns (CBS uses lower/upper case for its columns throughout the years).
        item = variables[var][year].lower()
        areades = variables["regioaanduiding"][year].lower()
        index = variables["regionaam"][year].lower()

        # DataFrame.from_csv no longer exists in pandas >= 1.0. read_csv with
        # index_col=0 keeps the same column layout; from_csv's date parsing of
        # that index is not reproduced because set_index() below discards it.
        data = pd.read_csv(location + year + ".csv", index_col=0)
        data.columns = [column.lower() for column in data.columns]

        # First select only all gemeentes, then filter based on given item and index. Set given index.
        is_gemeente = data[areades].isin(['Gemeente', 'G'])
        df = data[is_gemeente].filter([item, index]).set_index(index)

        # Round-trip through to_json so the values become plain JSON-serialisable Python types.
        result[var][year] = json.loads(df[item].to_json())

    return result

def write_json(variables, years=(2004, 2014), location="data/csv/", out_dir="data/json/"):
    """
    Writes one JSON file per variable, named "<variable>.json", into `out_dir`.

    Each file holds the result of get_json for that variable. The
    "regionaam" and "buurtcode" entries of `variables` are skipped.

    variables: nested dictionary -> passed through to get_json
    years: tuple -> ints, (first year, last year), both inclusive
    location: string -> prefix of the yearly CSV files, passed to get_json
    out_dir: string -> existing directory the JSON files are written to
    """
    skipped = ("regionaam", "buurtcode")

    for var in variables:
        if var in skipped:
            continue
        with open(os.path.join(out_dir, var + ".json"), 'w') as f:
            json.dump(get_json(var, variables, years, location=location), f)

In [20]:
def list_columns(year, location):
    """
    Returns the sorted column names of the CSV file of the given year.

    The first column of the file is read as the index and is therefore not
    part of the returned list.

    year: int or string
    location: string -> prefix put verbatim in front of "<year>.csv"
    """
    # DataFrame.from_csv no longer exists in pandas >= 1.0; read_csv with
    # index_col=0 yields the same set of columns.
    path = location + str(year) + ".csv"
    return sorted(pd.read_csv(path, index_col=0).columns)




In [5]:
get_json("aantal_mannen", variables, (2003, 2014))