## Export All Data to Single CSV
This notebook walks you through exporting DevInfo indicator data from an Access database to a single CSV file.

In [None]:
import os
import pyodbc
import csv
# To check that a Microsoft Access ODBC driver is installed, run:
#   [x for x in pyodbc.drivers() if x.startswith('Microsoft Access Driver')]
# If this returns [], you may need to install the 64-bit ACE drivers. See:
# https://github.com/mkleehammer/pyodbc/wiki/Connecting-to-Microsoft-Access
# 64-bit drivers: https://www.microsoft.com/en-us/download/confirmation.aspx?id=13255

## Configure your paths and folders

In [None]:
# Location of the DevInfo Access database (.accdb) that the query cell reads.
access_database = r"Z:\dev\nepal-import\NepalInfo2016_for_python.accdb"

# The CSV is written to <output_folder>/<output_csv>.
output_folder = "nepal"
output_csv = "devinfo_output.csv"

## Execute the query against the DevInfo Access tables

In [None]:
# ODBC connection string for the Access database. The doubled braces are
# str.format escapes and come out as the literal {...} around the driver name;
# the single {} is filled with the configured database path.
connStr = (
    r"Driver={{Microsoft Access Driver (*.mdb, *.accdb)}};"
    r"DBQ={};".format(access_database)
    )

# Joins UT_Data to its indicator, unit, area, area-level, source, time-period,
# subgroup and map-layer lookup tables, ordered by indicator id.
# NOTE(review): every join is an INNER JOIN, including the ones against the
# UT_Area_Map* and UT_Subgroup_* tables, so data values without a match would be
# dropped and values with several matches would be repeated -- confirm intended.
sql = """\
SELECT UT_Data.Indicator_NId, UT_Indicator_en.Indicator_Name, UT_Data.Data_Value, UT_Unit_en.Unit_NId, UT_Unit_en.Unit_Name, UT_Area_en.Area_ID, UT_Area_en.Area_Name, UT_Area_Level_en.Area_Level_Name, UT_Area_en.Area_Level, UT_Indicator_Classifications_en.IC_Name, UT_Indicator_Classifications_en.Publisher, UT_TimePeriod.TimePeriod, UT_Subgroup_Vals_en.Subgroup_Val, UT_Subgroup_Type_en.Subgroup_Type_Name, UT_Area_Map_Layer.Layer_NId
FROM ((((UT_Area_Map_Layer INNER JOIN ((UT_Area_Level_en INNER JOIN (UT_Subgroup_Vals_en INNER JOIN (UT_Unit_en INNER JOIN (UT_Indicator_en INNER JOIN (UT_Indicator_Unit_Subgroup INNER JOIN (UT_TimePeriod INNER JOIN (UT_Indicator_Classifications_en INNER JOIN (UT_Area_en INNER JOIN UT_Data ON UT_Area_en.[Area_NId] = UT_Data.[Area_NId]) ON UT_Indicator_Classifications_en.IC_NId = UT_Data.Source_NId) ON UT_TimePeriod.TimePeriod_NId = UT_Data.TimePeriod_NId) ON UT_Indicator_Unit_Subgroup.IUSNId = UT_Data.IUSNId) ON UT_Indicator_en.Indicator_NId = UT_Indicator_Unit_Subgroup.Indicator_NId) ON UT_Unit_en.Unit_NId = UT_Indicator_Unit_Subgroup.Unit_NId) ON UT_Subgroup_Vals_en.Subgroup_Val_NId = UT_Indicator_Unit_Subgroup.Subgroup_Val_NId) ON UT_Area_Level_en.Area_Level = UT_Area_en.Area_Level) INNER JOIN UT_Area_Map ON UT_Area_en.Area_NId = UT_Area_Map.Area_NId) ON UT_Area_Map_Layer.Layer_NId = UT_Area_Map.Layer_NId) INNER JOIN UT_Area_Map_Metadata_en ON UT_Area_Map_Layer.Layer_NId = UT_Area_Map_Metadata_en.Layer_NId) INNER JOIN UT_Subgroup_Vals_Subgroup ON UT_Subgroup_Vals_en.Subgroup_Val_NId = UT_Subgroup_Vals_Subgroup.Subgroup_Val_NId) INNER JOIN UT_Subgroup_en ON UT_Subgroup_Vals_Subgroup.Subgroup_NId = UT_Subgroup_en.Subgroup_NId) INNER JOIN UT_Subgroup_Type_en ON UT_Subgroup_en.Subgroup_Type = UT_Subgroup_Type_en.Subgroup_Type_NId
ORDER BY UT_Data.Indicator_NId
"""

cnxn = pyodbc.connect(connStr)
try:
    crsr = cnxn.execute(sql)
    # Pull the whole result set into memory; the later cells only need `rows`.
    rows = crsr.fetchall()
finally:
    # Always release the connection, even if the query fails, so the database
    # handle is not held open for the lifetime of the kernel.
    cnxn.close()

print('sucessfully executed data query :: {} rows returned'.format(len(rows)))

## Map the DevInfo fields to the output schema
Here we will map our DevInfo fields to the DSD as defined here. **TODO :: add link(s) to reference data schema**

This is the output data schema that will be used in the CSV file.

In [None]:
# Output schema, written as DSD_FIELD='DevInfoField': each keyword is a column
# name in the CSV header, each value is the query column it is read from.
field_mappings = dict(
    INDICATOR_ID='Indicator_NId',
    INDICATOR='Indicator_Name',
    REF_AREA='Area_Name',
    REF_AREA_ID='Area_ID',
    OBS_VALUE='Data_Value',
    UNIT_ID='Unit_NId',
    UNIT='Unit_Name',
    TIME_PERIOD='TimePeriod',
)

# Any extra columns you want in the output, in the same form.
add_fields = dict(
    Publisher='Publisher',
)

# Append the extra columns after the DSD ones.
field_mappings.update(add_fields)

## Write the result to a CSV file

In [None]:
# Create the destination folder if needed; open() does not create missing
# directories. Skipped when output_folder is empty (current directory).
if output_folder:
    os.makedirs(output_folder, exist_ok=True)
output_path = os.path.join(output_folder, output_csv)

# Header names and the matching query columns, computed once so the row loop
# does not repeat the dictionary lookups.
header_row = list(field_mappings)
source_columns = [field_mappings[field] for field in header_row]

# newline='' lets the csv module control line endings. The encoding is set
# explicitly so non-ASCII text does not depend on the platform default.
with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header_row)

    for row in rows:
        # Each result row is read by attribute name, one per mapped column.
        writer.writerow([getattr(row, column) for column in source_columns])

# Report the file that was actually written (folder included).
print('csv successfully created at {}'.format(os.path.abspath(output_path)))