In [1]:
import csv

# pip install dataset if you don't have it
# http://dataset.readthedocs.io/en/latest/

import dataset

# Open a connection to the SQLite database that holds the data.
# The URL is kept in a named constant so the data source is easy to spot.

DATABASE_URL = "sqlite:///sams_data_phase17.sqlite"
db = dataset.connect(DATABASE_URL)

In [2]:
# Get the records from the database so we can make them pretty for Excel.
#
# The query returns one row per (facility, month) with the number of raw
# records counted for that month:
#   - date_str is built as "<year>-<month>". SUBSTR('0' || month, -2, 2) keeps
#     the last two characters, so single-digit months are zero-padded
#     (e.g. "2014-09"). With that fixed width, plain string sorting is also
#     chronological (assuming four-digit years).
#   - num_records counts the full_raw_scrubbed rows joined to each file
#     through a_file_id.
#   - Only rows where skipped, ignore and problem_opening are all 0 are kept.
#     NOTE(review): these three columns are not table-qualified, so which
#     table they belong to cannot be told from here -- confirm and consider
#     prefixing them.
#   - Both joins are plain (inner) JOINs, so files without a matching facility
#     or without any raw records do not appear in the result.

statement = """
        SELECT f.facilityname, 
               f.facility_code, 
               files.year || '-' || SUBSTR('0' || CAST(files.month AS VARCHAR(2)), -2, 2) AS date_str,
               COUNT(raw.id) as num_records
        FROM files 
        JOIN facilities f 
        ON files.facility_id = f.id
        JOIN full_raw_scrubbed raw
        ON files.id = raw.a_file_id
        WHERE date_str IS NOT NULL
          AND skipped = 0
          AND ignore = 0
          AND problem_opening = 0
        GROUP BY facilityname, facility_code, date_str
        ORDER BY facility_code ASC, date_str ASC;
"""

In [3]:
# Run the query and materialise every returned row into a list, so the
# result can be iterated several times by the cells below.

data = list(db.query(statement))

In [5]:
# Take a look at the first two rows returned from the query to confirm each
# one carries facilityname, facility_code, date_str and num_records.

data[:2]

[OrderedDict([('facilityname', 'GHOUTA HOSPITAL'),
              ('facility_code', 'DAS001'),
              ('date_str', '2014-09'),
              ('num_records', 268)]),
 OrderedDict([('facilityname', 'GHOUTA HOSPITAL'),
              ('facility_code', 'DAS001'),
              ('date_str', '2014-10'),
              ('num_records', 264)])]

In [6]:
# Collect every distinct month present in the query result, in ascending
# order. These become the month column headers of the CSV file.

dates = sorted({record['date_str'] for record in data})

# Display the list to check it for accuracy (pretty-printed by Jupyter).
dates

['2014-07',
 '2014-08',
 '2014-09',
 '2014-10',
 '2014-11',
 '2014-12',
 '2015-01',
 '2015-02',
 '2015-03',
 '2015-04',
 '2015-05',
 '2015-06',
 '2015-07',
 '2015-08',
 '2015-09',
 '2015-10',
 '2015-11',
 '2015-12',
 '2016-01',
 '2016-02',
 '2016-03',
 '2016-04',
 '2016-05',
 '2016-06',
 '2016-07',
 '2016-08',
 '2016-09',
 '2016-10',
 '2016-11',
 '2016-12',
 '2017-01',
 '2017-02',
 '2017-03',
 '2017-04',
 '2017-05',
 '2017-06']

In [8]:
# Build the empty output matrix: one entry per facility code, holding the
# facility's code and name plus a None placeholder for every month in `dates`.
# The counts are filled in afterwards.

facilities = {}

for record in data:
    facility_code = record["facility_code"]
    if facility_code in facilities:
        # This facility already has its row; nothing to initialise.
        continue

    facility_row = {
        "facility_code": facility_code,
        "facility_name": record["facilityname"],
    }
    # Start every month cell as None so months without records stay blank.
    facility_row.update(dict.fromkeys(dates))
    facilities[facility_code] = facility_row

In [9]:
# Fill the matrix: drop each row's record count into the cell for its
# facility and month.

for record in data:
    facility_row = facilities[record["facility_code"]]
    facility_row[record["date_str"]] = record["num_records"]

In [10]:
# Flatten the facility dict into a list of per-facility dicts, ordered by
# facility code, which is the shape csv.DictWriter expects for export.

restructured_data = [facilities[code] for code in sorted(facilities)]

In [11]:
# Verify that it looks like we want it: show the first facility's row (the
# lowest facility code, since the list was built in sorted key order). It
# should hold facility_code, facility_name and one entry per month, with None
# for months that have no records.
restructured_data[0]

{'2014-07': None,
 '2014-08': None,
 '2014-09': 268,
 '2014-10': 264,
 '2014-11': 259,
 '2014-12': 303,
 '2015-01': None,
 '2015-02': None,
 '2015-03': None,
 '2015-04': None,
 '2015-05': None,
 '2015-06': None,
 '2015-07': None,
 '2015-08': None,
 '2015-09': None,
 '2015-10': None,
 '2015-11': None,
 '2015-12': None,
 '2016-01': None,
 '2016-02': None,
 '2016-03': None,
 '2016-04': None,
 '2016-05': None,
 '2016-06': None,
 '2016-07': None,
 '2016-08': None,
 '2016-09': None,
 '2016-10': None,
 '2016-11': None,
 '2016-12': None,
 '2017-01': None,
 '2017-02': None,
 '2017-03': None,
 '2017-04': None,
 '2017-05': None,
 '2017-06': None,
 'facility_code': 'DAS001',
 'facility_name': 'GHOUTA HOSPITAL'}

In [13]:
# Write the facility-by-month matrix to a CSV file.
# https://docs.python.org/3.6/library/csv.html#csv.DictWriter

# Spell the column order out explicitly: the identifying columns first, then
# one column per month in ascending order. Taking the header from
# restructured_data[0].keys() relied on dict insertion order (not guaranteed
# before Python 3.7) and raised IndexError when the query returned no rows.
fieldnames = ["facility_code", "facility_name"] + dates

# newline="\n" turns off newline translation on write, so the line endings
# produced by the csv module reach the file unchanged.
with open("records_by_date_by_facility.csv", "w", newline="\n", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    # None cells are written as empty strings, i.e. blank cells in the sheet.
    writer.writerows(restructured_data)
