In [1]:
# Manipulate the file system
import os
import shutil
import datetime
import arrow
import requests

import hashlib

from time import sleep

# Display errors in realtime
import ipywidgets as widgets
import time
import re

# This used to be a part of dataset but was extracted to its own library
# https://github.com/pudo/datafreeze
from datafreeze import freeze

# Export database table to CSV
import csv

# Copy dictionaries
import copy

# Convert stored string representation of a list to a list
import ast

# Recurse through a directory tree and return file names with glob
import glob

# Decode and re-encode mangled Arabic file names
import codecs

# Connect to a SQLite database in a lazy manner.
import dataset
import sqlalchemy

# Enables opening and reading of Excel files
import openpyxl

# Translating variables, sheet names, and workbook names from Arabic
# This is NOT free to use.
from google.cloud import translate

# Point the Google client libraries at the service-account key file.
# setdefault keeps a value that is already present in the environment, so the
# notebook also runs on machines where the credentials live somewhere else;
# the machine-specific path below is only the fallback.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS',
                      'C:\\Users\\clay\\Documents\\fxb-lcs-2b24f4f8a73a.json')

In [2]:
# Start every run from a fresh clone of the template database: delete the
# working copy left behind by a previous run, then copy the template over.
new_db_name = "sams_data_phase25.sqlite"
template_db_name = "sams_data_phase24_output_2018-04-08.sqlite"

try:
    os.remove(new_db_name)
    print("Removed template clone ", new_db_name)
except FileNotFoundError:
    # Nothing to clean up, e.g. on the very first run.
    pass
except OSError as err:
    # Report the problem instead of hiding it; the cell still does not raise.
    print("Could not remove ", new_db_name, ": ", err)

try:
    # copy2 copies the file contents together with its metadata.
    shutil.copy2(template_db_name, new_db_name)
    print("Created database from template: ", new_db_name)
except OSError as err:
    # Without the template the later cells would work on a missing or stale
    # database, so make the failure visible rather than swallowing it.
    print("Could not create database from template ", template_db_name, ": ", err)

Removed template clone  sams_data_phase25.sqlite
Created database from template:  sams_data_phase25.sqlite


In [3]:
# AirTable API configuration.
#
# The token is taken from the AIRTABLE_API_KEY environment variable when it is
# set, so the real secret does not have to be saved inside the notebook.
# Without the variable the placeholder is used and must be replaced by hand.
airtable_key = "Bearer " + os.environ.get("AIRTABLE_API_KEY", "REDACTED") #"Authorization: Bearer YOUR_API_KEY"
request_headers = {
    "Authorization": airtable_key,
    "Content-type": "application/json"
}
# Table names are appended directly to this URL, so it must end with a slash.
base_url = "https://api.airtable.com/v0/app6szidbcJlRxj5g/"
#response = requests.get("https://api.airtable.com/v0/app6szidbcJlRxj5g/countries", headers=request_headers)

In [4]:
#response.status_code

In [5]:
# j = response.json()
# j['records'][0]

A record looks like this:

```
{'createdTime': '2018-04-12T00:31:52.000Z',
 'fields': {'Name': 'Lebanon',
  'facilities': ['recrdrwrxBP3x80In',
   'recTD0aDLWLnAByou',
   'rectfTbBWyM1YQXn4',
   'recRLHVj2skUkUNBW',
   'rechm0kgRfDFqxjvJ',
   'recb1lR2rNK0QCX4D',
   'recp38qKiXRkOLGxv',
   'recHS2gucodddPd5P',
   'reciesEp9eR407apZ',
   'recw58HyQ0Xh6ui71',
   'recD5U9wUPGVuv60j'],
  'facility_codes': ['SAMS081',
   'SAMS040',
   'SAMS168',
   'SAMS152',
   'SAMS047',
   'SAMS050',
   'SAMS151',
   'SAMS030',
   'SAMS210',
   'SAMS071',
   'SAMS072']},
 'id': 'rec195re9CCNPh8PF'}
 ```

In [6]:
def create_lookup_table():
    """Build a lookup from AirTable record id to location name.

    Reads the ``countries``, ``governorates``, ``districts`` and
    ``subdistricts`` tables and maps every record's ``id`` to the value of its
    ``Name`` field. All four tables share one dictionary.

    Every table is read page by page: while a response carries an ``offset``,
    that value is sent with the next request, so tables that do not fit into a
    single response are still read completely.

    Returns:
        dict: record id -> value of the record's ``Name`` field.

    Raises:
        requests.HTTPError: if a request is answered with an error status.
        KeyError: if a record has no ``Name`` field.
    """
    lookup = {}
    first_request = True

    for table_name in ("countries", "governorates", "districts", "subdistricts"):
        params = {}
        while True:
            # Wait a second between consecutive requests
            # (presumably to stay under the API's rate limit).
            if not first_request:
                sleep(1)
            first_request = False

            response = requests.get(base_url + table_name,
                                    headers=request_headers,
                                    params=params)
            # Fail with the HTTP error itself rather than with a KeyError on
            # the missing 'records' key of an error payload.
            response.raise_for_status()
            page = response.json()

            for rec in page['records']:
                lookup[rec['id']] = rec['fields']['Name']

            if 'offset' not in page:
                break
            params = {'offset': page['offset']}

    return lookup

In [7]:
# Build the record-id -> name lookup once. The cell that assembles
# `fac_records` resolves linked location ids through `l`.
l = create_lookup_table()

In [8]:
l

{'rec0KCYHvjLw3Fkfg': 'Msaifra',
 'rec0VfRGBig2Qd6fe': 'Rachaya',
 'rec195re9CCNPh8PF': 'Lebanon',
 'rec1k94bekbOjSPNL': 'Deir-ez-Zor',
 'rec4AP3SaQb4UJole': 'Mzeireb',
 'rec6LqzAJsTbkQ1zZ': 'Salqin',
 'rec6ZFUzE33UOwkXF': 'Homs',
 'rec7EFQHi4JEOfgZj': 'Larisa',
 'rec7KYkjG6cC2qiv4': 'Ehsem',
 'rec84nJUkBogjnpgC': 'Bennsh',
 'rec8ITGK8rEUdp0BA': 'Kastoria',
 'rec8jDpaNHzWgSxfy': 'Daraa',
 'rec931r1GpqDfienO': 'Kafr Batna',
 'rec93V0FIpbOXNHfh': 'Ryhanlia',
 'rec94mACR9HCQqISf': 'Idlib',
 'rec9KzT5JDtOb0jL7': 'Kafr Nobol',
 'rec9TSiyiO1SWWAbt': 'Aghtrin',
 'recA6tiVKkfDDpnHm': 'Aleppo',
 'recAzKkeXXKgpcE4S': 'Katarini',
 'recBKjweAglgTKrmm': 'Volos',
 'recCLyCzRjt3yhBD9': 'Quneitra',
 'recDCndIWNZnON07q': 'Haritan',
 'recDLuTPYmbXnSMZ6': 'Idlib',
 'recDdgHk9HTQM5BfM': 'Rural Damascus',
 'recECoYD2dJms6DmO': 'Quneitra',
 'recEY7HiziUzIBfCW': 'Khan Arnaba',
 'recEbvTxd5HK4qx67': 'Harim',
 'recFXV6VGbip7Vdvy': 'Deir-ez-Zor',
 'recFkTmYEFznYsvbj': 'Idlib',
 'recGBtufCBBoc2oBV': 'Rehanya',
 

If there are more records, the response will contain an `offset`. To fetch the next page of records, include that `offset` value in the next request's parameters.

In [9]:
# Download every page of the "facilities" table. Each parsed page is kept in
# `responses`; as long as a page carries an 'offset', that value is sent with
# the next request to fetch the following page.
responses = []
params = {}

while True:
    facilities_response = requests.get(base_url + "facilities",
                                       headers=request_headers,
                                       params=params)
    # Stop on an HTTP error instead of storing an error payload as a page.
    facilities_response.raise_for_status()

    # Parse the body once per page and reuse the result.
    page = facilities_response.json()
    responses.append(page)

    if 'offset' not in page:
        break

    # Wait a second between consecutive requests.
    sleep(1)
    params = {'offset': page['offset']}

In [10]:
len(responses)

4

In [11]:
#data = facilities_response.json()

In [12]:
#data['records'][0]

In [13]:
# Turn the downloaded AirTable pages into flat dictionaries, one per facility.

# Fields that are left out of the facility record.
skipped_fields = ('# Linked Files', 'files')
# Fields whose value is a list; its first entry is resolved to a name via `l`.
location_fields = ('country', 'governorate', 'district', 'subdistrict')
# Date fields that must be present in every record, even when AirTable omits them.
date_fields = ('dateopened', 'dateclosed', 'reopened1', 'closed1', 'reopened2', 'closed2')
# Id fields that are reset to None on every record.
location_id_fields = ('country_id', 'governorate_id', 'district_id', 'subdistrict_id')

fac_records = []
for page in responses:
    for record in page['records']:
        facility = {}

        for name, value in record['fields'].items():
            if name in skipped_fields:
                continue

            if name in location_fields:
                facility[name] = l[value[0]]
            elif value == 'No':
                # 'No' / 'Yes' answers are stored as 0 / 1.
                facility[name] = 0
            elif value == 'Yes':
                facility[name] = 1
            elif name == 'facility_id':
                # The facility id is stored under the key 'id'.
                facility['id'] = value
            else:
                facility[name] = value

        for name in date_fields:
            facility.setdefault(name, None)

        for name in location_id_fields:
            facility[name] = None

        fac_records.append(facility)

In [14]:
fac_records[0]

{'closed1': None,
 'closed2': None,
 'country': 'Syria',
 'country_id': None,
 'dateclosed': None,
 'dateopened': '2014-01-10',
 'dental': 0,
 'district': 'Izra',
 'district_id': None,
 'er': 0,
 'facility_code': 'SAMS154',
 'facility_type': 'PHC',
 'facilityname': 'Muwafeq Dakhl Alla Clinic',
 'governorate': 'Daraa',
 'governorate_id': None,
 'icu': 0,
 'id': 60,
 'mentalhealth': 0,
 'openstatus': 'open',
 'organization': 'SAMS',
 'orthopedic': 0,
 'parent_facility_id': 59,
 'pediatric': 0,
 'phc': 1,
 'reopened1': None,
 'reopened2': None,
 'rh': 0,
 'subdistrict': 'Tassil',
 'subdistrict_id': None}

In [15]:
# Open the working SQLite database (the file named by `new_db_name`) through dataset.
db = dataset.connect('sqlite:///' + new_db_name)

In [16]:
# Handle to the 'facilities' table of the working database.
tab_facilities = db['facilities']

In [17]:
# Write the AirTable values into the facilities table, matching rows on 'id'.
# All updates run in a single transaction: either every facility is updated or,
# if anything fails, none is.
unmatched_ids = []

db.begin()
try:
    for rec in fac_records:
        updated = tab_facilities.update(rec, ['id'])
        # update() only changes existing rows; remember the records that
        # matched nothing so they do not get lost silently.
        if updated == 0:
            unmatched_ids.append(rec['id'])
    db.commit()
except Exception:
    db.rollback()
    raise

if unmatched_ids:
    print("No facilities row found for ids: ", unmatched_ids)

In [18]:
# Pick the facility record whose id is 1 for a spot check. The search stops at
# the first match; `r` stays None when no record has that id.
r = next((rec for rec in fac_records if rec.get('id') == 1), None)

In [19]:
r

{'closed1': None,
 'closed2': None,
 'country': 'Jordan',
 'country_id': None,
 'dateclosed': None,
 'dateopened': '2017-11-15',
 'dental': 1,
 'district': 'Unknown',
 'district_id': None,
 'facility_code': 'SAMS206',
 'facility_type': 'Polyclinic',
 'facilityname': 'Zaatari',
 'governorate': 'Unknown',
 'governorate_id': None,
 'id': 1,
 'mentalhealth': 1,
 'openstatus': 'open',
 'organization': 'SAMS',
 'orthopedic': 1,
 'pediatric': 1,
 'phc': 1,
 'reopened1': None,
 'reopened2': None,
 'rh': 1,
 'subdistrict': 'Unknown',
 'subdistrict_id': None}

In [20]:
len(fac_records)

322

## Now run queries to generate the flagged datasets

In [21]:
# Export one row per entry of full_raw_flags, together with the year/month of
# the file it belongs to, the facility that file is assigned to, and every
# flag_* column.
#
# Only files that have a facility and a month, and whose `skipped` and `ignore`
# columns are both 0, are included.
#
# You can change this query to export a different set of data
#
# NOTE(review): full_date is built by plain string concatenation of year and
# month; if month is stored without a leading zero the result looks like
# '2018-4-01' -- confirm this is what consumers of the CSV expect.
# NOTE(review): the query reads facilities.facility_parent_id, while the
# facility records shown earlier in this notebook use the key
# 'parent_facility_id' -- confirm which column name the facilities table has.
result = db.query("""
SELECT  files.id as files_id,
        files.year,
        files.month,
        files.year || '-' || files.month || '-01' AS full_date,
        facilities.id AS facility_id,
        facilities.facility_parent_id,
        facilities.facilityname,
        facilities.country,
        facilities.governorate,
        facilities.district,
        facilities.subdistrict,
        facilities.facility_type,
        full_raw_flags.flag_abdomen,
        full_raw_flags.flag_abdominal_pain,
        full_raw_flags.flag_allergy,
        full_raw_flags.flag_anemia,
        full_raw_flags.flag_animal_insect_bite,
        full_raw_flags.flag_back,
        full_raw_flags.flag_blast,
        full_raw_flags.flag_bleed,
        full_raw_flags.flag_blunt,
        full_raw_flags.flag_burn,
        full_raw_flags.flag_cancer,
        full_raw_flags.flag_cardiovascular,
        full_raw_flags.flag_chest,
        full_raw_flags.flag_complication,
        full_raw_flags.flag_conflict_related,
        full_raw_flags.flag_congenital,
        full_raw_flags.flag_constipation,
        full_raw_flags.flag_dehydration,
        full_raw_flags.flag_dental_complaint,
        full_raw_flags.flag_derm,
        full_raw_flags.flag_diabetes,
        full_raw_flags.flag_diarrhea_dysentery,
        full_raw_flags.flag_endocrine,
        full_raw_flags.flag_ENT,
        full_raw_flags.flag_explosive,
        full_raw_flags.flag_eye,
        full_raw_flags.flag_facial,
        full_raw_flags.flag_fatigue,
        full_raw_flags.flag_fever,
        full_raw_flags.flag_follow_up,
        full_raw_flags.flag_fracture,
        full_raw_flags.flag_gi_complaint,
        full_raw_flags.flag_growth_delay,
        full_raw_flags.flag_gu,
        full_raw_flags.flag_gunshot,
        full_raw_flags.flag_gyn_women,
        full_raw_flags.flag_head,
        full_raw_flags.flag_headache,
        full_raw_flags.flag_history_of,
        full_raw_flags.flag_hyperlipidemia,
        full_raw_flags.flag_infection,
        full_raw_flags.flag_injury,
        full_raw_flags.flag_injury_neuro,
        full_raw_flags.flag_liver_dysfunction,
        full_raw_flags.flag_lower_extremity,
        full_raw_flags.flag_malnutrition,
        full_raw_flags.flag_mental_health,
        full_raw_flags.flag_musculoskeletal_pain,
        full_raw_flags.flag_nausea_vomiting,
        full_raw_flags.flag_neck,
        full_raw_flags.flag_nerve,
        full_raw_flags.flag_neuro_complaint,
        full_raw_flags.flag_neurologic,
        full_raw_flags.flag_orthopedic,
        full_raw_flags.flag_other_infection,
        full_raw_flags.flag_pain,
        full_raw_flags.flag_pelvic,
        full_raw_flags.flag_pregnancy,
        full_raw_flags.flag_renal,
        full_raw_flags.flag_respiratory,
        full_raw_flags.flag_shrapnel,
        full_raw_flags.flag_spinal,
        full_raw_flags.flag_spine,
        full_raw_flags.flag_stab,
        full_raw_flags.flag_stroke,
        full_raw_flags.flag_suspected,
        full_raw_flags.flag_traffic_accident,
        full_raw_flags.flag_trauma,
        full_raw_flags.flag_upper_extremity,
        full_raw_flags.flag_urologic,
        full_raw_flags.flag_vascular,
        full_raw_flags.flag_wound,
        full_raw_flags.flag_comprehensive_injury

FROM full_raw_flags
JOIN files on files.id = full_raw_flags.file_id
JOIN facilities on files.facility_id = facilities.id

WHERE files.facility_id IS NOT NULL 
AND files.month IS NOT NULL
AND files.skipped = 0
AND files.ignore = 0;
""")

# Write the query result to a CSV file.
# This used to be a part of dataset but was extracted to its own library
# https://github.com/pudo/datafreeze
freeze(result, format='csv', filename='full_raw_flags_2018_04_29.csv')

In [22]:
# Optional step: keep a dated snapshot of the working database.
# Be aware that the copy will be gigabytes in size.
output_db_name = 'sams_data_phase25_output_2018-04-29.sqlite'
shutil.copy2(new_db_name, output_db_name)

'sams_data_phase25_output_2018-04-29.sqlite'

**TODO:** The locations still need to be reimported and the locations table recreated.