In [1]:
from __future__ import division
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
from matplotlib.colors import ListedColormap
from datetime import datetime
import json
import ast

%pylab inline
pylab.rcParams['figure.figsize'] = (12.0, 6.0) # set size of figures"
plt.rcParams.update({'font.size': 24})
pd.options.display.max_rows=50
import re

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy


In [2]:
def find_sheet_name(target_text,sheet_names):
    '''Return a list of sheet names containing a specific string'''
    return [name for name in sheet_names if target_text.lower() in name.lower()]
def find_column_name(target,columns):
    return [name for name in columns if target.lower() in name.lower()]

def month_diff(b,a):
    '''month differnece between two pandas datetime objects'''
    return (b.month - a.month + (b.year - a.year)*12)

def pq_lookup(date):
    '''retun period, quarter given a date (year,month,day)'''
    #3 month index to period quarter
    quarter_map={1:(1,1),2:(1,2),3:(2,3),4:(2,4),5:(3,1),6:(3,2),7:(4,3),8:(4,4),9:(5,1),10:(5,2),11:(6,3),12:(6,4)}
    qnum=1+int(month_diff(date,pd.datetime(2016,4,1))/3)
    p,q=quarter_map[qnum]
    return p,q

def cleanUp(sentence):
    sentence=sentence.replace('succesfully','successfully')#ahh misspelling!
    sentence=sentence.replace('Centres','Centers')#ahh misspelling!
    sentence=sentence.replace('(','').replace(')','')
    sentence=re.sub("sex workers", "sex workers", sentence, flags=re.I)
    sentence=re.sub("SWs", "sex workers", sentence, flags=re.I)
    sentence=sentence.replace('&','and').replace('tested positive','newly diagnosed').replace('-','')
    sentence=' '.join(sentence.split()) #clean up extra whitespaces
    return sentence

stopwords=['number','of','-','by','the','who','for','(SR)','an']
def clean_line(sentence):
    return ' '.join([word for word in sentence.lower().split() if word not in stopwords])

def save_dict_as_json(filename, mapping):
    '''Save dict as json object'''
    with open(filename, 'w') as outfile:
        json.dump(mapping, outfile, indent=4, sort_keys=True, separators=(',', ':'))

def load_dict_from_json(filepath):
    '''given file path to json mapping return a dict'''
    with open(filepath) as data_file:    
        return json.load(data_file)

In [4]:
load_dict_from_json('meMAP.json')['A1']

u'A1 Number of sex workers reached with HIV prevention programs individual and/or smaller group level interventions'

<h1> Load lookup dictionary</h1>

In [7]:
ME=load_dict_from_json('meMAP.json')
#ME

In [8]:
def lookupME(description):
    description=cleanUp(clean_line(description.split(':')[-1]))
    words=description.lower().split()
    best_key,best_overlap='',0
    for key,val in ME.iteritems():
        val=clean_line(val)
        l=[re.findall(word, val) for word in words if len(word)>1]
        overlap=len([item for sublist in l for item in sublist]) / (len(val.split())+len(words))#divide by total len
        if (overlap>best_overlap):
            best_key,best_overlap=key,overlap
            #check for sucessfully and initiated (b vs a flag)
            if ((('successfully' in words) or ('initiated' in words)) & (best_key[-1]=='a')):
                    best_key.replace('a','b')
    return(best_key)

In [9]:
#save_dict_as_json('meMAP', ME)

In [10]:
lookupME('B4b: Number of newly diagnosed HIV positive SWs initiated on  ART'),\
lookupME('B8a: Number of HIV positive SWs succesfully referred for Adherence support'),\
lookupME(u'Number of GF - RED UMBRELLA condoms distributed.'),\
lookupME(u'Number of GF - RED UMBRELLA lube distributed.')

(u'B4b', u'B8b', u'F1', u'F3')

In [11]:
lookupME('behavioural_safety_intervention_e_g_substance_abuse_risk_reduction_plan'.replace('_',' '))

u'A1'

<h1> Lookup date from P,Q using the pq lookup function </h1>

In [29]:
pqMAP=dict()
#Dates=pd.PeriodIndex(freq='m', start='2016-4-1', periods=36) #three years
Dates = pd.date_range('2016-4-01', periods=36, freq='M')
Dates=[Date.replace(day=1) for Date in Dates]#DATE series by month defults to last day, we want the first
for Date in Dates:
    #print(Date.strftime('%Y-%m-%d'),pq_lookup(Date))
    if pq_lookup(Date) not in pqMAP.keys(): #only use first month for the quarter
        pqMAP[str(pq_lookup(Date))]=Date.strftime('%Y-%m-%d') #match pq to date
pqMAP

{'(1, 1)': '2016-06-01',
 '(1, 2)': '2016-09-01',
 '(2, 3)': '2016-12-01',
 '(2, 4)': '2017-03-01',
 '(3, 1)': '2017-06-01',
 '(3, 2)': '2017-09-01',
 '(4, 3)': '2017-12-01',
 '(4, 4)': '2018-03-01',
 '(5, 1)': '2018-06-01',
 '(5, 2)': '2018-09-01',
 '(6, 3)': '2018-12-01',
 '(6, 4)': '2019-03-01'}

In [30]:
np.save('pqMAP.npy',pqMAP)
save_dict_as_json('pqMAP.json', pqMAP)

In [51]:
pqMAP[str((1, 1))]

'2016-06-01'

In [46]:
tpqMAP = load_dict_from_json('pqMAP.json')

def lookupDate(p,q):
    '''Lookup the date corresponding to the first day of the period and quarter'''
    #dictioary key is a tuple as string '(p, q)'
    return (tpqMAP[str((p, q))])

In [47]:
lookupDate(2, 4)

u'2017-03-01'

<h1> Fix SR names </h1>

In [144]:
#make all CAPS
SR=SR.upper()

if SR == 'Nqobile Women':
    SR='NQOBILE'
if SR == 'THCA':
    SR ='TBHIV'
if SR== 'QAC':
    SR= 'QLAC'
if SR =='ll': #this is a guess
    SR='LLJBH'
    
# GRIP, HTT, LL *, PSH, Munna Ndi Nnyi ???

'HELLO'

In [48]:
srMAP={'cpc':'CPC','humana':'HUMANA','lljbh':'LLJBH','nqobile':'NQOBILE', 'nqobile women':'NQOBILE','phru':'PHRU',\
       'oasis':'OASIS','sweat':'SWEAT','tbhiv':'TBHIV','thca':'TBHIV','qlac':'QLAC','qac':'QLAC','wrhi':'WRHI',\
        'wrih':'WRHI','whri':'WRHI','ll':'LLJBH','llpc':'LLJBH','lifeline nc':'LLJBH','ll durban':'LLJBH',\
       'll fs':'LLJBH','ll zululand':'LLJBH'}

np.save('srMAP.npy',srMAP)
save_dict_as_json('srMAP.json', srMAP)

In [49]:
def lookupSR(SR):
    SR=SR.lower()
    if SR in srMAP.keys():
        SR=srMAP[SR]
    else:
        SR=SR.upper()
    return SR.lower()

In [50]:
lookupSR('qac')

'qlac'

<h3> test data fields </h3>

In [57]:
DATA_FIELDS = {
    'Counter (1 = first outreach for quarter)': [],
    'Number of services received in quarter': [],
    'Red Umbrella Male Condom & lube': [],
    'Other condoms (e.g. DoH)': [],

    'Female Condom': ['F2 Female condoms (Number Given)', 'FemaleCondom'],
    'F 1 Male Condoms(Number given)': ['Male Condom', 'F 1 Male Condoms (Number given)'],
    'F3 Lube': [],

    # Workshops
    'E4 Support group workshops': ['Support group'],

    # HTS Indicators
    'B 5 Known HIV positive Status': ['SWs with known HIV positive status'],
    'B 1 Received HTS and know results': ['Received HCT and know results', 'HCT provided by nurse (SW tested and know result)'],
    'B2 SW tested HIV negative': ['SWs who tested HIV negative'],
    'HIV negative SWs referred for PrEP': [],
    'B 7 HIV negative SWs receiving PrEP': ['HIV negative SWs receiving PrEP'],
    'B 3 Newly diagnosed HIV positive SW': ['nNewly diagnosed HIV positive SW'],
    'B4 A Referred for ART1= Referred': ['B4 A Referred for ART 1= Referred'],
    'B 4B Initiated on ART1= Initiated': ['B 4B Initiated on ART 1= Initiated'],
    'B6 SWs who refused HIV test': [ 'SWs who refused HIV test'],

    # STI
    'C1 SWs screened for Sexually Transmitted Infections (STI) 2': ['Ws screened for Sexually Transmitted Infections (STI)'],

    # Adherence
    'B 8A Referred to Treatment Adherence Clubs': [],
    'B 8 B Succesful referal to Treatment Adherence group': [],

    # STI
    'C2 STI Referral 1= Referred': [],
    'C2B Successful STI referal': [],

    # TB
    'D1 Number of SWs screened for TB': ['Number of SWs screened for TB'],
    'D2A TB Referal to health facility1=Referred': ['D2A TB Referal to health facility 1=Referred'],
    'D2B Succesful TB referal': [],

    # Human Rights
    'Did client face any Human rights violation': ['Human rights violation', 'Human rights violations'],
    'Referral to Human rights organisations1= Referred': ['Legal', 'Referral to Human rights organisations 1= Referred'],
    'Succesful Human Rights violations referal': [],

    # Other
    'IEC': [],
    'Risk Reduction Workshop': ['E2 Risk reduction Workshop', 'Risk reduction Workshop', 'Creative Space Workshop'],
    'Paralegal/ Human Rights Education/Health other (specify in comments)': [],
    'Health and sexual reproductive health services': [],
    'Virally supressed': [],
    'Behavioural / Safety intervention (e.g. substance abuse, risk reduction plan)': ['Psycho social'],
    'Reffered to Other (explain e.g. Home Affairs for ID)': [],
    'Creative Space Topic': [],
    'Creative Space name': [],
    'Goody bags': [],
}

In [60]:
[(lookupME(val),val) for val in DATA_FIELDS.keys()]

[(u'C2a', 'C2 STI Referral 1= Referred'),
 (u'G2a', 'Succesful Human Rights violations referal'),
 (u'C2a', 'B 8A Referred to Treatment Adherence Clubs'),
 (u'F3', 'F3 Lube'),
 (u'D1', 'D2A TB Referal to health facility1=Referred'),
 (u'F1', 'F 1 Male Condoms(Number given)'),
 ('', 'Goody bags'),
 (u'F2', 'Female Condom'),
 (u'G2a', 'Did client face any Human rights violation'),
 ('V3', 'B 4B Initiated on ART1= Initiated'),
 ('', 'Virally supressed'),
 (u'F1', 'Other condoms (e.g. DoH)'),
 (u'C1', 'C1 SWs screened for Sexually Transmitted Infections (STI) 2'),
 (u'E1', 'Risk Reduction Workshop'),
 (u'A2', 'Counter (1 = first outreach for quarter)'),
 (u'C2a', 'B 8 B Succesful referal to Treatment Adherence group'),
 (u'C2b', 'C2B Successful STI referal'),
 (u'C2a', 'Reffered to Other (explain e.g. Home Affairs for ID)'),
 (u'F1', 'Red Umbrella Male Condom & lube'),
 (u'B3', 'B 3 Newly diagnosed HIV positive SW'),
 ('V1', 'Health and sexual reproductive health services'),
 (u'B7', 'B 7 