# Tactus analysis

Analysis of Tactus AdB data for data paper (2020).

In [3]:
import os
import re
import sys
import xml.etree.ElementTree as ET
sys.path.insert(1, '/home/erikt/project/e-mental-health/data-processing')
import tactus2table

In [4]:
# Directory holding the files that every query in this notebook reads.
# The value must end with a path separator: file paths are built as DATADIR+inFileName.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
DATADIR = "/home/erikt/projects/e-mental-health/usb/tmp/20190917/"
# Presumably the title of the intake questionnaire (compare the outputs of sections 3 and 7);
# not referenced by any of the cells visible here.
INTAKE = "Intake"

## 1. How many clients have a therapy start date in their profile?

In [3]:
# Only file names that start with "A" and end with "z" are processed.
FILEPATTERN = r"^A.*z$"
# Path expression (relative to the document root) of the therapy start date.
QUERY1 = "./Treatment/StartDate"
# Labels returned by query1(): a start date is present / absent.
STARTDATE = "STARTDATE"
NONE = "NONE"

def query1(root,query):
    """Tell whether the first node matching `query` holds a non-blank text.

    Parameters:
        root: parsed document root; only its findall() method is used.
        query: path expression handed to root.findall().

    Returns:
        STARTDATE when the first matching node has text other than white space,
        NONE when there is no matching node, when its text is missing or blank,
        or when root is None.
    """
    # Only the expected lookup failures are absorbed: no matching node (IndexError)
    # or a missing root (AttributeError). Any other error is a real problem and propagates.
    try: text = root.findall(query)[0].text
    except (IndexError, AttributeError): text = None
    if text is None or text.strip() == "": return(NONE)
    return(STARTDATE)

In [4]:
# Tally the query1() label of every file in DATADIR whose name matches FILEPATTERN.
files = sorted(os.listdir(DATADIR))

data = {}
for inFileName in files:
    # Skip files that are not part of the data set.
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    text1 = query1(root,QUERY1)
    data[text1] = data.get(text1,0)+1
data

{'STARTDATE': 923, 'NONE': 1060}

## 2. How many clients sent an email to the counselor?

In [5]:
# Only file names that start with "A" and end with "z" are processed.
FILEPATTERN = r"^A.*z$"
# Path expression (relative to the document root) of the sender of each message.
QUERY2 = "./Messages/Message/Sender"
# Sender value that marks a message as written by the client.
CLIENT = "CLIENT"
# Labels returned by query2().
CLIENTMAIL = "CLIENTMAIL"
NOCLIENTMAIL = "NOCLIENTMAIL"

def query2(root,query):
    """Tell whether any node matching `query` has the text CLIENT.

    Parameters:
        root: parsed document root; only its findall() method is used.
        query: path expression handed to root.findall().

    Returns:
        CLIENTMAIL as soon as one matching node has text equal to CLIENT
        (surrounding white space ignored), NOCLIENTMAIL otherwise.
    """
    for node in root.findall(query):
        # An empty sender element has text None: it cannot be the client,
        # so skip it instead of failing on None.strip().
        if node.text is not None and node.text.strip() == CLIENT:
            return(CLIENTMAIL)
    return(NOCLIENTMAIL)

In [6]:
# Tally the query2() label of every file in DATADIR whose name matches FILEPATTERN.
files = sorted(os.listdir(DATADIR))

# Both labels start at zero so that each of them shows up in the result.
data = dict.fromkeys([CLIENTMAIL,NOCLIENTMAIL],0)
for inFileName in files:
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    text2 = query2(root,QUERY2)
    data[text2] = data[text2]+1
data

{'CLIENTMAIL': 1125, 'NOCLIENTMAIL': 858}

## 3. How many of the intake forms have the title Vragenlijst?

In [7]:
# Only file names that start with "A" and end with "z" are processed.
FILEPATTERN = r"^A.*z$"
# Path expression (relative to the document root) of the intake questionnaire title.
QUERY3 = "./Intake/Questionnaire/Title"

def query3(root,query):
    """Return the stripped text of the first node matching `query`.

    Parameters:
        root: parsed document root; only its findall() method is used.
        query: path expression handed to root.findall().

    Returns:
        The text of the first matching node without surrounding white space,
        or NONE (defined in the section 1 cell) when there is no matching node,
        when the node has no text, or when root is None.
    """
    # Only the expected lookup failures are absorbed: no matching node (IndexError)
    # or a missing root/text (AttributeError). Any other error propagates.
    try: return(root.findall(query)[0].text.strip())
    except (IndexError, AttributeError): return(NONE)

In [8]:
# Tally the intake questionnaire title (query3) of every file in DATADIR matching FILEPATTERN.
files = sorted(os.listdir(DATADIR))

data = {}
for inFileName in files:
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    text3 = query3(root,QUERY3)
    data[text3] = data.get(text3,0)+1
data

{'Intake': 1937, 'Vragenlijst': 46}

## 4. Combination of 1, 2 and 3

In [9]:
# Tally the combined labels of query1, query2 and query3 per file.
# Uses `files` as listed by the preceding cell.
data = {}
for inFileName in files:
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    text1 = query1(root,QUERY1)
    text2 = query2(root,QUERY2)
    text3 = query3(root,QUERY3)
    # The key is the three labels separated by single spaces.
    text4 = " ".join((text1,text2,text3))
    data[text4] = data.get(text4,0)+1
data

{'STARTDATE CLIENTMAIL Intake': 770,
 'NONE NOCLIENTMAIL Intake': 716,
 'STARTDATE NOCLIENTMAIL Intake': 131,
 'NONE CLIENTMAIL Intake': 320,
 'NONE CLIENTMAIL Vragenlijst': 14,
 'NONE NOCLIENTMAIL Vragenlijst': 10,
 'STARTDATE CLIENTMAIL Vragenlijst': 21,
 'STARTDATE NOCLIENTMAIL Vragenlijst': 1}

## 5. Count Intake Question sets

In [10]:
# Path expression (relative to the document root) of the intake questions.
QUERY5 = "./Intake/Questionnaire/Content/question"
# Only file names that start with "A" and end with "z" are processed.
FILEPATTERN = r"^A.*z$"

def getAnswerIds(root):
    """Collect the ids of all intake answers as "<question number>-<answer ID>".

    Parameters:
        root: parsed document root; its findall() method is queried with QUERY5.

    Returns:
        A list of strings, one per answer node, in document order. A question
        without a usable questionNumber inherits the number of the last question
        that was processed completely ("0" at the start).
    """
    answerIds = []
    lastQuestionNbr = "0"
    for questionNode in root.findall(QUERY5):
        # No questionNumber node (IndexError) or an empty one (AttributeError):
        # fall back to the number of the previous question.
        try: questionNbr = questionNode.findall("./questionNumber")[0].text.strip()
        except (IndexError, AttributeError): questionNbr = lastQuestionNbr
        try:
            for answerNode in questionNode.findall("./answer"):
                answerIds.append(questionNbr+"-"+answerNode.attrib["ID"])
        # An answer without ID attribute ends the processing of this question:
        # ids collected so far are kept and lastQuestionNbr is not advanced.
        except KeyError: continue
        lastQuestionNbr = questionNbr
    return(answerIds)

In [11]:
# Group the files by their complete sequence of answer ids and count each group.
files = sorted(os.listdir(DATADIR))

answerIdStrings = {}
for inFileName in files:
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    answerIds = getAnswerIds(root)
    answerIdString = " ".join(answerIds)
    answerIdStrings[answerIdString] = answerIdStrings.get(answerIdString,0)+1

# Order the groups from most to least frequent.
answerIdStrings = dict(sorted(answerIdStrings.items(),key=lambda item:item[1],reverse=True))
# Per group: number of clients, number of answer ids, and the first three ids.
for key in answerIdStrings:
    print("clients: {0}; answers: {1}; answer fields: {2}".format(answerIdStrings[key],len(key.split()),key.split()[:3]))

clients: 1014; answers: 259; answer fields: ['1-geslacht0', '2-leeftijd0', '3-national0']
clients: 881; answers: 140; answer fields: ['1-geslacht', '2-leeftijd', '3-woonsit']
clients: 46; answers: 27; answer fields: ['1-geslacht0', '2-medi0', '2-medicijnr0']
clients: 42; answers: 236; answer fields: ['1-geslachtt0', '2-leeftijdt0', '3-woonsitt0']


## 6. Combination of 4 and 5

In [13]:
# Position in the getAnswerIds() list of the id added to the key (index 1: the second id).
QUESTION2 = 1

# Tally the labels of query1, query2 and query3 together with that answer id.
# Uses `files` as listed by an earlier cell.
data = {}
for inFileName in files:
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    answerId = getAnswerIds(root)[QUESTION2]
    text1 = query1(root,QUERY1)
    text2 = query2(root,QUERY2)
    text3 = query3(root,QUERY3)
    text4 = " ".join((text1,text2,text3,answerId))
    data[text4] = data.get(text4,0)+1
data

{'STARTDATE CLIENTMAIL Intake 2-leeftijd': 683,
 'NONE NOCLIENTMAIL Intake 2-leeftijd0': 667,
 'NONE NOCLIENTMAIL Intake 2-leeftijd': 48,
 'STARTDATE NOCLIENTMAIL Intake 2-leeftijd0': 6,
 'NONE CLIENTMAIL Intake 2-leeftijd0': 292,
 'STARTDATE NOCLIENTMAIL Intake 2-leeftijd': 122,
 'STARTDATE CLIENTMAIL Intake 2-leeftijd0': 49,
 'NONE CLIENTMAIL Intake 2-leeftijd': 28,
 'NONE CLIENTMAIL Vragenlijst 2-medi0': 14,
 'STARTDATE CLIENTMAIL Intake 2-leeftijdt0': 38,
 'NONE NOCLIENTMAIL Vragenlijst 2-medi0': 10,
 'STARTDATE CLIENTMAIL Vragenlijst 2-medi0': 21,
 'NONE NOCLIENTMAIL Intake 2-leeftijdt0': 1,
 'STARTDATE NOCLIENTMAIL Intake 2-leeftijdt0': 3,
 'STARTDATE NOCLIENTMAIL Vragenlijst 2-medi0': 1}

## 7. Count all the questionnaires

In [21]:
# Path expression matching the Title of a Questionnaire at any depth below the root.
QUERY7 = ".//Questionnaire/Title"
# Only file names that start with "A" and end with "z" are processed.
FILEPATTERN = r"^A.*z$"

In [23]:
# Tally the titles of all questionnaires found in the files of DATADIR matching FILEPATTERN.
files = sorted(os.listdir(DATADIR))

titles = {}
for inFileName in files:
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    for title in root.findall(QUERY7):
        # Drop a " RCT-" marker plus the one character following it from the title.
        titleText = re.sub(r" RCT-.","",title.text.strip())
        titles[titleText] = titles.get(titleText,0)+1
titles

{'Intake': 1937,
 'Voordelen, nadelen': 774,
 'Drink Wijzer': 594,
 'Lijst tussenmeting': 519,
 'Doel stellen': 588,
 'Lijst motivatie': 447,
 'Anders denken': 453,
 'Anders doen': 421,
 'Beslissingen': 399,
 'Actieplan': 360,
 'Lijst nameting': 335,
 'Lijst 3 maanden': 251,
 'Lijst half jaar': 189,
 'Vragenlijst': 46,
 'Tussenmeting': 31,
 'Drop out': 7,
 'Lijst 9 maanden': 7}

## 8. Combination of 7 and 5

In [31]:
# Combination for which the names of the matching files are printed.
TARGETKEY = "2-medi0 Lijst half jaar"

files = sorted(os.listdir(DATADIR))

# Tally every questionnaire title together with the answer id at position QUESTION2.
titles = {}
for inFileName in files:
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    answerId = getAnswerIds(root)[QUESTION2]
    for title in root.findall(QUERY7):
        # Key: answer id, a space, and the title without its " RCT-" marker.
        titleText = " ".join([answerId,re.sub(r" RCT-.","",title.text.strip())])
        titles[titleText] = titles.get(titleText,0)+1
        if titleText == TARGETKEY: print(inFileName)
# Show the counts ordered by key.
{key:titles[key] for key in sorted(titles)}

AdB0718.xml.gz
AdB1102.xml.gz
AdB1343.xml.gz
AdB1612.xml.gz
AdB1771.xml.gz


{'2-leeftijd Actieplan': 296,
 '2-leeftijd Anders denken': 381,
 '2-leeftijd Anders doen': 356,
 '2-leeftijd Beslissingen': 336,
 '2-leeftijd Doel stellen': 426,
 '2-leeftijd Drink Wijzer': 509,
 '2-leeftijd Intake': 881,
 '2-leeftijd Lijst 3 maanden': 208,
 '2-leeftijd Lijst half jaar': 153,
 '2-leeftijd Lijst motivatie': 407,
 '2-leeftijd Lijst nameting': 272,
 '2-leeftijd Lijst tussenmeting': 468,
 '2-leeftijd Voordelen, nadelen': 670,
 '2-leeftijd0 Actieplan': 29,
 '2-leeftijd0 Anders denken': 31,
 '2-leeftijd0 Anders doen': 28,
 '2-leeftijd0 Beslissingen': 27,
 '2-leeftijd0 Doel stellen': 115,
 '2-leeftijd0 Drink Wijzer': 38,
 '2-leeftijd0 Intake': 1014,
 '2-leeftijd0 Lijst 3 maanden': 14,
 '2-leeftijd0 Lijst half jaar': 12,
 '2-leeftijd0 Lijst motivatie': 30,
 '2-leeftijd0 Lijst nameting': 28,
 '2-leeftijd0 Lijst tussenmeting': 39,
 '2-leeftijd0 Voordelen, nadelen': 46,
 '2-leeftijdt0 Actieplan': 26,
 '2-leeftijdt0 Anders denken': 29,
 '2-leeftijdt0 Anders doen': 28,
 '2-leeftijd

## 9. Get the therapy steps

In [14]:
# From here on only file names that start with "AdB" and end with "z" are processed.
FILEPATTERN = r"^AdB.*z$"
# Path expression (relative to the document root) of the treatment step titles.
QUERY9 = "./Treatment/TreatmentSteps/TreatmentStep/Title"

files = sorted(os.listdir(DATADIR))

# Tally the sequence of treatment step titles of each file; titles are joined with "#".
titleDict = {}
for inFileName in files:
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    titles = [title.text.strip() for title in root.findall(QUERY9)]
    titleString = "#".join(titles)
    titleDict[titleString] = titleDict.get(titleString,0)+1
# Show the counts ordered by title sequence.
{titleString:titleDict[titleString] for titleString in sorted(titleDict)}

{'': 903,
 'Afsluiting': 1,
 'Afsluiting#Na 3 maanden#Na half jaar': 1,
 'Doel stellen': 133,
 'Doel stellen#Actieplan#Afsluiting#Na 3 maanden#Na half jaar': 8,
 'Doel stellen#Afsluiting': 7,
 'Doel stellen#Afsluiting#Na 3 maanden#Na half jaar': 4,
 'Doel stellen#Gewoontes doorbreken#Actieplan#Afsluiting#Na 3 maanden#Na half jaar': 1,
 'Doel stellen#Gewoontes doorbreken#Afsluiting': 1,
 'Gewoontes doorbreken': 1,
 'Intake': 1,
 'Voordelen, nadelen': 163,
 'Voordelen, nadelen#Afsluiting': 10,
 'Voordelen, nadelen#Alcoholschrift bijhouden': 114,
 'Voordelen, nadelen#Alcoholschrift bijhouden#Afsluiting': 7,
 'Voordelen, nadelen#Alcoholschrift bijhouden#Anders doen': 1,
 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren': 48,
 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Afsluiting': 5,
 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten': 35,
 'Voordelen, nadelen#Alcoholschrift bijhouden#Situaties analyseren#Meten en weten

## 10. Get therapy completion information

In [33]:
# Path expression (relative to the document root) of the treatment steps.
QUERY10 = "./Treatment/TreatmentSteps/TreatmentStep"
# Path expressions relative to one treatment step.
TITLE = "./Title"
SUBMISSIONDATE = "./SubmissionDate"
APPROVALDATE = "./ApprovalDate"
# Titles of the treatment steps that are counted below.
ADVANTAGES = "Voordelen, nadelen"
ACTIONPLAN = "Actieplan"

def checkTreatmentStep(root,targetTitle):
    """Tell whether the treatment step titled `targetTitle` has all its dates filled in.

    Parameters:
        root: parsed document root; its findall() method is queried with QUERY10.
        targetTitle: step title to look for (compared after stripping white space).

    Returns:
        True when a step with that title exists and none of its SubmissionDate
        and ApprovalDate nodes has an empty text; a step without any date node
        therefore counts as finished. False when no step has that title or when
        one of the dates is empty. If several steps share the title, the last
        one decides.
    """
    treatmentStepFinished = False
    for treatmentStep in root.findall(QUERY10):
        for title in treatmentStep.findall(TITLE):
            # A title node without text can never match; skip it rather than
            # failing on None.strip().
            if title.text is None or title.text.strip() != targetTitle: continue
            treatmentStepFinished = all(
                date.text is not None
                for datePath in (SUBMISSIONDATE,APPROVALDATE)
                for date in treatmentStep.findall(datePath))
    return(treatmentStepFinished)

# Count the files for which checkTreatmentStep() accepts the ADVANTAGES step and,
# among those, the files for which it also accepts the ACTIONPLAN step.
# NOTE: despite its name, nbrOfAdvantagesStarted is incremented only when
# checkTreatmentStep(root,ADVANTAGES) is true, i.e. when that step has no empty date.
# Uses `files` and FILEPATTERN as set by an earlier cell.
nbrOfAdvantagesStarted = 0
nbrOfActionPlanFinished = 0
for inFileName in files:
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    if not checkTreatmentStep(root,ADVANTAGES): continue
    nbrOfAdvantagesStarted += 1
    if checkTreatmentStep(root,ACTIONPLAN): nbrOfActionPlanFinished += 1
nbrOfAdvantagesStarted,nbrOfActionPlanFinished

(774, 354)

## 11. Get the text of questions

In [None]:
# Path expression (relative to the document root) of the intake questions.
QUERY6 = "./Intake/Questionnaire/Content/question"

# Print the text that belongs to every answer id of one example file.
# Uses FILEPATTERN and DATADIR as set by earlier cells.
for inFileName in ["AdB0174.xml.gz"]:
    if not re.search(FILEPATTERN,inFileName): continue
    root = tactus2table.readRootFromFile(DATADIR+inFileName)
    lastQuestionNbr = "0"
    for questionNode in root.findall(QUERY6):
        # Number and title are looked up independently, so a question without
        # title keeps its own number and the printed ids stay the same as the
        # ones built by getAnswerIds().
        try: questionNbr = questionNode.findall("./questionNumber")[0].text.strip()
        except (IndexError, AttributeError): questionNbr = lastQuestionNbr
        try: questionTitle = re.sub(r"\s+"," ",questionNode.findall("./title")[0].text.strip())
        except (IndexError, AttributeError): questionTitle = ""
        try:
            for a,answerNode in enumerate(questionNode.findall("./answer")):
                try: answerTitle = re.sub(r"\s+"," ",answerNode.findall("./title")[0].text.strip())
                except (IndexError, AttributeError): answerTitle = ""
                answerId = questionNbr+"-"+answerNode.attrib["ID"]
                if answerTitle != "":
                    # Answers with their own title: print the question once as a
                    # header line, then one line per answer.
                    if a == 0 and questionNbr != lastQuestionNbr: print(questionNbr,questionTitle)
                    print(answerId,end=" ")
                    print(answerTitle)
                else:
                    # Answers without title are described by the question title.
                    print(answerId,end=" ")
                    print(questionTitle)
        # An answer without ID attribute ends the processing of this question
        # and leaves lastQuestionNbr unchanged.
        except KeyError: continue
        lastQuestionNbr = questionNbr