# Tactus metadata

Extract therapy result assessments from the metadata

In [None]:
import csv
import numpy as np
import os
import pandas as pd
import re
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Base directory of the data set; later cells list the files found in
# DIRECTORY + "/" + <variant> for each intake-form variant.
DIRECTORY = "/home/erikt/projects/e-mental-health/usb/tmp/20190917/"

All clients (1986) have filled in an intake form. However, there are three different formats of the intake form, which we distinguish by the label of question 1 about gender:
1. 1-geslacht0: 1045 clients
2. 1-geslacht: 897 clients
3. 1-geslachtt0: 44 clients

In [None]:
# Labels of question 1 in the three intake-form formats; each one is also
# the name of a subdirectory holding the files of that format.
VARIANTS = ["1-geslacht0","1-geslacht","1-geslachtt0"]
# File names of interest start with this prefix.
FILENAMEPREFIX = "AdB"

def countFiles(directory,prefix):
    """Return the number of entries in `directory` whose name starts with `prefix`.

    `prefix` is inserted into a regular expression anchored at the start of
    the name, so regex metacharacters in it keep their special meaning.
    """
    matchCount = 0
    for entryName in os.listdir(directory):
        if re.search("^"+prefix,entryName):
            matchCount += 1
    return matchCount

# Count the matching files in each variant folder and chart the totals.
variantDirectories = [DIRECTORY+"/"+variant for variant in VARIANTS]
nbrOfFiles = [countFiles(variantDirectory,FILENAMEPREFIX)
              for variantDirectory in variantDirectories]
plt.bar(VARIANTS,nbrOfFiles)
plt.title("Number of clients per type of intake form")
# Last expression of the cell: shows the raw counts below the chart.
nbrOfFiles

Few clients fill in any of the four subsequent questionnaires: half-way, end, after three months and after six months. We want to use the first two to measure their progress in the therapy, so we check how frequently these forms are present.

In [None]:
import sys
# Put the directory that is expected to contain tactus2table.py on the
# module search path so the import below can find it.
# NOTE(review): this path contains ".../project/..." whereas DIRECTORY uses
# ".../projects/..." — confirm which spelling is correct.
sys.path.insert(1, '/home/erikt/project/e-mental-health/data-processing')
import tactus2table

In [None]:
# Key under which a questionnaire stores its title, and the two titles
# that are counted below.
TITLE = "0-title"
TUSSENMETING = "Lijst tussenmeting"
NAMETING = "Lijst nameting"

# Per intake-form variant: how many questionnaires carry each title.
questionnairesCountTussenmeting = dict.fromkeys(VARIANTS,0)
questionnairesCountNameting = dict.fromkeys(VARIANTS,0)
for variant in VARIANTS:
    variantDirectory = DIRECTORY+"/"+variant
    for inFileName in os.listdir(variantDirectory):
        # Skip entries that do not start with the expected prefix.
        if not re.search("^"+FILENAMEPREFIX,inFileName):
            continue
        root = tactus2table.readRootFromFile(variantDirectory+"/"+inFileName)
        questionnaires = tactus2table.getQuestionnaires(root,inFileName)
        for questionnaire in questionnaires:
            title = questionnaire[TITLE]
            if title == TUSSENMETING:
                questionnairesCountTussenmeting[variant] += 1
            elif title == NAMETING:
                questionnairesCountNameting[variant] += 1

In [None]:
# Show both per-variant count dictionaries: tussenmeting first, nameting second.
questionnairesCountTussenmeting,questionnairesCountNameting

In [None]:
# Grouped bar chart: for every intake-form variant show the number of
# intake files next to the tussenmeting and nameting questionnaire counts.
barWidth = 0.3
index = np.arange(len(VARIANTS))
# Build the bar heights as real lists, ordered by VARIANTS. A dict view
# (dict.values()) is not a sequence: numpy wraps it as a single object
# instead of an array of numbers, so it is not a reliable height argument.
tussenmetingCounts = [questionnairesCountTussenmeting[variant] for variant in VARIANTS]
nametingCounts = [questionnairesCountNameting[variant] for variant in VARIANTS]
plt.bar(index,nbrOfFiles,barWidth,label="intake")
plt.bar(index+barWidth,tussenmetingCounts,barWidth,label="tussenmeting")
plt.bar(index+2*barWidth,nametingCounts,barWidth,label="nameting")
# The middle bar of each group sits at index+barWidth, so put the tick there.
plt.xticks(index+barWidth,VARIANTS)
plt.title("Number of responses per type of intake form")
plt.legend()
# Last expression of the cell: shows the tussenmeting counts below the chart.
tussenmetingCounts

Since the response frequencies for clients with intake format "1-geslacht" are a lot higher than those of the other two formats, it makes sense to focus on this format. Let's check how many of those 897 clients sent at least one email to the counselor.

In [None]:
# Intake-form variant to focus on, the sender value to look for, and the
# position of the sender field within an email record.
VARIANT = "1-geslacht"
CLIENT = "CLIENT"
FROM = 1

# Count the files in which at least one email has CLIENT in its FROM field.
clientSentEmailCounter = 0
variantDirectory = DIRECTORY+"/"+VARIANT
for inFileName in os.listdir(variantDirectory):
    if not re.search("^"+FILENAMEPREFIX,inFileName):
        continue
    root = tactus2table.readRootFromFile(variantDirectory+"/"+inFileName)
    emails = tactus2table.getEmailData(root,inFileName)
    # any() stops at the first matching email.
    if any(email[FROM] == CLIENT for email in emails):
        clientSentEmailCounter += 1

clientSentEmailCounter

Of the intermediate questionnaire, question 8 (8-intefft) is most interesting: *Do you consider this Internet therapy to be an effective method for changing alcohol-related habits?* (*Vind je internetbehandeling een effectieve methode om je drinkgewoonte te veranderen?*) A positive answer signals a positive effect of the therapy on the client. Let's see how they responded to this question.

In [None]:
# The question of interest appears under one of these two labels,
# depending on the form; QUESTION8 takes precedence when both are present.
QUESTION8 = "8-intefft"
QUESTION9 = "9-inteff1"

# Tally of the answers given in the tussenmeting questionnaires: answer -> count.
responses = {}
variantDirectory = DIRECTORY+"/"+VARIANT
for inFileName in os.listdir(variantDirectory):
    if not re.search("^"+FILENAMEPREFIX,inFileName):
        continue
    root = tactus2table.readRootFromFile(variantDirectory+"/"+inFileName)
    questionnaires = tactus2table.getQuestionnaires(root,inFileName)
    for questionnaire in questionnaires:
        if questionnaire[TITLE] != TUSSENMETING:
            continue
        for question in (QUESTION8,QUESTION9):
            if question in questionnaire:
                response = questionnaire[question]
                responses[response] = responses.get(response,0)+1
                break
        else:
            # Neither label was found in this questionnaire.
            print("no inteff question in",inFileName)

responses

We expected an overwhelmingly positive response (because the people for whom the therapy did not work could have stopped). The answers are indeed mostly positive, but interestingly 150 of the 470 clients responded with *I don't know* (*ik weet het niet*). Four clients filled in a different form where the question had number 9 instead of 8.

In [None]:
# Inspect the questionnaires of one example file.
inFileName = "AdB0010.xml.gz"
inFilePath = DIRECTORY+"/"+VARIANT+"/"+inFileName
root = tactus2table.readRootFromFile(inFilePath)
questionnaires = tactus2table.getQuestionnaires(root,inFileName)
questionnaires

In [None]:
# Load a CSV file located below DIRECTORY into a DataFrame.
# NOTE(review): FILE is not defined in any visible cell of this notebook;
# unless it is set elsewhere before this cell runs, this line raises
# NameError — confirm the intended file name.
data = pd.read_csv(DIRECTORY+"/"+FILE)

In [None]:
# Column to clean up.
# NOTE(review): the column label is "14-drugs" while the text removed below
# is "leeftijd in jaren" (age in years) and the results are called ages —
# confirm that this is the intended column.
FIELDNAME = "14-drugs"
AGESUFFIXPATTERN = " *leeftijd in jaren *"
column = data[FIELDNAME]
# Strip the age phrase (and surrounding spaces) from every row's value.
ages = [re.sub(AGESUFFIXPATTERN,"",column[rowId]) for rowId in range(0,len(data))]
agesPD = pd.Series(ages)

In [None]:
# Bar chart of how often each value of column "30-week2" occurs, ordered by value.
week2Counts = data["30-week2"].value_counts().sort_index()
week2Counts.plot(kind="bar",figsize=(14,6))


In [None]:
# end form: 18-weekn 19-weekn

In [None]:
# List the column labels of the data frame.
list(data)


In [None]:
# Show the raw values of column "30-week2".
data["30-week2"]