In [None]:
import csv
import sys

# Paths of the two mail CSV files that readData() reads.
ASFILE = "../usb/ovk/data/eriktks/AS/text/AS-mails.csv"
ESFILE = "../usb/ovk/data/eriktks/ES/text/ES-mails.csv"
# Field delimiter handed to csv.DictReader for every CSV file read in this notebook.
SEPARATOR = ","
# Column names looked up in each mail row by readData().
CLIENT = "client-id"
COUNSELOR = "counselor"

In [None]:
# Metadata CSV file read by readMetaData().
OVKMETAFILE = "../usb/ovk/data/eriktks/spss/opve.csv"
# Column whose value is used as the key of the dictionaries built by readMetaData().
IDFIELDNAME = "onderzoeksnummer1"
# Column whose stripped text is stored in exitData; readData() treats an empty value as "finished".
EXITFIELDNAME = "Redenstoppen"

def readMetaData():
    """Read the metadata CSV file and collect exit reasons and score changes.

    The file OVKMETAFILE is parsed with csv.DictReader using SEPARATOR as
    delimiter. The process is terminated with sys.exit() when the file cannot
    be opened.

    Returns:
        A tuple (exitData, cesdDiff, mhcDiff) of dictionaries, each keyed by
        the row's IDFIELDNAME value:
        exitData -- the EXITFIELDNAME text with surrounding whitespace removed;
        cesdDiff -- int(CESD_TOT_t1) - int(CESD_TOT_t0), only for rows where
                    neither value is "NA";
        mhcDiff  -- float(MHCtot_t1) - float(MHCtot_t0), only for rows where
                    neither value is "NA".
    """
    exitData = {}
    cesdDiff = {}
    mhcDiff = {}

    # newline="" is what the csv module expects for files handed to a reader.
    try: inFile = open(OVKMETAFILE,"r",newline="")
    except Exception as e: sys.exit("cannot read file "+OVKMETAFILE+": "+str(e))
    # The with-block closes the file even when a row raises (missing column,
    # non-numeric score), which the explicit close() did not guarantee.
    with inFile:
        csvReader = csv.DictReader(inFile,delimiter=SEPARATOR)
        for row in csvReader:
            clientId = row[IDFIELDNAME]
            exitData[clientId] = row[EXITFIELDNAME].strip()
            if row["CESD_TOT_t0"] != "NA" and row["CESD_TOT_t1"] != "NA":
                cesdDiff[clientId] = int(row["CESD_TOT_t1"])-int(row["CESD_TOT_t0"])
            if row["MHCtot_t0"] != "NA" and row["MHCtot_t1"] != "NA":
                mhcDiff[clientId] = float(row["MHCtot_t1"])-float(row["MHCtot_t0"])
    return(exitData,cesdDiff,mhcDiff)

# Load the metadata once; readData() looks clients up in the module-level exitData.
exitData,cesdDiff,mhcDiff = readMetaData()

In [None]:
def readData():
    """Count mails per client and per counselor, split by treatment status.

    Every row of ASFILE and ESFILE is one mail. The treatment status of a row
    is derived from the module-level exitData dictionary:
    "unknown"  -- the client id is not a key of exitData;
    "finished" -- the client's exitData value is the empty string;
    "stopped"  -- any other exitData value.

    The process is terminated with sys.exit() when a file cannot be opened or
    when a row lacks the CLIENT or COUNSELOR column.

    Returns:
        A tuple (clientMails, counselorMails, counselorClients); each is a
        dictionary with the keys "unknown", "finished" and "stopped":
        clientMails[status][client]       -- number of rows for the client;
        counselorMails[status][counselor] -- number of rows for the counselor;
        counselorClients[status][counselor][client] -- True for every client
            that appears in a row together with the counselor.
    """
    statuses = ("unknown","finished","stopped")
    clientMails = {status:{} for status in statuses}
    counselorMails = {status:{} for status in statuses}
    counselorClients = {status:{} for status in statuses}
    for inFileName in [ASFILE,ESFILE]:
        # newline="" is what the csv module expects for files handed to a reader.
        try: inFile = open(inFileName,"r",newline="")
        except Exception as e: sys.exit("Cannot read file "+inFileName+": "+str(e))
        # The with-block closes the file on every exit path, including sys.exit().
        with inFile:
            csvReader = csv.DictReader(inFile,delimiter=SEPARATOR)
            for row in csvReader:
                # Only the two column lookups can fail for a malformed file, so
                # only they are guarded; bugs in the counting code stay visible.
                try:
                    client = row[CLIENT]
                    counselor = row[COUNSELOR]
                except KeyError as e: sys.exit(str(e)+" Unexpected row in file "+inFileName+": "+str(row))
                if client not in exitData: treatmentStatus = "unknown"
                elif exitData[client] == "": treatmentStatus = "finished"
                else: treatmentStatus = "stopped"
                mailsOfClients = clientMails[treatmentStatus]
                mailsOfClients[client] = mailsOfClients.get(client,0)+1
                mailsOfCounselors = counselorMails[treatmentStatus]
                mailsOfCounselors[counselor] = mailsOfCounselors.get(counselor,0)+1
                counselorClients[treatmentStatus].setdefault(counselor,{})[client] = True
    return(clientMails,counselorMails,counselorClients)

def fillEmptySpots(myDict,filler):
    """Give every inner dictionary of myDict the same set of keys.

    myDict maps outer keys to inner dictionaries. Every key that occurs in at
    least one inner dictionary is added to the inner dictionaries that lack
    it, with a copy of filler as value. Existing values are left untouched.

    Args:
        myDict: dictionary of dictionaries; modified in place.
        filler: value stored for missing keys. A shallow copy is stored per
            slot so that a mutable filler such as {} is not shared between
            the padded entries.

    Returns:
        The same myDict object, after padding.
    """
    import copy

    # Collect the union of inner keys first (in order of first appearance);
    # the original loop read an undefined name "ts" instead of "ts1" here and
    # only worked when a global "ts" happened to exist in the kernel.
    allKeys = list(dict.fromkeys(key for ts1 in myDict for key in myDict[ts1]))
    for ts2 in myDict:
        for key in allKeys:
            if key not in myDict[ts2]:
                myDict[ts2][key] = copy.copy(filler)
    return(myDict)

# Read the mail counts, then pad the per-status counselor dictionaries via
# fillEmptySpots: 0 for missing mail counts, {} for missing client sets.
clientMails,counselorMails,counselorClients = readData()
counselorMails = fillEmptySpots(counselorMails,0)
counselorClients = fillEmptySpots(counselorClients,{})

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Bar colour used in the plots for each treatment status.
COLORS = { "unknown":"grey","finished":"blue","stopped":"red"}

def computeAverage(myDict):
    """Average the per-key totals of a dictionary of dictionaries.

    The values of equal inner keys are added up over all outer keys; the
    result is numpy's average of those totals.
    """
    totals = {}
    for inner in myDict.values():
        for name, amount in inner.items():
            if name in totals:
                totals[name] += amount
            else:
                totals[name] = amount
    return np.average(list(totals.values()))

def computeMinMax(myDict,key=None):
    """Return the smallest and largest inner key of a dictionary of dictionaries.

    Args:
        myDict: dictionary whose values are dictionaries; empty inner
            dictionaries are ignored.
        key: optional one-argument function applied to each inner key before
            comparing, as for the built-in min() and max(). With the default
            None the keys are compared directly, so string keys are ordered
            lexicographically ("10" < "9"); pass key=int to compare numeric
            strings by value.

    Returns:
        A tuple (myMin, myMax) holding the original (unconverted) keys, or
        (None, None) when no inner dictionary contains a key.
    """
    allKeys = [innerKey for inner in myDict.values() for innerKey in inner]
    if not allKeys:
        return(None,None)
    return(min(allKeys,key=key),max(allKeys,key=key))

In [None]:
# Stacking order of the treatment statuses in the counselor charts, bottom to top.
STATUSORDER = ["unknown","stopped","finished"]

def plotStackedCounts(countsPerStatus,average,title):
    """Draw one stacked bar per counselor in the current subplot.

    Args:
        countsPerStatus: dictionary status -> {counselor: count} with the
            keys listed in STATUSORDER.
        average: height of the horizontal average line.
        title: subplot title.
    """
    # Fix one counselor order for all layers. The per-status dictionaries hold
    # the same counselors in different insertion orders, so stacking their
    # .values() lists index by index would mix up counselors.
    counselors = list(dict.fromkeys(c for status in STATUSORDER for c in countsPerStatus[status]))
    positions = list(range(1,len(counselors)+1))
    bottoms = np.zeros(len(counselors))
    plt.title(title)
    for status in STATUSORDER:
        heights = np.array([countsPerStatus[status].get(c,0) for c in counselors],dtype=float)
        plt.bar(positions,heights,color=COLORS[status],bottom=bottoms)
        bottoms = bottoms+heights
    plt.plot((0.5,len(counselors)+0.5),(average,average),"y")

plt.figure(figsize=(15,8))

# Top: number of mails per client, coloured by treatment status.
plt.subplot(2,1,1)
nbrOfClients = np.sum([len(clientMails[x]) for x in clientMails])
average = computeAverage(clientMails)
plt.title("OVK data: Number of mails per client: "+str(nbrOfClients)+" clients; "+str(len(clientMails["stopped"]))+
          " stopped; average number of mails per client: "+str(int(average)))
for ts in clientMails:
    plt.bar([int(x) for x in clientMails[ts].keys()],list(clientMails[ts].values()),color=COLORS[ts])
# Client ids are strings; convert them before taking min/max so the average
# line spans the numeric id range ("10" sorts before "9" as a string).
myMin,myMax = computeMinMax({status:{int(c):n for c,n in clientMails[status].items()} for status in clientMails})
plt.plot((myMin,myMax),(average,average),"y")

# Bottom left: mails per counselor, stacked by treatment status.
plt.subplot(2,2,3)
average = computeAverage(counselorMails)
plotStackedCounts(counselorMails,average,"Mails per counselor (average: "+str(int(average))+")")

# Bottom right: distinct clients per counselor, stacked by treatment status.
plt.subplot(2,2,4)
clientCounts = {status:{c:len(clients) for c,clients in counselorClients[status].items()} for status in counselorClients}
average = computeAverage(clientCounts)
plotStackedCounts(clientCounts,average,"Clients per counselor (average: "+str(int(average))+")")

plt.show()