In [3]:
import os
import sys
from datetime import datetime, timedelta
import math 
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt, ticker

from lib.display import Display
from lib.database import Database
from lib.schema import Schema

from lib.participant_builder import ParticipantsBuilder
from lib.studies.symptom_tracker import SymptomTracker

# Study object from lib.studies; it is passed to ParticipantsBuilder further down.
study = SymptomTracker()

In [32]:
# Connect to the symptom-tracker schema and preview the two raw tables
# (participants and questionnaire responses) that later cells work from.
db = Database(Schema.SYMPTOM_TRACKER)
Display.header('User research on ALP', 'h1')

# Participants: total row count plus the first 5 rows.
Display.header('Participants', 'h3')
participants_df = db.get_participants()
print(f'Number of participants: {len(participants_df)}')
Display.frame(participants_df, 5)

# Responses: QUESTIONNAIRE_ID is left out of the preview only;
# responses_df itself keeps the column for later cells.
Display.header('Responses', 'h3')
responses_df = db.get_responses()
Display.frame(responses_df.drop('QUESTIONNAIRE_ID', axis=1), 5)

Number of participants: 33


Unnamed: 0,ALP_ID,EXTERNAL_ID,STATUS,START_DATE,END_DATE
0,14030,anonymous,enrolled,2022-07-09 00:00:00,NaT
1,14038,anonymous,enrolled,2022-07-07 00:00:00,NaT
2,14000,anonymous,withdrawn,2022-07-09 19:39:42,2022-07-10 10:03:27
3,13999,anonymous,withdrawn,2022-07-09 19:37:39,2022-07-09 19:39:07
4,13996,anonymous,enrolled,2022-07-08 00:00:00,NaT


Unnamed: 0,ALP_ID,VERSION,AUTHORED,QUESTIONNAIRE,LINK_ID,VALUE,VALUECODING_CODE,LANGUAGE,TEXT
0,13987,6.0.1,2021-08-06,covid19-covhub-symptom-tracking,abdominal-pain,,LA32-8,en,Abdominal pain
1,13987,6.0.1,2021-08-06,covid19-covhub-symptom-tracking,worsening-exhaustion,,LA32-8,en,Symptom worsening following exertion
2,13987,6.0.1,2021-08-06,covid19-covhub-symptom-tracking,SC,,LA32-8,en,Loss of taste or smell
3,13987,6.0.1,2021-08-06,covid19-covhub-symptom-tracking,dyssomnia,,LA33-6,en,Difficulty sleeping
4,13987,6.0.1,2021-08-06,covid19-covhub-symptom-tracking,fatigue,,LA32-8,en,Fatigue (chronic exhaustion)


In [6]:
# Build the per-participant table with the project builder, then attach each
# participant's earliest and latest AUTHORED value from the raw responses.
builder = ParticipantsBuilder(study, participants_df, responses_df)
df = (
    builder
    .filter_data4life_users()
    .add_last_donation()
    .add_all_submissions()
    .get()
)

# Named aggregation yields flat column labels directly, so no positional
# relabelling of a two-level column index is needed afterwards.
result = (
    responses_df
    .groupby('ALP_ID')
    .agg(first_donation=('AUTHORED', 'min'), last_donation=('AUTHORED', 'max'))
    .reset_index()
)

# Left join keeps every row of the builder output even if the participant has
# no responses; rows are then ordered by the 'all' column, largest first.
df = (
    df.merge(result, how='left', on='ALP_ID')
    .drop(columns=['EXTERNAL_ID'])
    .sort_values(by='all', ascending=False)
)

Display.frame(df, 5)

Unnamed: 0,ALP_ID,STATUS,START_DATE,END_DATE,ENROLED_ON,LAST_DONATION,all,symptoms,vaccine,test,first_donation,last_donation
25,14072,enrolled,2022-07-07,NaT,NaT,2022-07-09,141,139,2,0,2022-01-07,2022-07-09
8,14069,enrolled,2022-07-07,NaT,NaT,2022-07-08,108,98,2,8,2022-03-08,2022-07-08
9,14068,enrolled,2022-07-07,NaT,NaT,2022-07-06,94,93,1,0,2022-03-16,2022-07-06
0,14030,enrolled,2022-07-09,NaT,NaT,2022-07-09,38,26,0,12,2022-02-08,2022-07-09
22,14066,enrolled,2022-07-07,NaT,NaT,2022-07-09,33,30,1,2,2022-06-07,2022-07-09


In [31]:
# Per questionnaire: number of distinct submissions (QUESTIONNAIRE_ID) and
# number of distinct participants (ALP_ID) who submitted it.
df = (
    responses_df
    .groupby('QUESTIONNAIRE')[['QUESTIONNAIRE_ID', 'ALP_ID']]
    .nunique()
    .rename(columns={'QUESTIONNAIRE_ID': 'COUNT_SUBMISSIONS', 'ALP_ID': 'COUNT_USERS'})
    .reset_index()
)
Display.frame(df)

Unnamed: 0,QUESTIONNAIRE,COUNT_SUBMISSIONS,COUNT_USERS
0,covid19-covhub-symptom-tracking,883,28
1,post-covid19-test,52,19
2,vaccine_questionnaire,25,13
