In [1]:
import pandas as pd
from pandasql import sqldf
import math

# Import data

In [10]:
# Candidate locations of the data folder; the Windows location is tried first.
paths = {'windows': 'C:/Users/jjung/iCloudDrive/ETH/MSc 3rd semester/Semester project/Data',
         'macOS': '/Users/janoschjungo/Library/Mobile Documents/com~apple~CloudDocs/ETH/MSc 3rd semester/Semester project/Data'}
path = paths['windows']

# import physiological data (one CSV file per subject)
subject_frames = []
for subjectID in range(1, 28):
    # load physiological data for subject; if the file is not found under the
    # current location, switch to the macOS location for this and all later files
    try:
        file = path + f'/subjectID_{subjectID}.csv'
        physio_subject = pd.read_csv(file)
    except FileNotFoundError:
        path = paths['macOS']
        file = path + f'/subjectID_{subjectID}.csv'
        physio_subject = pd.read_csv(file)

    # tag every row with its subject (a scalar is broadcast to all rows)
    physio_subject['SubjectID'] = subjectID

    # clean column names (dataset contains different column names per subject);
    # rename() leaves the frame untouched when the old name is absent
    physio_subject = physio_subject.rename(columns={'SkinTemperature.Value': 'SkinTemperature'})

    subject_frames.append(physio_subject)

# combine all subject data in a single concat instead of re-copying the growing
# frame on every iteration; each subject keeps its own row index, as before
physio = pd.concat(subject_frames)

# import fatigue (PROs) data from whichever location worked above
fatigue = pd.read_csv(path + '/fatiguePROs.csv')

# Convert data

# a) Fatigue (PROs) data

In [151]:
# convert questions into fatigue variables
# The query returns one output row per row of `fatigue`. The answer is written into the
# column whose question text matches PROquestion (VAS takes PROanswer_value, the other
# four take PROanswer_choice). A CASE without ELSE yields NULL, so the non-matching
# columns stay empty for that row.
query = '''
SELECT SubjectID as subjectID, DateTime AS timestamp, Timezone AS timezone,
CASE
    WHEN PROquestion LIKE 'Describe fatigue on a scale of 1 to 10, where 1 means you don’t feel tired at all and 10 means the worst tiredness you can imagine' THEN PROanswer_value
END AS 'VAS',
CASE
    WHEN PROquestion LIKE 'Physically, today how often did you feel exhausted?' THEN PROanswer_choice
END AS 'phF',
CASE
    WHEN PROquestion LIKE 'Mentally, today how often did you feel exhausted?' THEN PROanswer_choice
END AS 'MF',
CASE
    WHEN PROquestion LIKE 'Are you feeling better, worse or the same as yesterday?' THEN PROanswer_choice
END AS 'ReIP',
CASE
    WHEN PROquestion LIKE 'Did you do sport today?' THEN PROanswer_choice
END AS 'sport'
FROM fatigue
'''
temp = sqldf(query)
# fill missing entries with pd.NA (pandas' missing-value marker)
temp = temp.fillna(value=pd.NA)
temp

Unnamed: 0,subjectID,timestamp,timezone,VAS,phF,MF,ReIP,sport
0,1,14.03.19 20:01,UTC,2.0,,,,
1,1,14.03.19 20:01,UTC,,,,Worse,
2,1,14.03.19 20:01,UTC,,Never,,,
3,1,14.03.19 20:01,UTC,,,Sometimes,,
4,1,14.03.19 20:01,UTC,,,,,No
...,...,...,...,...,...,...,...,...
2266,28,16.08.18 00:51,CEST,,,Sometimes,,
2267,28,16.08.18 00:52,CEST,4.0,,,,
2268,28,16.08.18 00:52,CEST,,,,Better,
2269,28,16.08.18 00:52,CEST,,Never,,,


In [152]:
# distinguish different questionnaires with same timestamp (data is in chronological order)
#
# Consecutive rows of `temp` belong to the same questionnaire until one of these happens:
#   (a) the subject changes,
#   (b) the timestamp changes, or
#   (c) a question shows up that was already answered in the running questionnaire.
# (a) is checked explicitly so that answers of one subject can never leak into the
# questionnaire state of the next subject when both happen to share a timestamp.
question_columns = ['VAS', 'phF', 'MF', 'ReIP', 'sport']

# positional 0..n-1 index; unlike reset_index(drop=False) + rename this can be re-run
# without producing a second 'id' column
temp = temp.reset_index(drop=True)

# same id = same questionnaire
questionnaire = -1        # id of the running questionnaire (first row opens id 0)
questionnaire_key = None  # (subjectID, timestamp) of the running questionnaire
answered = set()          # questions already answered in the running questionnaire
questionnaire_ids = []
for _, row in temp.iterrows():
    row_key = (row['subjectID'], row['timestamp'])
    row_answers = {column for column in question_columns if not pd.isna(row[column])}

    # different subject/timestamp, or same question again? -> different questionnaire
    if row_key != questionnaire_key or (answered & row_answers):
        questionnaire += 1
        questionnaire_key = row_key
        answered = set()

    answered |= row_answers
    questionnaire_ids.append(questionnaire)

# write all ids at once and keep 'id' as the first column
temp['id'] = questionnaire_ids
temp = temp[['id'] + [column for column in temp.columns if column != 'id']]
temp

Unnamed: 0,id,subjectID,timestamp,timezone,VAS,phF,MF,ReIP,sport
0,0,1,14.03.19 20:01,UTC,2.0,,,,
1,0,1,14.03.19 20:01,UTC,,,,Worse,
2,0,1,14.03.19 20:01,UTC,,Never,,,
3,0,1,14.03.19 20:01,UTC,,,Sometimes,,
4,0,1,14.03.19 20:01,UTC,,,,,No
...,...,...,...,...,...,...,...,...,...
2266,524,28,16.08.18 00:51,CEST,,,Sometimes,,
2267,525,28,16.08.18 00:52,CEST,4.0,,,,
2268,525,28,16.08.18 00:52,CEST,,,,Better,
2269,525,28,16.08.18 00:52,CEST,,Never,,,


In [153]:
# collapse the per-question rows into one row per questionnaire:
# rows sharing (id, subjectID, timestamp) are merged, and GROUP_CONCAT joins the
# non-NULL answers of each column (comma-separated if there is more than one)
query = '''
SELECT id, subjectID, timestamp, GROUP_CONCAT(DISTINCT timezone) AS timezone, GROUP_CONCAT(VAS) AS VAS, GROUP_CONCAT(phF) AS phF, GROUP_CONCAT(MF) AS MF, GROUP_CONCAT(ReIP) AS ReIP, GROUP_CONCAT(sport) AS sport
FROM temp
GROUP BY id, subjectID, timestamp
'''
questionnaires = sqldf(query)
questionnaires

Unnamed: 0,id,subjectID,timestamp,timezone,VAS,phF,MF,ReIP,sport
0,0,1,14.03.19 20:01,UTC,2.0,Never,Sometimes,Worse,No
1,1,1,15.03.19 20:01,UTC,2.0,Sometimes,Sometimes,Better,Yes
2,2,1,16.03.19 20:47,UTC,1.0,Never,Never,Same,No
3,3,1,17.03.19 20:01,UTC,1.0,Sometimes,Never,Same,Yes
4,4,1,18.03.19 20:13,UTC,2.0,Sometimes,Never,Worse,Yes
...,...,...,...,...,...,...,...,...,...
521,521,28,10.08.18 23:13,CEST,4.0,Never,Sometimes,Same,
522,522,28,13.08.18 21:39,CEST,4.0,Sometimes,Regularly,Same,
523,523,28,14.08.18 23:27,CEST,6.0,Never,Sometimes,Worse,
524,524,28,16.08.18 00:51,CEST,5.0,Sometimes,Sometimes,Better,


In [154]:
# incomplete questionnaires (ignore sport label as it's not asked in all questionnaires)
# a questionnaire counts as incomplete when any of VAS, phF, MF or ReIP is missing;
# the result is only displayed, `questionnaires` itself is not modified here
query = '''
SELECT *
FROM questionnaires
WHERE VAS IS NULL OR phF IS NULL OR MF IS NULL OR ReIP IS NULL
'''
sqldf(query)

Unnamed: 0,id,subjectID,timestamp,timezone,VAS,phF,MF,ReIP,sport
0,105,15,09.06.19 22:27,UTC,1.0,,,,
1,215,23,05.02.18 21:00,CET,,,,Better,
2,281,24,06.02.18 21:43,CET,1.0,,,,


In [155]:
# discard incomplete questionnaires
# Keep only questionnaires in which VAS, phF, MF and ReIP are all answered (sport may
# stay empty as it's not asked in all questionnaires). Filtering directly replaces the
# former "all rows EXCEPT incomplete rows" construction: EXCEPT also de-duplicates rows
# and leaves the row order unspecified, so the order is now pinned with ORDER BY id.
query = '''
SELECT *
FROM questionnaires
WHERE VAS IS NOT NULL AND phF IS NOT NULL AND MF IS NOT NULL AND ReIP IS NOT NULL
ORDER BY id;
'''
questionnaires = sqldf(query)
questionnaires

Unnamed: 0,id,subjectID,timestamp,timezone,VAS,phF,MF,ReIP,sport
0,0,1,14.03.19 20:01,UTC,2.0,Never,Sometimes,Worse,No
1,1,1,15.03.19 20:01,UTC,2.0,Sometimes,Sometimes,Better,Yes
2,2,1,16.03.19 20:47,UTC,1.0,Never,Never,Same,No
3,3,1,17.03.19 20:01,UTC,1.0,Sometimes,Never,Same,Yes
4,4,1,18.03.19 20:13,UTC,2.0,Sometimes,Never,Worse,Yes
...,...,...,...,...,...,...,...,...,...
518,521,28,10.08.18 23:13,CEST,4.0,Never,Sometimes,Same,
519,522,28,13.08.18 21:39,CEST,4.0,Sometimes,Regularly,Same,
520,523,28,14.08.18 23:27,CEST,6.0,Never,Sometimes,Worse,
521,524,28,16.08.18 00:51,CEST,5.0,Sometimes,Sometimes,Better,


In [160]:
# aggregate questionnaires into daily fatigue values
# date = first 8 characters of the timestamp string ('dd.mm.yy'); all questionnaires of
# a subject on that date are concatenated per column and counted in n_answers.
# Days with the most questionnaires are listed first.
# NOTE(review): the date is taken from the timestamp string as-is, so a questionnaire
# filled in shortly after midnight is counted for the new day - confirm this is intended.
query = '''
SELECT subjectID, SUBSTRING(Timestamp, 1, 8) AS date, GROUP_CONCAT(DISTINCT timezone) AS timezone, GROUP_CONCAT(VAS) AS VAS, GROUP_CONCAT(ReIP) AS ReIP, GROUP_CONCAT(phF) AS phF, GROUP_CONCAT(MF) AS MF, GROUP_CONCAT(sport) AS sport, COUNT(*) AS n_answers
FROM questionnaires
GROUP BY subjectID, date
ORDER BY n_answers DESC;
'''
fatigue_daily = sqldf(query)
fatigue_daily

Unnamed: 0,subjectID,date,timezone,VAS,ReIP,phF,MF,sport,n_answers
0,24,06.02.18,CET,"5.0,5.0,5.0,2.0","Same,Same,Same,Worse","Regularly,Regularly,Sometimes,Sometimes","Sometimes,Sometimes,Sometimes,Sometimes",,4
1,24,07.02.18,CET,"4.0,3.0,5.0","Same,Same,Worse","Sometimes,Never,Regularly","Sometimes,Never,Sometimes",,3
2,4,30.03.19,UTC,"3.0,2.0","Better,Better","Never,Never","Never,Never",No,2
3,5,04.04.19,UTC,"6.0,2.0","Better,Better","Sometimes,Never","Sometimes,Never","No,No",2
4,5,07.04.19,UTC,"9.0,3.0","Better,Better","Sometimes,Sometimes","Sometimes,Never","Yes,Yes",2
...,...,...,...,...,...,...,...,...,...
471,27,31.08.18,CEST,3.0,Worse,Sometimes,Sometimes,,1
472,28,09.08.18,CEST,5.0,Same,Never,Never,,1
473,28,10.08.18,CEST,4.0,Same,Never,Sometimes,,1
474,28,13.08.18,CEST,4.0,Same,Sometimes,Regularly,,1


In [162]:
# check how many multiple questionnaires are filled out per day
# i.e. the number of (subject, day) rows in fatigue_daily with more than one questionnaire
query = '''
SELECT COUNT(*)
FROM fatigue_daily
WHERE n_answers > 1;
'''
sqldf(query)

Unnamed: 0,COUNT(*)
0,44


--

In [174]:
# display the cleaned questionnaires (one row per complete questionnaire)
questionnaires

Unnamed: 0,id,subjectID,timestamp,timezone,VAS,phF,MF,ReIP,sport
0,0,1,14.03.19 20:01,UTC,2.0,Never,Sometimes,Worse,No
1,1,1,15.03.19 20:01,UTC,2.0,Sometimes,Sometimes,Better,Yes
2,2,1,16.03.19 20:47,UTC,1.0,Never,Never,Same,No
3,3,1,17.03.19 20:01,UTC,1.0,Sometimes,Never,Same,Yes
4,4,1,18.03.19 20:13,UTC,2.0,Sometimes,Never,Worse,Yes
...,...,...,...,...,...,...,...,...,...
518,521,28,10.08.18 23:13,CEST,4.0,Never,Sometimes,Same,
519,522,28,13.08.18 21:39,CEST,4.0,Sometimes,Regularly,Same,
520,523,28,14.08.18 23:27,CEST,6.0,Never,Sometimes,Worse,
521,524,28,16.08.18 00:51,CEST,5.0,Sometimes,Sometimes,Better,


In [179]:
# convert into numeric classes
#   VAS    : 1-4 -> 0, 5-10 -> 1 (VAS is cast to an integer first)
#   phF, MF: never -> 0, sometimes / regularly / often / always -> 1
#   ReIP   : worse -> -1, same -> 0, better -> 1
#   sport  : No -> 0, Yes -> 1
# The CASE expressions have no ELSE branch, so any other value (including a missing
# answer) becomes NULL. LIKE without wildcards compares case-insensitively here.
# NOTE: this reuses the name `temp`, replacing the per-question frame built earlier.
query = '''
SELECT subjectID, timestamp, timezone,
CASE
WHEN CAST(VAS AS INT) BETWEEN 1.0 AND 4.0 THEN 0
WHEN CAST(VAS AS INT) BETWEEN 5.0 AND 10.0 THEN 1
END AS 'VAS',
CASE
WHEN phF LIKE 'never' THEN 0
WHEN phF LIKE 'sometimes' OR phF LIKE 'regularly' OR phF LIKE 'often' OR phF LIKE 'always' THEN 1
END AS 'phF',
CASE
WHEN MF LIKE 'never' THEN 0
WHEN MF LIKE 'sometimes' OR MF LIKE 'regularly' OR MF LIKE 'often' OR MF LIKE 'always' THEN 1
END AS 'MF',
CASE
WHEN ReIP LIKE 'worse' THEN -1
WHEN ReIP LIKE 'same' THEN 0
WHEN ReIP LIKE 'better' THEN 1
END AS 'ReIP',
CASE
WHEN sport LIKE 'No' THEN 0
WHEN sport LIKE 'Yes' THEN 1
END AS 'sport'
FROM questionnaires
'''
temp = sqldf(query)
temp

Unnamed: 0,subjectID,timestamp,timezone,VAS,phF,MF,ReIP,sport
0,1,14.03.19 20:01,UTC,0,0,1,-1,0.0
1,1,15.03.19 20:01,UTC,0,1,1,1,1.0
2,1,16.03.19 20:47,UTC,0,0,0,0,0.0
3,1,17.03.19 20:01,UTC,0,1,0,0,1.0
4,1,18.03.19 20:13,UTC,0,1,0,-1,1.0
...,...,...,...,...,...,...,...,...
518,28,10.08.18 23:13,CEST,0,0,1,0,
519,28,13.08.18 21:39,CEST,0,1,1,0,
520,28,14.08.18 23:27,CEST,1,0,1,-1,
521,28,16.08.18 00:51,CEST,1,1,1,1,


In [194]:
# aggregate questionnaires into daily fatigue values
# same grouping as for the string answers (subject + 'dd.mm.yy' date), but on the numeric
# classes in `temp`; the classes of all questionnaires of a day are concatenated so that
# days with conflicting answers can be inspected. The result is only displayed.
query = '''
SELECT subjectID, SUBSTRING(Timestamp, 1, 8) AS date, GROUP_CONCAT(DISTINCT timezone) AS timezone, GROUP_CONCAT(VAS) AS VAS, GROUP_CONCAT(ReIP) AS ReIP, GROUP_CONCAT(phF) AS phF, GROUP_CONCAT(MF) AS MF, GROUP_CONCAT(sport) AS sport, COUNT(*) AS n_answers
FROM temp
GROUP BY subjectID, date
ORDER BY n_answers DESC;
'''
sqldf(query).fillna(value=pd.NA)

Unnamed: 0,subjectID,date,timezone,VAS,ReIP,phF,MF,sport,n_answers
0,24,06.02.18,CET,1110,"0,0,0,-1",1111,1111,,4
1,24,07.02.18,CET,001,"0,0,-1",101,101,,3
2,4,30.03.19,UTC,00,11,00,00,0.0,2
3,5,04.04.19,UTC,10,11,10,10,"0.0,0.0",2
4,5,07.04.19,UTC,10,11,11,10,"1.0,1.0",2
...,...,...,...,...,...,...,...,...,...
471,27,31.08.18,CEST,0,-1,1,1,,1
472,28,09.08.18,CEST,1,0,0,0,,1
473,28,10.08.18,CEST,0,0,0,1,,1
474,28,13.08.18,CEST,0,0,1,1,,1


In [228]:
# aggregate questionnaires into SINGLE daily fatigue values
# TODO: CHECK VALIDNESS OF AVERAGING
# Per subject and day, each class column is averaged over the day's questionnaires and
# rounded to the nearest class; AVG skips NULLs, so a day with no sport answer stays empty.
# NOTE(review): with an even number of questionnaires the average can land exactly
# between two classes - check how ROUND resolves such ties before relying on the label.
# Y holds the resulting labels, one row per (subjectID, date).
query = '''
SELECT subjectID, SUBSTRING(Timestamp, 1, 8) AS date, GROUP_CONCAT(DISTINCT timezone) AS timezone, ROUND(AVG(VAS)) AS VAS, ROUND(AVG(ReIP)) AS ReIP, ROUND(AVG(phF)) AS phF, ROUND(AVG(MF)) AS MF, ROUND(AVG(sport)) AS sport, COUNT(*) AS n_answers
FROM temp
GROUP BY subjectID, date
ORDER BY n_answers DESC;
'''
Y = sqldf(query).fillna(value=pd.NA)
Y

Unnamed: 0,subjectID,date,timezone,VAS,ReIP,phF,MF,sport,n_answers
0,24,06.02.18,CET,1.0,0.0,1.0,1.0,,4
1,24,07.02.18,CET,0.0,0.0,1.0,1.0,,3
2,4,30.03.19,UTC,0.0,1.0,0.0,0.0,0.0,2
3,5,04.04.19,UTC,1.0,1.0,1.0,1.0,0.0,2
4,5,07.04.19,UTC,1.0,1.0,1.0,1.0,1.0,2
...,...,...,...,...,...,...,...,...,...
471,27,31.08.18,CEST,0.0,-1.0,1.0,1.0,,1
472,28,09.08.18,CEST,1.0,0.0,0.0,0.0,,1
473,28,10.08.18,CEST,0.0,0.0,0.0,1.0,,1
474,28,13.08.18,CEST,0.0,0.0,1.0,1.0,,1


# b) Physiological data

In [197]:
# Replace missing values by the string 'NaN' before handing the frame to SQL:
# GROUP_CONCAT skips NULLs, which would silently shorten the per-day series built below.
# NOTE: columns that had missing values hold a mix of numbers and strings afterwards.
physio = physio.fillna(value='NaN') # otherwise SQL will ignore None values
physio

Unnamed: 0,Timestamp,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,RESP,Steps,SkinTemperature,SubjectID,ActivityClass
0,14.03.19 14:38,0.784314,981.25,0.0,0.0,753.636566,,83.0,,,0.0,28.28,1,
1,14.03.19 14:39,1.176471,981.2,0.175,1.9,1256.060944,,75.403633,,,0.0,29.44,1,
2,14.03.19 14:40,2.352941,981.2,0.19,1.82,1256.060944,,69.112653,,14.5625,0.0,30.41,1,
3,14.03.19 14:41,1.176471,981.1,0.11,1.94,1256.060944,,69.781164,,12.175528,0.0,31.0,1,
4,14.03.19 14:42,1.176471,981.2,0.1,1.68,1256.060944,,72.306609,,12.531234,0.0,31.455,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
313578,13.09.18 06:36,0.784314,966.9,0.14,2.31,1363.812563,,84.831541,42.3,20.008803,0.0,32.63,27,
313579,13.09.18 06:37,5.882353,966.9,0.13,2.61,1330.995827,,82.644772,40.933333,21.377863,0.0,32.69,27,
313580,13.09.18 06:38,2.352941,966.9,0.13,2.34,1330.995827,,92.145701,41.636364,21.906977,0.0,32.75,27,
313581,13.09.18 06:39,1.960784,966.9,0.16,2.81,1330.995827,,91.410742,,16.136986,0.0,32.81,27,


In [225]:
# build one row per subject and day ('dd.mm.yy' prefix of Timestamp); every sensor
# column becomes a comma-separated string of all its samples of that day
# NOTE(review): SQLite does not guarantee the order of GROUP_CONCAT elements - the
# series presumably follow the row order of `physio`, but verify this before treating
# them as time series.
query = '''
SELECT SubjectID AS subjectID,
    SUBSTRING(Timestamp, 1, 8) AS date,
    GROUP_CONCAT(ActivityCounts) AS ActivityCounts,
    GROUP_CONCAT(Barometer) AS Barometer,
    GROUP_CONCAT(BloodPerfusion) AS BloodPerfusion,
    GROUP_CONCAT(BloodPulseWave) AS BloodPulseWave,
    GROUP_CONCAT(EnergyExpenditure) AS EnergyExpenditure,
    GROUP_CONCAT(GalvanicSkinResponse) AS GalvanicSkinResponse,
    GROUP_CONCAT(HR) AS HR, GROUP_CONCAT(HRV) AS HRV,
    GROUP_CONCAT(RESP) AS RESP,
    GROUP_CONCAT(Steps) AS Steps,
    GROUP_CONCAT(SkinTemperature) AS SkinTemperature,
    GROUP_CONCAT(ActivityClass) AS ActivityClass
FROM physio
GROUP BY subjectID, date;'''
X = sqldf(query)
X

Unnamed: 0,subjectID,date,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,RESP,Steps,SkinTemperature,ActivityClass
0,1,14.03.19,"0.784313725,1.176470588,2.352941176,1.17647058...","981.25,981.2,981.2,981.1,981.2,981.3,981.2,981...","0.0,0.175,0.19,0.11,0.1,0.09,NaN,NaN,NaN,NaN,N...","0.0,1.9,1.82,1.94,1.68,1.09,NaN,NaN,NaN,NaN,Na...","753.6365662,1256.060944,1256.060944,1256.06094...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","83.0,75.40363269,69.11265332,69.78116438,72.30...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,14.5625,12.17552786,12.531234,14.69469...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NaN,0....","28.28,29.44,30.41,31.0,31.455,31.795,32.22,31....","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
1,1,15.03.19,"0.784313725,0.784313725,0.784313725,0.78431372...","984.4,984.5,984.3,984.4,984.3,984.4,984.3,984....","0.875,0.825,0.975,0.98,1.0,0.985,0.96,0.93,0.9...","2.27,2.18,2.19,2.18,2.02,2.27,2.27,2.49,2.17,2...","1311.85318,1311.85318,1311.85318,1311.85318,13...","2.471338889,2.46735,2.466372222,2.4621,2.45465...","67.54923531,57.26287697,55.2253739,58.43175416...","44.92312073,51.26666667,57.7,58.01666667,57.41...","14.34420472,14.21430416,12.45540139,14.6309642...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.13,36.19,36.25,36.31,36.44,36.5,36.59,36.69...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
2,1,16.03.19,"0.784313725,1.176470588,5.098039216,0.78431372...","990.7,990.6,990.7,990.7,990.8,990.75,990.8,990...","1.045,0.79,0.84,0.585,0.415,0.74,0.835,0.835,0...","2.32,2.22,2.44,2.18,2.45,2.44,2.24,2.28,2.18,2...","1311.85318,1311.85318,1311.85318,1311.85318,13...","1.172788889,1.171305556,1.061122222,3.09794444...","64.65655738,63.83929024,63.86047157,62.6129328...","31.3815721,34.48333333,36.25,39.48333333,41.9,...","15.03026316,15.03544724,12.88235294,12.5884458...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","34.34,34.38,34.38,34.455,34.16,34.06,34.19,34....","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
3,1,17.03.19,"0.784313725,0.784313725,0.784313725,0.78431372...","982.15,982.05,982.0,982.0,982.0,982.0,982.0,98...","1.42,1.47,1.48,1.46,1.49,1.475,1.47,1.46,1.45,...","2.86,2.7,2.73,2.72,2.68,2.9,2.96,3.02,3.08,3.3...","1311.85318,1311.85318,1311.85318,1311.85318,13...","2.573777778,2.567694444,2.560622222,2.55622222...","69.66641006,70.49757838,70.17700916,69.3862999...","23.08598131,22.81666667,20.41666667,20.5166666...","14.23136959,13.68923699,16.7,16.6549496,13.494...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","35.63,35.63,35.44,35.41,35.38,35.38,35.38,35.3...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
4,1,18.03.19,"0.784313725,1.176470588,0.784313725,0.78431372...","988.5,988.6,988.7,988.65,988.5,988.5,988.5,988...","1.48,1.49,1.155,1.525,1.38,1.45,1.555,1.41,1.6...","1.54,1.61,2.18,2.01,1.76,1.76,1.78,1.81,1.5,1....","1289.98896,1311.85318,1311.85318,1311.85318,13...","2.632423729,2.630555556,2.626594444,2.62318333...","57.42528736,59.38585964,65.7858185,58.39290306...","33.69491525,34.18333333,37.91988636,46.85,52.9...","15.35642063,14.75081392,14.21644803,10.5670285...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.94,36.97,36.97,36.69,36.53,36.34,36.19,35.9...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
946,27,30.08.18,"0.784313725,0.784313725,0.784313725,0.78431372...","967.1,967.05,967.0,967.1,967.1,967.0,967.0,967...","0.62,0.63,0.5,0.56,0.555,0.55,0.53,0.51,0.49,0...","2.68,2.7,2.78,2.45,2.58,2.67,2.52,2.5,2.6,2.6,...","1330.995827,1330.995827,1374.446035,1330.99582...","0.925716667,0.934888889,0.854785311,0.85772777...","63.41839611,63.30076726,65.60981987,58.1836150...","44.13333333,43.55,42.65485714,40.7,39.26666667...","16.56763527,17.25,16.70448549,15.14208633,14.6...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.44,36.41,36.38,36.34,36.31,36.22,36.16,36.0...","1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1...."
947,27,31.03.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
948,27,31.05.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
949,27,31.07.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."


In [223]:
# don't think this is necessary:
# variant of the daily aggregation that keeps the time of day next to each sample.
# The inner query adds `hour` = Timestamp from character 10 onwards (the 'HH:MM' part);
# every element is then rendered as '(value,HH:MM)'. The result is only displayed.
query = '''
SELECT SubjectID AS subjectID,
    SUBSTRING(Timestamp, 1, 8) AS date,
    GROUP_CONCAT('(' || ActivityCounts || ',' || hour || ')') AS ActivityCounts,
    GROUP_CONCAT('(' || Barometer || ',' || hour || ')') AS Barometer,
    GROUP_CONCAT('(' || BloodPerfusion || ',' || hour || ')') AS BloodPerfusion,
    GROUP_CONCAT('(' || BloodPulseWave || ',' || hour || ')') AS BloodPulseWave,
    GROUP_CONCAT('(' || EnergyExpenditure || ',' || hour || ')') AS EnergyExpenditure,
    GROUP_CONCAT('(' || GalvanicSkinResponse || ',' || hour || ')') AS GalvanicSkinResponse,
    GROUP_CONCAT('(' || HR || ',' || hour || ')') AS HR,
    GROUP_CONCAT('(' || HRV || ',' || hour || ')') AS HRV,
    GROUP_CONCAT('(' || RESP || ',' || hour || ')') AS RESP,
    GROUP_CONCAT('(' || Steps || ',' || hour || ')') AS Steps,
    GROUP_CONCAT('(' || SkinTemperature || ',' || hour || ')') AS SkinTemperature,
    GROUP_CONCAT('(' || ActivityClass || ',' || hour || ')') AS ActivityClass
FROM (
    SELECT *, SUBSTRING(Timestamp, 10, LENGTH(Timestamp)) AS hour
    FROM physio
)
GROUP BY subjectID, date;'''
sqldf(query)

Unnamed: 0,subjectID,date,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,RESP,Steps,SkinTemperature,ActivityClass
0,1,14.03.19,"(0.784313725,14:38),(1.176470588,14:39),(2.352...","(981.25,14:38),(981.2,14:39),(981.2,14:40),(98...","(0.0,14:38),(0.175,14:39),(0.19,14:40),(0.11,1...","(0.0,14:38),(1.9,14:39),(1.82,14:40),(1.94,14:...","(753.6365662,14:38),(1256.060944,14:39),(1256....","(NaN,14:38),(NaN,14:39),(NaN,14:40),(NaN,14:41...","(83.0,14:38),(75.40363269,14:39),(69.11265332,...","(NaN,14:38),(NaN,14:39),(NaN,14:40),(NaN,14:41...","(NaN,14:38),(NaN,14:39),(14.5625,14:40),(12.17...","(0.0,14:38),(0.0,14:39),(0.0,14:40),(0.0,14:41...","(28.28,14:38),(29.44,14:39),(30.41,14:40),(31....","(NaN,14:38),(NaN,14:39),(NaN,14:40),(NaN,14:41..."
1,1,15.03.19,"(0.784313725,00:00),(0.784313725,00:01),(0.784...","(984.4,00:00),(984.5,00:01),(984.3,00:02),(984...","(0.875,00:00),(0.825,00:01),(0.975,00:02),(0.9...","(2.27,00:00),(2.18,00:01),(2.19,00:02),(2.18,0...","(1311.85318,00:00),(1311.85318,00:01),(1311.85...","(2.471338889,00:00),(2.46735,00:01),(2.4663722...","(67.54923531,00:00),(57.26287697,00:01),(55.22...","(44.92312073,00:00),(51.26666667,00:01),(57.7,...","(14.34420472,00:00),(14.21430416,00:01),(12.45...","(0.0,00:00),(0.0,00:01),(0.0,00:02),(0.0,00:03...","(36.13,00:00),(36.19,00:01),(36.25,00:02),(36....","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03..."
2,1,16.03.19,"(0.784313725,00:00),(1.176470588,00:01),(5.098...","(990.7,00:00),(990.6,00:01),(990.7,00:02),(990...","(1.045,00:00),(0.79,00:01),(0.84,00:02),(0.585...","(2.32,00:00),(2.22,00:01),(2.44,00:02),(2.18,0...","(1311.85318,00:00),(1311.85318,00:01),(1311.85...","(1.172788889,00:00),(1.171305556,00:01),(1.061...","(64.65655738,00:00),(63.83929024,00:01),(63.86...","(31.3815721,00:00),(34.48333333,00:01),(36.25,...","(15.03026316,00:00),(15.03544724,00:01),(12.88...","(0.0,00:00),(0.0,00:01),(0.0,00:02),(0.0,00:03...","(34.34,00:00),(34.38,00:01),(34.38,00:02),(34....","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03..."
3,1,17.03.19,"(0.784313725,00:00),(0.784313725,00:01),(0.784...","(982.15,00:00),(982.05,00:01),(982.0,00:02),(9...","(1.42,00:00),(1.47,00:01),(1.48,00:02),(1.46,0...","(2.86,00:00),(2.7,00:01),(2.73,00:02),(2.72,00...","(1311.85318,00:00),(1311.85318,00:01),(1311.85...","(2.573777778,00:00),(2.567694444,00:01),(2.560...","(69.66641006,00:00),(70.49757838,00:01),(70.17...","(23.08598131,00:00),(22.81666667,00:01),(20.41...","(14.23136959,00:00),(13.68923699,00:01),(16.7,...","(0.0,00:00),(0.0,00:01),(0.0,00:02),(0.0,00:03...","(35.63,00:00),(35.63,00:01),(35.44,00:02),(35....","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03..."
4,1,18.03.19,"(0.784313725,00:00),(1.176470588,00:01),(0.784...","(988.5,00:00),(988.6,00:01),(988.7,00:02),(988...","(1.48,00:00),(1.49,00:01),(1.155,00:02),(1.525...","(1.54,00:00),(1.61,00:01),(2.18,00:02),(2.01,0...","(1289.98896,00:00),(1311.85318,00:01),(1311.85...","(2.632423729,00:00),(2.630555556,00:01),(2.626...","(57.42528736,00:00),(59.38585964,00:01),(65.78...","(33.69491525,00:00),(34.18333333,00:01),(37.91...","(15.35642063,00:00),(14.75081392,00:01),(14.21...","(0.0,00:00),(0.0,00:01),(0.0,00:02),(0.0,00:03...","(36.94,00:00),(36.97,00:01),(36.97,00:02),(36....","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
946,27,30.08.18,"(0.784313725,00:00),(0.784313725,00:01),(0.784...","(967.1,00:00),(967.05,00:01),(967.0,00:02),(96...","(0.62,00:00),(0.63,00:01),(0.5,00:02),(0.56,00...","(2.68,00:00),(2.7,00:01),(2.78,00:02),(2.45,00...","(1330.995827,00:00),(1330.995827,00:01),(1374....","(0.925716667,00:00),(0.934888889,00:01),(0.854...","(63.41839611,00:00),(63.30076726,00:01),(65.60...","(44.13333333,00:00),(43.55,00:01),(42.65485714...","(16.56763527,00:00),(17.25,00:01),(16.70448549...","(0.0,00:00),(0.0,00:01),(0.0,00:02),(0.0,00:03...","(36.44,00:00),(36.41,00:01),(36.38,00:02),(36....","(1.0,00:00),(1.0,00:01),(1.0,00:02),(1.0,00:03..."
947,27,31.03.18,"(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03..."
948,27,31.05.18,"(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03..."
949,27,31.07.18,"(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03...","(NaN,00:00),(NaN,00:01),(NaN,00:02),(NaN,00:03..."


# Join data

In [232]:
# inner join of the daily physiological series (X) with the daily fatigue labels (Y)
# on subject and date; days present in only one of the two frames are dropped
# NOTE(review): the former remark "don't think this is necessary" looks copied from the
# previous cell - `data` is used right below to count unmatched days.
# NOTE: SELECT * keeps the key columns of both inputs, so subjectID and date appear twice.
query = '''
SELECT *
FROM X JOIN Y ON X.subjectID = Y.subjectID AND X.date = Y.date;'''
data = sqldf(query)
data

Unnamed: 0,subjectID,date,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,...,ActivityClass,subjectID.1,date.1,timezone,VAS,ReIP,phF,MF,sport,n_answers
0,1,14.03.19,"0.784313725,1.176470588,2.352941176,1.17647058...","981.25,981.2,981.2,981.1,981.2,981.3,981.2,981...","0.0,0.175,0.19,0.11,0.1,0.09,NaN,NaN,NaN,NaN,N...","0.0,1.9,1.82,1.94,1.68,1.09,NaN,NaN,NaN,NaN,Na...","753.6365662,1256.060944,1256.060944,1256.06094...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","83.0,75.40363269,69.11265332,69.78116438,72.30...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",...,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",1,14.03.19,UTC,0.0,-1.0,0.0,1.0,0.0,1
1,1,15.03.19,"0.784313725,0.784313725,0.784313725,0.78431372...","984.4,984.5,984.3,984.4,984.3,984.4,984.3,984....","0.875,0.825,0.975,0.98,1.0,0.985,0.96,0.93,0.9...","2.27,2.18,2.19,2.18,2.02,2.27,2.27,2.49,2.17,2...","1311.85318,1311.85318,1311.85318,1311.85318,13...","2.471338889,2.46735,2.466372222,2.4621,2.45465...","67.54923531,57.26287697,55.2253739,58.43175416...","44.92312073,51.26666667,57.7,58.01666667,57.41...",...,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",1,15.03.19,UTC,0.0,1.0,1.0,1.0,1.0,1
2,1,16.03.19,"0.784313725,1.176470588,5.098039216,0.78431372...","990.7,990.6,990.7,990.7,990.8,990.75,990.8,990...","1.045,0.79,0.84,0.585,0.415,0.74,0.835,0.835,0...","2.32,2.22,2.44,2.18,2.45,2.44,2.24,2.28,2.18,2...","1311.85318,1311.85318,1311.85318,1311.85318,13...","1.172788889,1.171305556,1.061122222,3.09794444...","64.65655738,63.83929024,63.86047157,62.6129328...","31.3815721,34.48333333,36.25,39.48333333,41.9,...",...,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",1,16.03.19,UTC,0.0,0.0,0.0,0.0,0.0,1
3,1,17.03.19,"0.784313725,0.784313725,0.784313725,0.78431372...","982.15,982.05,982.0,982.0,982.0,982.0,982.0,98...","1.42,1.47,1.48,1.46,1.49,1.475,1.47,1.46,1.45,...","2.86,2.7,2.73,2.72,2.68,2.9,2.96,3.02,3.08,3.3...","1311.85318,1311.85318,1311.85318,1311.85318,13...","2.573777778,2.567694444,2.560622222,2.55622222...","69.66641006,70.49757838,70.17700916,69.3862999...","23.08598131,22.81666667,20.41666667,20.5166666...",...,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",1,17.03.19,UTC,0.0,0.0,1.0,0.0,1.0,1
4,1,18.03.19,"0.784313725,1.176470588,0.784313725,0.78431372...","988.5,988.6,988.7,988.65,988.5,988.5,988.5,988...","1.48,1.49,1.155,1.525,1.38,1.45,1.555,1.41,1.6...","1.54,1.61,2.18,2.01,1.76,1.76,1.78,1.81,1.5,1....","1289.98896,1311.85318,1311.85318,1311.85318,13...","2.632423729,2.630555556,2.626594444,2.62318333...","57.42528736,59.38585964,65.7858185,58.39290306...","33.69491525,34.18333333,37.91988636,46.85,52.9...",...,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",1,18.03.19,UTC,0.0,-1.0,1.0,0.0,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
439,27,27.02.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",...,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",27,27.02.18,CET,0.0,-1.0,1.0,0.0,,2
440,27,27.08.18,"0.784313725,0.784313725,0.784313725,0.78431372...","964.8,964.8,964.8,964.7,964.7,964.7,964.7,964....","0.65,0.64,0.66,0.65,0.66,0.66,0.62,0.66,0.66,0...","2.58,2.83,2.6,2.7,2.7,2.66,2.94,2.68,2.8,2.66,...","1330.995827,1330.995827,1330.995827,1330.99582...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","64.40149177,63.1057941,64.60267283,63.78507079...","62.2875,61.9,61.55,61.4,64.38333333,65.1906202...",...,"1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1....",27,27.08.18,CEST,0.0,1.0,1.0,0.0,,1
441,27,28.02.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",...,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",27,28.02.18,CET,0.0,0.0,1.0,0.0,,1
442,27,28.08.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",...,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",27,28.08.18,CEST,0.0,1.0,0.0,0.0,,1


In [234]:
# how many labelled days in Y found no physiological match in the joined frame?
len(Y.index) - len(data.index)

32

In [245]:
# list the (subjectID, date) pairs that have fatigue labels in Y but no physiological
# data in X: all keys of Y minus the keys that survive the natural join with X
query = '''
WITH yy AS (
    SELECT subjectID, date
    FROM Y
),
xx AS (
    SELECT subjectID, date
    FROM X
),
joined AS (
    SELECT *
    FROM xx NATURAL JOIN yy
)
SELECT *
FROM yy

EXCEPT

SELECT *
FROM joined;'''
sqldf(query)

Unnamed: 0,subjectID,date
0,2,17.04.19
1,2,18.04.19
2,2,19.04.19
3,2,20.04.19
4,2,21.04.19
5,2,22.04.19
6,22,02.06.19
7,24,13.12.17
8,24,14.12.17
9,24,15.12.17


In [246]:
# Show X to investigate the gap found above.
# Open question: the row counts of Y and `data` differ by 32 -- are these days
# for which questionnaires exist but no physiological data was recorded?
# NOTE(review): the key comparison above lists only 10 distinct
# (subjectID, date) pairs, so "32 days" may be an over-count (e.g. several
# rows per day in Y) -- TODO confirm.
X

Unnamed: 0,subjectID,date,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,RESP,Steps,SkinTemperature,ActivityClass
0,1,14.03.19,"0.784313725,1.176470588,2.352941176,1.17647058...","981.25,981.2,981.2,981.1,981.2,981.3,981.2,981...","0.0,0.175,0.19,0.11,0.1,0.09,NaN,NaN,NaN,NaN,N...","0.0,1.9,1.82,1.94,1.68,1.09,NaN,NaN,NaN,NaN,Na...","753.6365662,1256.060944,1256.060944,1256.06094...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","83.0,75.40363269,69.11265332,69.78116438,72.30...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,14.5625,12.17552786,12.531234,14.69469...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NaN,0....","28.28,29.44,30.41,31.0,31.455,31.795,32.22,31....","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
1,1,15.03.19,"0.784313725,0.784313725,0.784313725,0.78431372...","984.4,984.5,984.3,984.4,984.3,984.4,984.3,984....","0.875,0.825,0.975,0.98,1.0,0.985,0.96,0.93,0.9...","2.27,2.18,2.19,2.18,2.02,2.27,2.27,2.49,2.17,2...","1311.85318,1311.85318,1311.85318,1311.85318,13...","2.471338889,2.46735,2.466372222,2.4621,2.45465...","67.54923531,57.26287697,55.2253739,58.43175416...","44.92312073,51.26666667,57.7,58.01666667,57.41...","14.34420472,14.21430416,12.45540139,14.6309642...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.13,36.19,36.25,36.31,36.44,36.5,36.59,36.69...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
2,1,16.03.19,"0.784313725,1.176470588,5.098039216,0.78431372...","990.7,990.6,990.7,990.7,990.8,990.75,990.8,990...","1.045,0.79,0.84,0.585,0.415,0.74,0.835,0.835,0...","2.32,2.22,2.44,2.18,2.45,2.44,2.24,2.28,2.18,2...","1311.85318,1311.85318,1311.85318,1311.85318,13...","1.172788889,1.171305556,1.061122222,3.09794444...","64.65655738,63.83929024,63.86047157,62.6129328...","31.3815721,34.48333333,36.25,39.48333333,41.9,...","15.03026316,15.03544724,12.88235294,12.5884458...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","34.34,34.38,34.38,34.455,34.16,34.06,34.19,34....","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
3,1,17.03.19,"0.784313725,0.784313725,0.784313725,0.78431372...","982.15,982.05,982.0,982.0,982.0,982.0,982.0,98...","1.42,1.47,1.48,1.46,1.49,1.475,1.47,1.46,1.45,...","2.86,2.7,2.73,2.72,2.68,2.9,2.96,3.02,3.08,3.3...","1311.85318,1311.85318,1311.85318,1311.85318,13...","2.573777778,2.567694444,2.560622222,2.55622222...","69.66641006,70.49757838,70.17700916,69.3862999...","23.08598131,22.81666667,20.41666667,20.5166666...","14.23136959,13.68923699,16.7,16.6549496,13.494...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","35.63,35.63,35.44,35.41,35.38,35.38,35.38,35.3...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
4,1,18.03.19,"0.784313725,1.176470588,0.784313725,0.78431372...","988.5,988.6,988.7,988.65,988.5,988.5,988.5,988...","1.48,1.49,1.155,1.525,1.38,1.45,1.555,1.41,1.6...","1.54,1.61,2.18,2.01,1.76,1.76,1.78,1.81,1.5,1....","1289.98896,1311.85318,1311.85318,1311.85318,13...","2.632423729,2.630555556,2.626594444,2.62318333...","57.42528736,59.38585964,65.7858185,58.39290306...","33.69491525,34.18333333,37.91988636,46.85,52.9...","15.35642063,14.75081392,14.21644803,10.5670285...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.94,36.97,36.97,36.69,36.53,36.34,36.19,35.9...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
946,27,30.08.18,"0.784313725,0.784313725,0.784313725,0.78431372...","967.1,967.05,967.0,967.1,967.1,967.0,967.0,967...","0.62,0.63,0.5,0.56,0.555,0.55,0.53,0.51,0.49,0...","2.68,2.7,2.78,2.45,2.58,2.67,2.52,2.5,2.6,2.6,...","1330.995827,1330.995827,1374.446035,1330.99582...","0.925716667,0.934888889,0.854785311,0.85772777...","63.41839611,63.30076726,65.60981987,58.1836150...","44.13333333,43.55,42.65485714,40.7,39.26666667...","16.56763527,17.25,16.70448549,15.14208633,14.6...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.44,36.41,36.38,36.34,36.31,36.22,36.16,36.0...","1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1...."
947,27,31.03.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
948,27,31.05.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
949,27,31.07.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."


# Export data

In [257]:
# The output folder sits next to the data folder: take everything before the
# last '/' of `path` (its parent directory) and append '/Output'.
output_path = path.rpartition('/')[0] + '/Output'

In [258]:
# Export the combined data set to <output_path>/combined_data.csv.
# The output folder is created first when it does not exist yet; otherwise
# to_csv raises an OSError on a machine where 'Output' was never created.
# The DataFrame index is written as the first column (pandas default).
import os

os.makedirs(output_path, exist_ok=True)
data.to_csv(output_path + '/combined_data.csv')