In [1]:
import pandas as pd
from pandasql import sqldf
import math

# Import data

In [2]:
# file path to data folder
paths = {'windows': 'C:/Users/jjung/iCloudDrive/ETH/MSc 3rd semester/Semester project/Data',
         'macOS': '/Users/janoschjungo/Library/Mobile Documents/com~apple~CloudDocs/ETH/MSc 3rd semester/Semester project/Data'}
path = paths['windows']

In [3]:
# import physiological data
physio = pd.DataFrame()
for subjectID in range(1, 28):
    # load physiological data for subject
    try:
        file = path + f'/subjectID_{subjectID}.csv'
        physio_subject = pd.read_csv(file)
    except FileNotFoundError:
        path = paths['macOS']
        file = path + f'/subjectID_{subjectID}.csv'
        physio_subject = pd.read_csv(file)
    n_entries = physio_subject.shape[0]
    physio_subject['SubjectID'] = [subjectID for i in range(n_entries)]

    # clean column names (dataset contains different column names per subject)
    if 'SkinTemperature.Value' in physio_subject.columns:
        physio_subject = physio_subject.rename(columns={'SkinTemperature.Value': 'SkinTemperature'}, errors='raise')

    # combine all subject data
    physio = pd.concat([physio, physio_subject])

# import fatigue (PROs) data
fatigue = pd.read_csv(path + '/fatiguePROs.csv')

# Convert data

# a) Fatigue (PROs) data

In [8]:
# convert questions into fatigue variables
query = '''
SELECT SubjectID as subjectID, DateTime AS timestamp, Timezone AS timezone,
CASE
    WHEN PROquestion LIKE 'Describe fatigue on a scale of 1 to 10, where 1 means you don’t feel tired at all and 10 means the worst tiredness you can imagine' THEN PROanswer_value
END AS 'VAS',
CASE
    WHEN PROquestion LIKE 'Physically, today how often did you feel exhausted?' THEN PROanswer_choice
END AS 'phF',
CASE
    WHEN PROquestion LIKE 'Mentally, today how often did you feel exhausted?' THEN PROanswer_choice
END AS 'MF',
CASE
    WHEN PROquestion LIKE 'Are you feeling better, worse or the same as yesterday?' THEN PROanswer_choice
END AS 'ReIP',
CASE
    WHEN PROquestion LIKE 'Did you do sport today?' THEN PROanswer_choice
END AS 'sport'
FROM fatigue
'''
temp = sqldf(query)
temp = temp.fillna(value=pd.NA)
print('total number of questions:', temp.shape[0])

total number of questions: 2271


In [11]:
# distinguish different questionnaires with same timestamp (note: data is in chronological order)
temp = temp.reset_index(drop=False)
temp = temp.rename(columns={'index': 'id'})

# same id = same questionnaire
questionnaire = 0
questionnaire_timestamp = temp.loc[0, 'timestamp']
asked_questions = {'VAS': 0, 'phF': 0, 'MF': 0, 'ReIP': 0, 'sport': 0} # already asked?
for i, row in temp.iterrows():
    asked_questions = {key: value + int(not pd.isna(row[key])) for key, value in asked_questions.items()}

    # different timestamp? -> different questionnaire
    if row['timestamp'] != questionnaire_timestamp:
        questionnaire += 1
        questionnaire_timestamp = temp.loc[i, 'timestamp']
        asked_questions = {key: int(not pd.isna(row[key])) for key, value in asked_questions.items()}
    # same timestamp but same question again (vas already asked)? -> different questionnaire
    elif 2 in set(asked_questions.values()):
        questionnaire += 1
        questionnaire_timestamp = temp.loc[i, 'timestamp']
        asked_questions = {key: int(not pd.isna(row[key])) for key, value in asked_questions.items()}
    else:
        pass

    temp.loc[i, 'id'] = questionnaire

query = '''
SELECT id, subjectID, timestamp, GROUP_CONCAT(DISTINCT timezone) AS timezone, GROUP_CONCAT(VAS) AS VAS, GROUP_CONCAT(phF) AS phF, GROUP_CONCAT(MF) AS MF, GROUP_CONCAT(ReIP) AS ReIP, GROUP_CONCAT(sport) AS sport
FROM temp
GROUP BY id, subjectID, timestamp
'''
questionnaires = sqldf(query)
print('total number of separate questionnaires:', questionnaires.shape[0])

total number of separate questionnaires: 526


In [15]:
# incomplete questionnaires (ignore sport label as it's not asked in all questionnaires)
query = '''
SELECT *
FROM questionnaires
WHERE VAS IS NULL OR phF IS NULL OR MF IS NULL OR ReIP IS NULL
'''
temp = sqldf(query)

# discard incomplete questionnaires
query = '''
SELECT *
FROM questionnaires

EXCEPT

SELECT *
FROM questionnaires
WHERE VAS IS NULL OR phF IS NULL OR MF IS NULL OR ReIP IS NULL;
'''
questionnaires = sqldf(query)

print('number of discarded questionnaires:', temp.shape[0])
temp

number of discarded questionnaires: 3


Unnamed: 0,id,subjectID,timestamp,timezone,VAS,phF,MF,ReIP,sport
0,105,15,09.06.19 22:27,UTC,1.0,,,,
1,215,23,05.02.18 21:00,CET,,,,Better,
2,281,24,06.02.18 21:43,CET,1.0,,,,


In [23]:
# aggregate questionnaires into daily fatigue values
query = '''
SELECT subjectID, SUBSTRING(Timestamp, 1, 8) AS date, GROUP_CONCAT(DISTINCT timezone) AS timezone, GROUP_CONCAT(VAS) AS VAS, GROUP_CONCAT(ReIP) AS ReIP, GROUP_CONCAT(phF) AS phF, GROUP_CONCAT(MF) AS MF, GROUP_CONCAT(sport) AS sport, COUNT(*) AS n_answers
FROM questionnaires
GROUP BY subjectID, date
ORDER BY n_answers DESC;
'''
fatigue_daily = sqldf(query)

# check how many multiple questionnaires are filled out per day
query = '''
SELECT COUNT(*) AS same_day_questionnaires
FROM fatigue_daily
WHERE n_answers > 1;
'''
print('total number of same day questionnaires', sqldf(query).iloc[0])
sqldf(query)

total number of same day questionnaires same_day_questionnaires    44
Name: 0, dtype: int64


Unnamed: 0,same_day_questionnaires
0,44


In [28]:
# check how many multiple questionnaires are filled out per day
query = '''
SELECT COUNT(*)
FROM fatigue_daily
WHERE n_answers > 1;
'''
sqldf(query)

Unnamed: 0,COUNT(*)
0,44


--

In [29]:
questionnaires

Unnamed: 0,id,subjectID,timestamp,timezone,VAS,phF,MF,ReIP,sport
0,0,1,14.03.19 20:01,UTC,2.0,Never,Sometimes,Worse,No
1,1,1,15.03.19 20:01,UTC,2.0,Sometimes,Sometimes,Better,Yes
2,2,1,16.03.19 20:47,UTC,1.0,Never,Never,Same,No
3,3,1,17.03.19 20:01,UTC,1.0,Sometimes,Never,Same,Yes
4,4,1,18.03.19 20:13,UTC,2.0,Sometimes,Never,Worse,Yes
...,...,...,...,...,...,...,...,...,...
518,521,28,10.08.18 23:13,CEST,4.0,Never,Sometimes,Same,
519,522,28,13.08.18 21:39,CEST,4.0,Sometimes,Regularly,Same,
520,523,28,14.08.18 23:27,CEST,6.0,Never,Sometimes,Worse,
521,524,28,16.08.18 00:51,CEST,5.0,Sometimes,Sometimes,Better,


In [30]:
# convert into numeric classes
query = '''
SELECT subjectID, timestamp, timezone,
CASE
WHEN CAST(VAS AS INT) BETWEEN 1.0 AND 4.0 THEN 0
WHEN CAST(VAS AS INT) BETWEEN 5.0 AND 10.0 THEN 1
END AS 'VAS',
CASE
WHEN phF LIKE 'never' THEN 0
WHEN phF LIKE 'sometimes' OR phF LIKE 'regularly' OR phF LIKE 'often' OR phF LIKE 'always' THEN 1
END AS 'phF',
CASE
WHEN MF LIKE 'never' THEN 0
WHEN MF LIKE 'sometimes' OR MF LIKE 'regularly' OR MF LIKE 'often' OR MF LIKE 'always' THEN 1
END AS 'MF',
CASE
WHEN ReIP LIKE 'worse' THEN -1
WHEN ReIP LIKE 'same' THEN 0
WHEN ReIP LIKE 'better' THEN 1
END AS 'ReIP',
CASE
WHEN sport LIKE 'No' THEN 0
WHEN sport LIKE 'Yes' THEN 1
END AS 'sport'
FROM questionnaires
'''
temp = sqldf(query)
temp

Unnamed: 0,subjectID,timestamp,timezone,VAS,phF,MF,ReIP,sport
0,1,14.03.19 20:01,UTC,0,0,1,-1,0.0
1,1,15.03.19 20:01,UTC,0,1,1,1,1.0
2,1,16.03.19 20:47,UTC,0,0,0,0,0.0
3,1,17.03.19 20:01,UTC,0,1,0,0,1.0
4,1,18.03.19 20:13,UTC,0,1,0,-1,1.0
...,...,...,...,...,...,...,...,...
518,28,10.08.18 23:13,CEST,0,0,1,0,
519,28,13.08.18 21:39,CEST,0,1,1,0,
520,28,14.08.18 23:27,CEST,1,0,1,-1,
521,28,16.08.18 00:51,CEST,1,1,1,1,


In [31]:
# aggregate questionnaires into daily fatigue values
query = '''
SELECT subjectID, SUBSTRING(Timestamp, 1, 8) AS date, GROUP_CONCAT(DISTINCT timezone) AS timezone, GROUP_CONCAT(VAS) AS VAS, GROUP_CONCAT(ReIP) AS ReIP, GROUP_CONCAT(phF) AS phF, GROUP_CONCAT(MF) AS MF, GROUP_CONCAT(sport) AS sport, COUNT(*) AS n_answers
FROM temp
GROUP BY subjectID, date
ORDER BY n_answers DESC;
'''
sqldf(query).fillna(value=pd.NA)

Unnamed: 0,subjectID,date,timezone,VAS,ReIP,phF,MF,sport,n_answers
0,24,06.02.18,CET,1110,"0,0,0,-1",1111,1111,,4
1,24,07.02.18,CET,001,"0,0,-1",101,101,,3
2,4,30.03.19,UTC,00,11,00,00,0.0,2
3,5,04.04.19,UTC,10,11,10,10,"0.0,0.0",2
4,5,07.04.19,UTC,10,11,11,10,"1.0,1.0",2
...,...,...,...,...,...,...,...,...,...
471,27,31.08.18,CEST,0,-1,1,1,,1
472,28,09.08.18,CEST,1,0,0,0,,1
473,28,10.08.18,CEST,0,0,0,1,,1
474,28,13.08.18,CEST,0,0,1,1,,1


In [32]:
# aggregate questionnaires into SINGLE daily fatigue values
# TODO: CHECK VALIDNESS OF AVERAGING
query = '''
SELECT subjectID, SUBSTRING(Timestamp, 1, 8) AS date, GROUP_CONCAT(DISTINCT timezone) AS timezone, ROUND(AVG(VAS)) AS VAS, ROUND(AVG(ReIP)) AS ReIP, ROUND(AVG(phF)) AS phF, ROUND(AVG(MF)) AS MF, ROUND(AVG(sport)) AS sport, COUNT(*) AS n_answers
FROM temp
GROUP BY subjectID, date
ORDER BY n_answers DESC;
'''
Y = sqldf(query).fillna(value=pd.NA)
Y

Unnamed: 0,subjectID,date,timezone,VAS,ReIP,phF,MF,sport,n_answers
0,24,06.02.18,CET,1.0,0.0,1.0,1.0,,4
1,24,07.02.18,CET,0.0,0.0,1.0,1.0,,3
2,4,30.03.19,UTC,0.0,1.0,0.0,0.0,0.0,2
3,5,04.04.19,UTC,1.0,1.0,1.0,1.0,0.0,2
4,5,07.04.19,UTC,1.0,1.0,1.0,1.0,1.0,2
...,...,...,...,...,...,...,...,...,...
471,27,31.08.18,CEST,0.0,-1.0,1.0,1.0,,1
472,28,09.08.18,CEST,1.0,0.0,0.0,0.0,,1
473,28,10.08.18,CEST,0.0,0.0,0.0,1.0,,1
474,28,13.08.18,CEST,0.0,0.0,1.0,1.0,,1


# b) Physiological data

In [222]:
physio = physio.fillna(value=pd.NA) # otherwise SQL will ignore None values
physio

Unnamed: 0,Timestamp,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,RESP,Steps,SkinTemperature,SubjectID,ActivityClass
0,14.03.19 14:38,0.784314,981.25,0.0,0.0,753.636566,,83.0,,,0.0,28.28,1,
1,14.03.19 14:39,1.176471,981.2,0.175,1.9,1256.060944,,75.403633,,,0.0,29.44,1,
2,14.03.19 14:40,2.352941,981.2,0.19,1.82,1256.060944,,69.112653,,14.5625,0.0,30.41,1,
3,14.03.19 14:41,1.176471,981.1,0.11,1.94,1256.060944,,69.781164,,12.175528,0.0,31.0,1,
4,14.03.19 14:42,1.176471,981.2,0.1,1.68,1256.060944,,72.306609,,12.531234,0.0,31.455,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
313578,13.09.18 06:36,0.784314,966.9,0.14,2.31,1363.812563,,84.831541,42.3,20.008803,0.0,32.63,27,
313579,13.09.18 06:37,5.882353,966.9,0.13,2.61,1330.995827,,82.644772,40.933333,21.377863,0.0,32.69,27,
313580,13.09.18 06:38,2.352941,966.9,0.13,2.34,1330.995827,,92.145701,41.636364,21.906977,0.0,32.75,27,
313581,13.09.18 06:39,1.960784,966.9,0.16,2.81,1330.995827,,91.410742,,16.136986,0.0,32.81,27,


In [223]:
query = '''
SELECT SubjectID AS subjectID,
    SUBSTRING(Timestamp, 1, 8) AS date,
    GROUP_CONCAT(ActivityCounts) AS ActivityCounts,
    GROUP_CONCAT(Barometer) AS Barometer,
    GROUP_CONCAT(BloodPerfusion) AS BloodPerfusion,
    GROUP_CONCAT(BloodPulseWave) AS BloodPulseWave,
    GROUP_CONCAT(EnergyExpenditure) AS EnergyExpenditure,
    GROUP_CONCAT(GalvanicSkinResponse) AS GalvanicSkinResponse,
    GROUP_CONCAT(HR) AS HR, GROUP_CONCAT(HRV) AS HRV,
    GROUP_CONCAT(RESP) AS RESP,
    GROUP_CONCAT(Steps) AS Steps,
    GROUP_CONCAT(SkinTemperature) AS SkinTemperature,
    GROUP_CONCAT(ActivityClass) AS ActivityClass
FROM physio
GROUP BY subjectID, date;'''
X = sqldf(query)
X

Unnamed: 0,subjectID,date,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,RESP,Steps,SkinTemperature,ActivityClass
0,1,14.03.19,"0.784313725,1.176470588,2.352941176,1.17647058...","981.25,981.2,981.2,981.1,981.2,981.3,981.2,981...","0.0,0.175,0.19,0.11,0.1,0.09,2.55,2.55,0.0,0.1...","0.0,1.9,1.82,1.94,1.68,1.09,1.49,1.51,0.0,2.65...","753.6365662,1256.060944,1256.060944,1256.06094...","4.989619048,4.953446328,4.859005556,4.77845833...","83.0,75.40363269,69.11265332,69.78116438,72.30...","40.1875,40.0,40.0,38.75,44.15,48.66666667,53.4...","14.5625,12.17552786,12.531234,14.69469469,22.4...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","28.28,29.44,30.41,31.0,31.455,31.795,32.22,31....",
1,1,15.03.19,"0.784313725,0.784313725,0.784313725,0.78431372...","984.4,984.5,984.3,984.4,984.3,984.4,984.3,984....","0.875,0.825,0.975,0.98,1.0,0.985,0.96,0.93,0.9...","2.27,2.18,2.19,2.18,2.02,2.27,2.27,2.49,2.17,2...","1311.85318,1311.85318,1311.85318,1311.85318,13...","2.471338889,2.46735,2.466372222,2.4621,2.45465...","67.54923531,57.26287697,55.2253739,58.43175416...","44.92312073,51.26666667,57.7,58.01666667,57.41...","14.34420472,14.21430416,12.45540139,14.6309642...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.13,36.19,36.25,36.31,36.44,36.5,36.59,36.69...",
2,1,16.03.19,"0.784313725,1.176470588,5.098039216,0.78431372...","990.7,990.6,990.7,990.7,990.8,990.75,990.8,990...","1.045,0.79,0.84,0.585,0.415,0.74,0.835,0.835,0...","2.32,2.22,2.44,2.18,2.45,2.44,2.24,2.28,2.18,2...","1311.85318,1311.85318,1311.85318,1311.85318,13...","1.172788889,1.171305556,1.061122222,3.09794444...","64.65655738,63.83929024,63.86047157,62.6129328...","31.3815721,34.48333333,36.25,39.48333333,41.9,...","15.03026316,15.03544724,12.88235294,12.5884458...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","34.34,34.38,34.38,34.455,34.16,34.06,34.19,34....",
3,1,17.03.19,"0.784313725,0.784313725,0.784313725,0.78431372...","982.15,982.05,982.0,982.0,982.0,982.0,982.0,98...","1.42,1.47,1.48,1.46,1.49,1.475,1.47,1.46,1.45,...","2.86,2.7,2.73,2.72,2.68,2.9,2.96,3.02,3.08,3.3...","1311.85318,1311.85318,1311.85318,1311.85318,13...","2.573777778,2.567694444,2.560622222,2.55622222...","69.66641006,70.49757838,70.17700916,69.3862999...","23.08598131,22.81666667,20.41666667,20.5166666...","14.23136959,13.68923699,16.7,16.6549496,13.494...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","35.63,35.63,35.44,35.41,35.38,35.38,35.38,35.3...",
4,1,18.03.19,"0.784313725,1.176470588,0.784313725,0.78431372...","988.5,988.6,988.7,988.65,988.5,988.5,988.5,988...","1.48,1.49,1.155,1.525,1.38,1.45,1.555,1.41,1.6...","1.54,1.61,2.18,2.01,1.76,1.76,1.78,1.81,1.5,1....","1289.98896,1311.85318,1311.85318,1311.85318,13...","2.632423729,2.630555556,2.626594444,2.62318333...","57.42528736,59.38585964,65.7858185,58.39290306...","33.69491525,34.18333333,37.91988636,46.85,52.9...","15.35642063,14.75081392,14.21644803,10.5670285...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.94,36.97,36.97,36.69,36.53,36.34,36.19,35.9...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
946,27,30.08.18,"0.784313725,0.784313725,0.784313725,0.78431372...","967.1,967.05,967.0,967.1,967.1,967.0,967.0,967...","0.62,0.63,0.5,0.56,0.555,0.55,0.53,0.51,0.49,0...","2.68,2.7,2.78,2.45,2.58,2.67,2.52,2.5,2.6,2.6,...","1330.995827,1330.995827,1374.446035,1330.99582...","0.925716667,0.934888889,0.854785311,0.85772777...","63.41839611,63.30076726,65.60981987,58.1836150...","44.13333333,43.55,42.65485714,40.7,39.26666667...","16.56763527,17.25,16.70448549,15.14208633,14.6...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.44,36.41,36.38,36.34,36.31,36.22,36.16,36.0...","1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1...."
947,27,31.03.18,,,,,,,,,,,,
948,27,31.05.18,,,,,,,,,,,,
949,27,31.07.18,,,,,,,,,,,,


# Join data

# a) unnested data

In [224]:
physio

Unnamed: 0,Timestamp,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,RESP,Steps,SkinTemperature,SubjectID,ActivityClass
0,14.03.19 14:38,0.784314,981.25,0.0,0.0,753.636566,,83.0,,,0.0,28.28,1,
1,14.03.19 14:39,1.176471,981.2,0.175,1.9,1256.060944,,75.403633,,,0.0,29.44,1,
2,14.03.19 14:40,2.352941,981.2,0.19,1.82,1256.060944,,69.112653,,14.5625,0.0,30.41,1,
3,14.03.19 14:41,1.176471,981.1,0.11,1.94,1256.060944,,69.781164,,12.175528,0.0,31.0,1,
4,14.03.19 14:42,1.176471,981.2,0.1,1.68,1256.060944,,72.306609,,12.531234,0.0,31.455,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
313578,13.09.18 06:36,0.784314,966.9,0.14,2.31,1363.812563,,84.831541,42.3,20.008803,0.0,32.63,27,
313579,13.09.18 06:37,5.882353,966.9,0.13,2.61,1330.995827,,82.644772,40.933333,21.377863,0.0,32.69,27,
313580,13.09.18 06:38,2.352941,966.9,0.13,2.34,1330.995827,,92.145701,41.636364,21.906977,0.0,32.75,27,
313581,13.09.18 06:39,1.960784,966.9,0.16,2.81,1330.995827,,91.410742,,16.136986,0.0,32.81,27,


In [226]:
query = '''
SELECT SubjectID AS subjectID,
    SUBSTRING(Timestamp, 1, 8) AS date,
    ActivityCounts,
    Barometer,
    BloodPerfusion,
    BloodPulseWave,
    EnergyExpenditure,
    GalvanicSkinResponse,
    HR,
    HRV,
    RESP,
    Steps,
    SkinTemperature,
    ActivityClass
FROM physio;
'''
temp = sqldf(query)
temp.fillna(value=pd.NA)

Unnamed: 0,subjectID,date,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,RESP,Steps,SkinTemperature,ActivityClass
0,1,14.03.19,0.784314,981.25,0.0,0.0,753.636566,,83.0,,,0.0,28.28,
1,1,14.03.19,1.176471,981.2,0.175,1.9,1256.060944,,75.403633,,,0.0,29.44,
2,1,14.03.19,2.352941,981.2,0.19,1.82,1256.060944,,69.112653,,14.5625,0.0,30.41,
3,1,14.03.19,1.176471,981.1,0.11,1.94,1256.060944,,69.781164,,12.175528,0.0,31.0,
4,1,14.03.19,1.176471,981.2,0.1,1.68,1256.060944,,72.306609,,12.531234,0.0,31.455,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1325676,27,13.09.18,0.784314,966.9,0.14,2.31,1363.812563,,84.831541,42.3,20.008803,0.0,32.63,
1325677,27,13.09.18,5.882353,966.9,0.13,2.61,1330.995827,,82.644772,40.933333,21.377863,0.0,32.69,
1325678,27,13.09.18,2.352941,966.9,0.13,2.34,1330.995827,,92.145701,41.636364,21.906977,0.0,32.75,
1325679,27,13.09.18,1.960784,966.9,0.16,2.81,1330.995827,,91.410742,,16.136986,0.0,32.81,


In [230]:
# convert comma separated strings to list
physiological_names = {'ActivityCounts',
                       'Barometer',
                       'BloodPerfusion',
                       'BloodPulseWave',
                       'EnergyExpenditure',
                       'GalvanicSkinResponse',
                       'HR',
                       'HRV',
                       'RESP',
                       'Steps',
                       'SkinTemperature',
                       'ActivityClass'}

for physiological_name in physiological_names:
    pd.to_numeric(temp[physiological_name])
temp

Unnamed: 0,subjectID,date,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,RESP,Steps,SkinTemperature,ActivityClass
0,1,14.03.19,0.784314,981.25,0.000,0.00,753.636566,,83.000000,,,0.0,28.280,
1,1,14.03.19,1.176471,981.20,0.175,1.90,1256.060944,,75.403633,,,0.0,29.440,
2,1,14.03.19,2.352941,981.20,0.190,1.82,1256.060944,,69.112653,,14.562500,0.0,30.410,
3,1,14.03.19,1.176471,981.10,0.110,1.94,1256.060944,,69.781164,,12.175528,0.0,31.000,
4,1,14.03.19,1.176471,981.20,0.100,1.68,1256.060944,,72.306609,,12.531234,0.0,31.455,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1325676,27,13.09.18,0.784314,966.90,0.140,2.31,1363.812563,,84.831541,42.300000,20.008803,0.0,32.630,
1325677,27,13.09.18,5.882353,966.90,0.130,2.61,1330.995827,,82.644772,40.933333,21.377863,0.0,32.690,
1325678,27,13.09.18,2.352941,966.90,0.130,2.34,1330.995827,,92.145701,41.636364,21.906977,0.0,32.750,
1325679,27,13.09.18,1.960784,966.90,0.160,2.81,1330.995827,,91.410742,,16.136986,0.0,32.810,


In [228]:
temp.dtypes

subjectID                 int64
date                     object
ActivityCounts          float64
Barometer               float64
BloodPerfusion          float64
BloodPulseWave          float64
EnergyExpenditure       float64
GalvanicSkinResponse    float64
HR                      float64
HRV                     float64
RESP                    float64
Steps                   float64
SkinTemperature         float64
ActivityClass           float64
dtype: object

In [206]:
Y

Unnamed: 0,subjectID,date,timezone,VAS,ReIP,phF,MF,sport,n_answers
0,24,06.02.18,CET,1.0,0.0,1.0,1.0,,4
1,24,07.02.18,CET,0.0,0.0,1.0,1.0,,3
2,4,30.03.19,UTC,0.0,1.0,0.0,0.0,0.0,2
3,5,04.04.19,UTC,1.0,1.0,1.0,1.0,0.0,2
4,5,07.04.19,UTC,1.0,1.0,1.0,1.0,1.0,2
...,...,...,...,...,...,...,...,...,...
471,27,31.08.18,CEST,0.0,-1.0,1.0,1.0,,1
472,28,09.08.18,CEST,1.0,0.0,0.0,0.0,,1
473,28,10.08.18,CEST,0.0,0.0,0.0,1.0,,1
474,28,13.08.18,CEST,0.0,0.0,1.0,1.0,,1


In [231]:
query = '''
SELECT Y.subjectID,
    Y.date,
    temp.ActivityCounts,
    temp.Barometer,
    temp.BloodPerfusion,
    temp.BloodPulseWave,
    temp.EnergyExpenditure,
    temp.GalvanicSkinResponse,
    temp.HR,
    temp.HRV,
    temp.RESP,
    temp.Steps,
    temp.SkinTemperature,
    temp.ActivityClass,
    y.timezone,
    y.VAS,
    y.phF,
    y.MF,
    y.ReIP,
    y.sport,
    y.n_answers
FROM temp JOIN Y ON temp.subjectID = Y.subjectID AND temp.date = Y.date;'''
data_unnested = sqldf(query)
data_unnested

Unnamed: 0,subjectID,date,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,...,Steps,SkinTemperature,ActivityClass,timezone,VAS,phF,MF,ReIP,sport,n_answers
0,1,14.03.19,0.784314,981.25,0.000,0.00,753.636566,,83.000000,,...,0.0,28.280,,UTC,0.0,0.0,1.0,-1.0,0.0,1
1,1,14.03.19,1.176471,981.20,0.175,1.90,1256.060944,,75.403633,,...,0.0,29.440,,UTC,0.0,0.0,1.0,-1.0,0.0,1
2,1,14.03.19,2.352941,981.20,0.190,1.82,1256.060944,,69.112653,,...,0.0,30.410,,UTC,0.0,0.0,1.0,-1.0,0.0,1
3,1,14.03.19,1.176471,981.10,0.110,1.94,1256.060944,,69.781164,,...,0.0,31.000,,UTC,0.0,0.0,1.0,-1.0,0.0,1
4,1,14.03.19,1.176471,981.20,0.100,1.68,1256.060944,,72.306609,,...,0.0,31.455,,UTC,0.0,0.0,1.0,-1.0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
617272,27,13.09.18,0.784314,966.90,0.140,2.31,1363.812563,,84.831541,42.300000,...,0.0,32.630,,CEST,0.0,0.0,0.0,0.0,,1
617273,27,13.09.18,5.882353,966.90,0.130,2.61,1330.995827,,82.644772,40.933333,...,0.0,32.690,,CEST,0.0,0.0,0.0,0.0,,1
617274,27,13.09.18,2.352941,966.90,0.130,2.34,1330.995827,,92.145701,41.636364,...,0.0,32.750,,CEST,0.0,0.0,0.0,0.0,,1
617275,27,13.09.18,1.960784,966.90,0.160,2.81,1330.995827,,91.410742,,...,0.0,32.810,,CEST,0.0,0.0,0.0,0.0,,1


# b) nested data

In [202]:
query = '''
SELECT Y.subjectID, Y.date, X.ActivityCounts, X.Barometer, X.BloodPerfusion, X.BloodPulseWave, X.EnergyExpenditure, X.GalvanicSkinResponse, X.HR, X.HRV, X.RESP, X.Steps, X.SkinTemperature, X.ActivityClass, y.timezone, y.VAS, y.phF, y.MF, y.ReIP, y.sport, y.n_answers
FROM X JOIN Y ON X.subjectID = Y.subjectID AND X.date = Y.date;'''
data = sqldf(query)
data

Unnamed: 0,subjectID,date,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,...,Steps,SkinTemperature,ActivityClass,timezone,VAS,phF,MF,ReIP,sport,n_answers
0,1,14.03.19,"0.784313725,1.176470588,2.352941176,1.17647058...","981.25,981.2,981.2,981.1,981.2,981.3,981.2,981...","0.0,0.175,0.19,0.11,0.1,0.09,NaN,NaN,NaN,NaN,N...","0.0,1.9,1.82,1.94,1.68,1.09,NaN,NaN,NaN,NaN,Na...","753.6365662,1256.060944,1256.060944,1256.06094...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","83.0,75.40363269,69.11265332,69.78116438,72.30...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",...,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NaN,0....","28.28,29.44,30.41,31.0,31.455,31.795,32.22,31....","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",UTC,0.0,0.0,1.0,-1.0,0.0,1
1,1,15.03.19,"0.784313725,0.784313725,0.784313725,0.78431372...","984.4,984.5,984.3,984.4,984.3,984.4,984.3,984....","0.875,0.825,0.975,0.98,1.0,0.985,0.96,0.93,0.9...","2.27,2.18,2.19,2.18,2.02,2.27,2.27,2.49,2.17,2...","1311.85318,1311.85318,1311.85318,1311.85318,13...","2.471338889,2.46735,2.466372222,2.4621,2.45465...","67.54923531,57.26287697,55.2253739,58.43175416...","44.92312073,51.26666667,57.7,58.01666667,57.41...",...,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.13,36.19,36.25,36.31,36.44,36.5,36.59,36.69...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",UTC,0.0,1.0,1.0,1.0,1.0,1
2,1,16.03.19,"0.784313725,1.176470588,5.098039216,0.78431372...","990.7,990.6,990.7,990.7,990.8,990.75,990.8,990...","1.045,0.79,0.84,0.585,0.415,0.74,0.835,0.835,0...","2.32,2.22,2.44,2.18,2.45,2.44,2.24,2.28,2.18,2...","1311.85318,1311.85318,1311.85318,1311.85318,13...","1.172788889,1.171305556,1.061122222,3.09794444...","64.65655738,63.83929024,63.86047157,62.6129328...","31.3815721,34.48333333,36.25,39.48333333,41.9,...",...,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","34.34,34.38,34.38,34.455,34.16,34.06,34.19,34....","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",UTC,0.0,0.0,0.0,0.0,0.0,1
3,1,17.03.19,"0.784313725,0.784313725,0.784313725,0.78431372...","982.15,982.05,982.0,982.0,982.0,982.0,982.0,98...","1.42,1.47,1.48,1.46,1.49,1.475,1.47,1.46,1.45,...","2.86,2.7,2.73,2.72,2.68,2.9,2.96,3.02,3.08,3.3...","1311.85318,1311.85318,1311.85318,1311.85318,13...","2.573777778,2.567694444,2.560622222,2.55622222...","69.66641006,70.49757838,70.17700916,69.3862999...","23.08598131,22.81666667,20.41666667,20.5166666...",...,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","35.63,35.63,35.44,35.41,35.38,35.38,35.38,35.3...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",UTC,0.0,1.0,0.0,0.0,1.0,1
4,1,18.03.19,"0.784313725,1.176470588,0.784313725,0.78431372...","988.5,988.6,988.7,988.65,988.5,988.5,988.5,988...","1.48,1.49,1.155,1.525,1.38,1.45,1.555,1.41,1.6...","1.54,1.61,2.18,2.01,1.76,1.76,1.78,1.81,1.5,1....","1289.98896,1311.85318,1311.85318,1311.85318,13...","2.632423729,2.630555556,2.626594444,2.62318333...","57.42528736,59.38585964,65.7858185,58.39290306...","33.69491525,34.18333333,37.91988636,46.85,52.9...",...,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.94,36.97,36.97,36.69,36.53,36.34,36.19,35.9...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",UTC,0.0,1.0,0.0,-1.0,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
439,27,27.02.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",...,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",CET,0.0,1.0,0.0,-1.0,,2
440,27,27.08.18,"0.784313725,0.784313725,0.784313725,0.78431372...","964.8,964.8,964.8,964.7,964.7,964.7,964.7,964....","0.65,0.64,0.66,0.65,0.66,0.66,0.62,0.66,0.66,0...","2.58,2.83,2.6,2.7,2.7,2.66,2.94,2.68,2.8,2.66,...","1330.995827,1330.995827,1330.995827,1330.99582...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","64.40149177,63.1057941,64.60267283,63.78507079...","62.2875,61.9,61.55,61.4,64.38333333,65.1906202...",...,"0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","37.56,37.56,37.56,37.59,37.59,37.56,37.63,37.6...","1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1....",CEST,0.0,1.0,0.0,1.0,,1
441,27,28.02.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",...,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",CET,0.0,1.0,0.0,0.0,,1
442,27,28.08.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",...,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...",CEST,0.0,0.0,0.0,1.0,,1


In [37]:
# missing data for fatigue values?
Y.shape[0] - data.shape[0]

32

In [38]:
query = '''
WITH yy AS (
    SELECT subjectID, date
    FROM Y
),
xx AS (
    SELECT subjectID, date
    FROM X
),
joined AS (
    SELECT *
    FROM xx NATURAL JOIN yy
)
SELECT *
FROM yy

EXCEPT

SELECT *
FROM joined;'''
sqldf(query)

Unnamed: 0,subjectID,date
0,2,17.04.19
1,2,18.04.19
2,2,19.04.19
3,2,20.04.19
4,2,21.04.19
5,2,22.04.19
6,22,02.06.19
7,24,13.12.17
8,24,14.12.17
9,24,15.12.17


In [39]:
X # seems like for 32 days we have questionnaires but no physiological data?

Unnamed: 0,subjectID,date,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,RESP,Steps,SkinTemperature,ActivityClass
0,1,14.03.19,"0.784313725,1.176470588,2.352941176,1.17647058...","981.25,981.2,981.2,981.1,981.2,981.3,981.2,981...","0.0,0.175,0.19,0.11,0.1,0.09,NaN,NaN,NaN,NaN,N...","0.0,1.9,1.82,1.94,1.68,1.09,NaN,NaN,NaN,NaN,Na...","753.6365662,1256.060944,1256.060944,1256.06094...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","83.0,75.40363269,69.11265332,69.78116438,72.30...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,14.5625,12.17552786,12.531234,14.69469...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NaN,0....","28.28,29.44,30.41,31.0,31.455,31.795,32.22,31....","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
1,1,15.03.19,"0.784313725,0.784313725,0.784313725,0.78431372...","984.4,984.5,984.3,984.4,984.3,984.4,984.3,984....","0.875,0.825,0.975,0.98,1.0,0.985,0.96,0.93,0.9...","2.27,2.18,2.19,2.18,2.02,2.27,2.27,2.49,2.17,2...","1311.85318,1311.85318,1311.85318,1311.85318,13...","2.471338889,2.46735,2.466372222,2.4621,2.45465...","67.54923531,57.26287697,55.2253739,58.43175416...","44.92312073,51.26666667,57.7,58.01666667,57.41...","14.34420472,14.21430416,12.45540139,14.6309642...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.13,36.19,36.25,36.31,36.44,36.5,36.59,36.69...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
2,1,16.03.19,"0.784313725,1.176470588,5.098039216,0.78431372...","990.7,990.6,990.7,990.7,990.8,990.75,990.8,990...","1.045,0.79,0.84,0.585,0.415,0.74,0.835,0.835,0...","2.32,2.22,2.44,2.18,2.45,2.44,2.24,2.28,2.18,2...","1311.85318,1311.85318,1311.85318,1311.85318,13...","1.172788889,1.171305556,1.061122222,3.09794444...","64.65655738,63.83929024,63.86047157,62.6129328...","31.3815721,34.48333333,36.25,39.48333333,41.9,...","15.03026316,15.03544724,12.88235294,12.5884458...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","34.34,34.38,34.38,34.455,34.16,34.06,34.19,34....","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
3,1,17.03.19,"0.784313725,0.784313725,0.784313725,0.78431372...","982.15,982.05,982.0,982.0,982.0,982.0,982.0,98...","1.42,1.47,1.48,1.46,1.49,1.475,1.47,1.46,1.45,...","2.86,2.7,2.73,2.72,2.68,2.9,2.96,3.02,3.08,3.3...","1311.85318,1311.85318,1311.85318,1311.85318,13...","2.573777778,2.567694444,2.560622222,2.55622222...","69.66641006,70.49757838,70.17700916,69.3862999...","23.08598131,22.81666667,20.41666667,20.5166666...","14.23136959,13.68923699,16.7,16.6549496,13.494...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","35.63,35.63,35.44,35.41,35.38,35.38,35.38,35.3...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
4,1,18.03.19,"0.784313725,1.176470588,0.784313725,0.78431372...","988.5,988.6,988.7,988.65,988.5,988.5,988.5,988...","1.48,1.49,1.155,1.525,1.38,1.45,1.555,1.41,1.6...","1.54,1.61,2.18,2.01,1.76,1.76,1.78,1.81,1.5,1....","1289.98896,1311.85318,1311.85318,1311.85318,13...","2.632423729,2.630555556,2.626594444,2.62318333...","57.42528736,59.38585964,65.7858185,58.39290306...","33.69491525,34.18333333,37.91988636,46.85,52.9...","15.35642063,14.75081392,14.21644803,10.5670285...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.94,36.97,36.97,36.69,36.53,36.34,36.19,35.9...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
946,27,30.08.18,"0.784313725,0.784313725,0.784313725,0.78431372...","967.1,967.05,967.0,967.1,967.1,967.0,967.0,967...","0.62,0.63,0.5,0.56,0.555,0.55,0.53,0.51,0.49,0...","2.68,2.7,2.78,2.45,2.58,2.67,2.52,2.5,2.6,2.6,...","1330.995827,1330.995827,1374.446035,1330.99582...","0.925716667,0.934888889,0.854785311,0.85772777...","63.41839611,63.30076726,65.60981987,58.1836150...","44.13333333,43.55,42.65485714,40.7,39.26666667...","16.56763527,17.25,16.70448549,15.14208633,14.6...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","36.44,36.41,36.38,36.34,36.31,36.22,36.16,36.0...","1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1...."
947,27,31.03.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
948,27,31.05.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."
949,27,31.07.18,"NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na...","NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Na..."


In [201]:
# convert comma separated strings to list
physiological_names = {'ActivityCounts',
                       'Barometer',
                       'BloodPerfusion',
                       'BloodPulseWave',
                       'EnergyExpenditure',
                       'GalvanicSkinResponse',
                       'HR',
                       'HRV',
                       'RESP',
                       'Steps',
                       'SkinTemperature',
                       'ActivityClass'}

for physiological_name in physiological_names:
    data[physiological_name] = data[physiological_name].str.split(',')
    #data[physiological_name] = pd.to_numeric(data[physiological_name].str.split(','), errors='coerce')
data

Unnamed: 0,subjectID,date,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,...,Steps,SkinTemperature,ActivityClass,timezone,VAS,phF,MF,ReIP,sport,n_answers
0,1,14.03.19,,,,,,,,,...,,,,UTC,0.0,0.0,1.0,-1.0,0.0,1
1,1,15.03.19,,,,,,,,,...,,,,UTC,0.0,1.0,1.0,1.0,1.0,1
2,1,16.03.19,,,,,,,,,...,,,,UTC,0.0,0.0,0.0,0.0,0.0,1
3,1,17.03.19,,,,,,,,,...,,,,UTC,0.0,1.0,0.0,0.0,1.0,1
4,1,18.03.19,,,,,,,,,...,,,,UTC,0.0,1.0,0.0,-1.0,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
439,27,27.02.18,,,,,,,,,...,,,,CET,0.0,1.0,0.0,-1.0,,2
440,27,27.08.18,,,,,,,,,...,,,,CEST,0.0,1.0,0.0,1.0,,1
441,27,28.02.18,,,,,,,,,...,,,,CET,0.0,1.0,0.0,0.0,,1
442,27,28.08.18,,,,,,,,,...,,,,CEST,0.0,0.0,0.0,1.0,,1


In [193]:
data2 = data

In [194]:
for physiological_name in physiological_names:
    data2[physiological_name] = float(data2[physiological_name])
print(data2.dtypes)
data2

TypeError: cannot convert the series to <class 'float'>

In [200]:
data2['ActivityCounts'].apply(lambda x: map(float, x))
print(data2.dtypes)
data2

subjectID                 int64
date                     object
ActivityCounts           object
Barometer                object
BloodPerfusion           object
BloodPulseWave           object
EnergyExpenditure        object
GalvanicSkinResponse     object
HR                       object
HRV                      object
RESP                     object
Steps                    object
SkinTemperature          object
ActivityClass            object
timezone                 object
VAS                     float64
phF                     float64
MF                      float64
ReIP                    float64
sport                   float64
n_answers                 int64
dtype: object


Unnamed: 0,subjectID,date,ActivityCounts,Barometer,BloodPerfusion,BloodPulseWave,EnergyExpenditure,GalvanicSkinResponse,HR,HRV,...,Steps,SkinTemperature,ActivityClass,timezone,VAS,phF,MF,ReIP,sport,n_answers
0,1,14.03.19,"[0.784313725, 1.176470588, 2.352941176, 1.1764...","[981.25, 981.2, 981.2, 981.1, 981.2, 981.3, 98...","[0.0, 0.175, 0.19, 0.11, 0.1, 0.09, NaN, NaN, ...","[0.0, 1.9, 1.82, 1.94, 1.68, 1.09, NaN, NaN, N...","[753.6365662, 1256.060944, 1256.060944, 1256.0...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[83.0, 75.40363269, 69.11265332, 69.78116438, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...",...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[28.28, 29.44, 30.41, 31.0, 31.455, 31.795, 32...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...",UTC,0.0,0.0,1.0,-1.0,0.0,1
1,1,15.03.19,"[0.784313725, 0.784313725, 0.784313725, 0.7843...","[984.4, 984.5, 984.3, 984.4, 984.3, 984.4, 984...","[0.875, 0.825, 0.975, 0.98, 1.0, 0.985, 0.96, ...","[2.27, 2.18, 2.19, 2.18, 2.02, 2.27, 2.27, 2.4...","[1311.85318, 1311.85318, 1311.85318, 1311.8531...","[2.471338889, 2.46735, 2.466372222, 2.4621, 2....","[67.54923531, 57.26287697, 55.2253739, 58.4317...","[44.92312073, 51.26666667, 57.7, 58.01666667, ...",...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[36.13, 36.19, 36.25, 36.31, 36.44, 36.5, 36.5...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...",UTC,0.0,1.0,1.0,1.0,1.0,1
2,1,16.03.19,"[0.784313725, 1.176470588, 5.098039216, 0.7843...","[990.7, 990.6, 990.7, 990.7, 990.8, 990.75, 99...","[1.045, 0.79, 0.84, 0.585, 0.415, 0.74, 0.835,...","[2.32, 2.22, 2.44, 2.18, 2.45, 2.44, 2.24, 2.2...","[1311.85318, 1311.85318, 1311.85318, 1311.8531...","[1.172788889, 1.171305556, 1.061122222, 3.0979...","[64.65655738, 63.83929024, 63.86047157, 62.612...","[31.3815721, 34.48333333, 36.25, 39.48333333, ...",...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[34.34, 34.38, 34.38, 34.455, 34.16, 34.06, 34...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...",UTC,0.0,0.0,0.0,0.0,0.0,1
3,1,17.03.19,"[0.784313725, 0.784313725, 0.784313725, 0.7843...","[982.15, 982.05, 982.0, 982.0, 982.0, 982.0, 9...","[1.42, 1.47, 1.48, 1.46, 1.49, 1.475, 1.47, 1....","[2.86, 2.7, 2.73, 2.72, 2.68, 2.9, 2.96, 3.02,...","[1311.85318, 1311.85318, 1311.85318, 1311.8531...","[2.573777778, 2.567694444, 2.560622222, 2.5562...","[69.66641006, 70.49757838, 70.17700916, 69.386...","[23.08598131, 22.81666667, 20.41666667, 20.516...",...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[35.63, 35.63, 35.44, 35.41, 35.38, 35.38, 35....","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...",UTC,0.0,1.0,0.0,0.0,1.0,1
4,1,18.03.19,"[0.784313725, 1.176470588, 0.784313725, 0.7843...","[988.5, 988.6, 988.7, 988.65, 988.5, 988.5, 98...","[1.48, 1.49, 1.155, 1.525, 1.38, 1.45, 1.555, ...","[1.54, 1.61, 2.18, 2.01, 1.76, 1.76, 1.78, 1.8...","[1289.98896, 1311.85318, 1311.85318, 1311.8531...","[2.632423729, 2.630555556, 2.626594444, 2.6231...","[57.42528736, 59.38585964, 65.7858185, 58.3929...","[33.69491525, 34.18333333, 37.91988636, 46.85,...",...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[36.94, 36.97, 36.97, 36.69, 36.53, 36.34, 36....","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...",UTC,0.0,1.0,0.0,-1.0,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
439,27,27.02.18,"[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...",...,"[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...",CET,0.0,1.0,0.0,-1.0,,2
440,27,27.08.18,"[0.784313725, 0.784313725, 0.784313725, 0.7843...","[964.8, 964.8, 964.8, 964.7, 964.7, 964.7, 964...","[0.65, 0.64, 0.66, 0.65, 0.66, 0.66, 0.62, 0.6...","[2.58, 2.83, 2.6, 2.7, 2.7, 2.66, 2.94, 2.68, ...","[1330.995827, 1330.995827, 1330.995827, 1330.9...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[64.40149177, 63.1057941, 64.60267283, 63.7850...","[62.2875, 61.9, 61.55, 61.4, 64.38333333, 65.1...",...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[37.56, 37.56, 37.56, 37.59, 37.59, 37.56, 37....","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",CEST,0.0,1.0,0.0,1.0,,1
441,27,28.02.18,"[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...",...,"[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...",CET,0.0,1.0,0.0,0.0,,1
442,27,28.08.18,"[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...",...,"[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...","[NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...",CEST,0.0,0.0,0.0,1.0,,1


In [198]:
float([1, 2, 3])

TypeError: float() argument must be a string or a number, not 'list'

In [149]:
data2.dtypes

subjectID                 int64
date                     object
ActivityCounts           object
Barometer                object
BloodPerfusion           object
BloodPulseWave           object
EnergyExpenditure        object
GalvanicSkinResponse     object
HR                       object
HRV                      object
RESP                     object
Steps                    object
SkinTemperature          object
ActivityClass            object
timezone                 object
VAS                     float64
phF                     float64
MF                      float64
ReIP                    float64
sport                   float64
n_answers                 int64
dtype: object

In [165]:
type(data2['ActivityCounts'][0])

list

In [153]:
import numpy as np

In [158]:
np.array(data2['ActivityCounts'][0]).astype(float)

dtype('float64')

In [167]:
data2['ActivityCounts'].apply(np.array, dtype=float)

0      [0.784313725, 1.176470588, 2.352941176, 1.1764...
1      [0.784313725, 0.784313725, 0.784313725, 0.7843...
2      [0.784313725, 1.176470588, 5.098039216, 0.7843...
3      [0.784313725, 0.784313725, 0.784313725, 0.7843...
4      [0.784313725, 1.176470588, 0.784313725, 0.7843...
                             ...                        
439    [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
440    [0.784313725, 0.784313725, 0.784313725, 0.7843...
441    [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
442    [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...
443    [0.784313725, 0.784313725, 0.784313725, 0.7843...
Name: ActivityCounts, Length: 444, dtype: object

In [181]:
data2 = data

In [182]:
data2 = data2.to_numpy()
data2

array([[1, '14.03.19',
        list(['0.784313725', '1.176470588', '2.352941176', '1.176470588', '1.176470588', '7.450980392', '0.784313725', '8.235294118', '0.784313725', '0.784313725', 'NaN', '4.31372549', '11.76470588', '0.784313725', '0.784313725', '0.784313725', 'NaN', '9.019607843', '2.352941176', '3.529411765', '0.784313725', '0.784313725', '1.568627451', '1.176470588', '0.784313725', '1.568627451', '0.784313725', '0.784313725', '0.784313725', '2.745098039', '0.784313725', '0.784313725', '1.176470588', '1.176470588', '1.176470588', '5.490196078', '9.411764706', '8.235294118', '5.882352941', '29.41176471', '16.8627451', '16.8627451', '20.39215686', '1.960784314', '8.62745098', '1.960784314', '8.62745098', '1.176470588', '11.37254902', '1.176470588', '7.843137255', '0.784313725', '9.411764706', '5.490196078', '7.058823529', '3.137254902', '2.352941176', '1.176470588', '2.352941176', '5.882352941', '1.960784314', '2.745098039', '1.960784314', '3.529411765', '1.568627451', '3.137254

In [185]:
data2[:, 2:14]

array([[list(['0.784313725', '1.176470588', '2.352941176', '1.176470588', '1.176470588', '7.450980392', '0.784313725', '8.235294118', '0.784313725', '0.784313725', 'NaN', '4.31372549', '11.76470588', '0.784313725', '0.784313725', '0.784313725', 'NaN', '9.019607843', '2.352941176', '3.529411765', '0.784313725', '0.784313725', '1.568627451', '1.176470588', '0.784313725', '1.568627451', '0.784313725', '0.784313725', '0.784313725', '2.745098039', '0.784313725', '0.784313725', '1.176470588', '1.176470588', '1.176470588', '5.490196078', '9.411764706', '8.235294118', '5.882352941', '29.41176471', '16.8627451', '16.8627451', '20.39215686', '1.960784314', '8.62745098', '1.960784314', '8.62745098', '1.176470588', '11.37254902', '1.176470588', '7.843137255', '0.784313725', '9.411764706', '5.490196078', '7.058823529', '3.137254902', '2.352941176', '1.176470588', '2.352941176', '5.882352941', '1.960784314', '2.745098039', '1.960784314', '3.529411765', '1.568627451', '3.137254902', '1.176470588', '1

In [178]:
np.array(data2[0, 2], dtype=float)

array([ 0.78431372,  1.17647059,  2.35294118,  1.17647059,  1.17647059,
        7.45098039,  0.78431372,  8.23529412,  0.78431372,  0.78431372,
               nan,  4.31372549, 11.76470588,  0.78431372,  0.78431372,
        0.78431372,         nan,  9.01960784,  2.35294118,  3.52941176,
        0.78431372,  0.78431372,  1.56862745,  1.17647059,  0.78431372,
        1.56862745,  0.78431372,  0.78431372,  0.78431372,  2.74509804,
        0.78431372,  0.78431372,  1.17647059,  1.17647059,  1.17647059,
        5.49019608,  9.41176471,  8.23529412,  5.88235294, 29.41176471,
       16.8627451 , 16.8627451 , 20.39215686,  1.96078431,  8.62745098,
        1.96078431,  8.62745098,  1.17647059, 11.37254902,  1.17647059,
        7.84313726,  0.78431372,  9.41176471,  5.49019608,  7.05882353,
        3.1372549 ,  2.35294118,  1.17647059,  2.35294118,  5.88235294,
        1.96078431,  2.74509804,  1.96078431,  3.52941176,  1.56862745,
        3.1372549 ,  1.17647059,  1.96078431,  0.78431372,  1.17

In [150]:
data2['ActivityCounts']

0      [0.784313725, 1.176470588, 2.352941176, 1.1764...
1      [0.784313725, 0.784313725, 0.784313725, 0.7843...
2      [0.784313725, 1.176470588, 5.098039216, 0.7843...
3      [0.784313725, 0.784313725, 0.784313725, 0.7843...
4      [0.784313725, 1.176470588, 0.784313725, 0.7843...
                             ...                        
439    [NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...
440    [0.784313725, 0.784313725, 0.784313725, 0.7843...
441    [NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...
442    [NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...
443    [0.784313725, 0.784313725, 0.784313725, 0.7843...
Name: ActivityCounts, Length: 444, dtype: object

In [151]:
data2['ActivityCounts'][0]

['0.784313725',
 '1.176470588',
 '2.352941176',
 '1.176470588',
 '1.176470588',
 '7.450980392',
 '0.784313725',
 '8.235294118',
 '0.784313725',
 '0.784313725',
 'NaN',
 '4.31372549',
 '11.76470588',
 '0.784313725',
 '0.784313725',
 '0.784313725',
 'NaN',
 '9.019607843',
 '2.352941176',
 '3.529411765',
 '0.784313725',
 '0.784313725',
 '1.568627451',
 '1.176470588',
 '0.784313725',
 '1.568627451',
 '0.784313725',
 '0.784313725',
 '0.784313725',
 '2.745098039',
 '0.784313725',
 '0.784313725',
 '1.176470588',
 '1.176470588',
 '1.176470588',
 '5.490196078',
 '9.411764706',
 '8.235294118',
 '5.882352941',
 '29.41176471',
 '16.8627451',
 '16.8627451',
 '20.39215686',
 '1.960784314',
 '8.62745098',
 '1.960784314',
 '8.62745098',
 '1.176470588',
 '11.37254902',
 '1.176470588',
 '7.843137255',
 '0.784313725',
 '9.411764706',
 '5.490196078',
 '7.058823529',
 '3.137254902',
 '2.352941176',
 '1.176470588',
 '2.352941176',
 '5.882352941',
 '1.960784314',
 '2.745098039',
 '1.960784314',
 '3.529411765

In [137]:
data2['ActivityCounts'].apply(pd.to_numeric)

ValueError: Unable to parse string "NaN" at position 10

In [135]:
for i, row in data2.iterrows():
    data2.iloc[i] =

AttributeError: 'Series' object has no attribute 'convert_objects'

In [122]:
temp = data['ActivityCounts'].str.split('y')

In [123]:
temp[0]

['0.784313725,1.176470588,2.352941176,1.176470588,1.176470588,7.450980392,0.784313725,8.235294118,0.784313725,0.784313725,NaN,4.31372549,11.76470588,0.784313725,0.784313725,0.784313725,NaN,9.019607843,2.352941176,3.529411765,0.784313725,0.784313725,1.568627451,1.176470588,0.784313725,1.568627451,0.784313725,0.784313725,0.784313725,2.745098039,0.784313725,0.784313725,1.176470588,1.176470588,1.176470588,5.490196078,9.411764706,8.235294118,5.882352941,29.41176471,16.8627451,16.8627451,20.39215686,1.960784314,8.62745098,1.960784314,8.62745098,1.176470588,11.37254902,1.176470588,7.843137255,0.784313725,9.411764706,5.490196078,7.058823529,3.137254902,2.352941176,1.176470588,2.352941176,5.882352941,1.960784314,2.745098039,1.960784314,3.529411765,1.568627451,3.137254902,1.176470588,1.960784314,0.784313725,1.176470588,0.784313725,1.568627451,1.960784314,1.176470588,3.137254902,7.450980392,1.176470588,0.784313725,1.176470588,0.784313725,1.176470588,1.176470588,0.784313725,1.960784314,1.568627451

In [121]:
pd.to_numeric(temp[0], errors='coerce')

nan

In [93]:
pd.to_numeric(temp)

TypeError: Invalid object type at position 0

In [88]:
data['ActivityCounts'].fillna(value=pd.NA)

0      [0.784313725, 1.176470588, 2.352941176, 1.1764...
1      [0.784313725, 0.784313725, 0.784313725, 0.7843...
2      [0.784313725, 1.176470588, 5.098039216, 0.7843...
3      [0.784313725, 0.784313725, 0.784313725, 0.7843...
4      [0.784313725, 1.176470588, 0.784313725, 0.7843...
                             ...                        
439    [NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...
440    [0.784313725, 0.784313725, 0.784313725, 0.7843...
441    [NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...
442    [NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, ...
443    [0.784313725, 0.784313725, 0.784313725, 0.7843...
Name: ActivityCounts, Length: 444, dtype: object

In [82]:
pd.to_numeric(data['ActivityCounts'].fillna(value=pd.NA))

ValueError: Unable to parse string "0.784313725,1.176470588,2.352941176,1.176470588,1.176470588,7.450980392,0.784313725,8.235294118,0.784313725,0.784313725,NaN,4.31372549,11.76470588,0.784313725,0.784313725,0.784313725,NaN,9.019607843,2.352941176,3.529411765,0.784313725,0.784313725,1.568627451,1.176470588,0.784313725,1.568627451,0.784313725,0.784313725,0.784313725,2.745098039,0.784313725,0.784313725,1.176470588,1.176470588,1.176470588,5.490196078,9.411764706,8.235294118,5.882352941,29.41176471,16.8627451,16.8627451,20.39215686,1.960784314,8.62745098,1.960784314,8.62745098,1.176470588,11.37254902,1.176470588,7.843137255,0.784313725,9.411764706,5.490196078,7.058823529,3.137254902,2.352941176,1.176470588,2.352941176,5.882352941,1.960784314,2.745098039,1.960784314,3.529411765,1.568627451,3.137254902,1.176470588,1.960784314,0.784313725,1.176470588,0.784313725,1.568627451,1.960784314,1.176470588,3.137254902,7.450980392,1.176470588,0.784313725,1.176470588,0.784313725,1.176470588,1.176470588,0.784313725,1.960784314,1.568627451,2.745098039,1.176470588,1.568627451,0.784313725,1.568627451,6.666666667,1.568627451,2.745098039,0.784313725,1.176470588,1.568627451,1.176470588,1.176470588,1.176470588,1.176470588,0.784313725,3.529411765,1.176470588,1.960784314,1.568627451,1.568627451,1.176470588,1.568627451,1.568627451,1.176470588,1.176470588,2.745098039,1.176470588,1.176470588,1.960784314,3.921568627,1.568627451,1.960784314,1.176470588,1.176470588,1.176470588,3.137254902,1.568627451,1.176470588,1.176470588,1.176470588,7.843137255,9.803921569,3.921568627,3.137254902,2.352941176,12.15686275,8.62745098,2.745098039,4.705882353,3.137254902,9.803921569,2.745098039,1.176470588,29.80392157,22.35294118,7.843137255,9.803921569,1.176470588,26.66666667,10.98039216,6.274509804,5.490196078,9.411764706,1.176470588,1.568627451,0.784313725,5.490196078,2.745098039,2.352941176,1.176470588,0.784313725,0.784313725,1.568627451,0.784313725,3.137254902,5.098039216,1.176470588,3.529411765,4.705882353,1.176470588,1.176470588,0.784313725,1.176470588,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,1.176470588,0.784313725,1.176470588,0.784313725,1.176470588,0.784313725,0.784313725,0.784313725,1.176470588,0.784313725,0.784313725,0.784313725,7.058823529,5.490196078,14.11764706,5.490196078,22.74509804,9.803921569,9.411764706,21.56862745,12.94117647,71.76470588,21.17647059,22.35294118,28.62745098,1.568627451,3.529411765,7.450980392,4.31372549,9.019607843,1.960784314,3.137254902,2.352941176,5.882352941,6.666666667,0.784313725,1.568627451,6.274509804,4.705882353,3.137254902,5.490196078,2.745098039,2.352941176,2.352941176,1.568627451,1.176470588,3.529411765,2.745098039,2.352941176,1.176470588,1.176470588,1.176470588,3.529411765,5.098039216,7.450980392,1.960784314,1.568627451,9.411764706,2.352941176,3.137254902,3.529411765,3.137254902,1.176470588,6.666666667,2.745098039,9.803921569,1.176470588,3.137254902,4.31372549,3.921568627,2.352941176,1.568627451,1.568627451,2.745098039,1.960784314,2.352941176,1.568627451,1.960784314,1.568627451,1.176470588,5.098039216,1.960784314,3.529411765,1.960784314,7.058823529,3.921568627,4.705882353,0.784313725,3.137254902,2.352941176,2.352941176,1.960784314,0.784313725,1.176470588,1.568627451,3.529411765,8.62745098,1.176470588,1.176470588,5.098039216,1.176470588,1.176470588,1.176470588,1.176470588,1.568627451,5.098039216,1.960784314,0.784313725,4.31372549,0.784313725,2.352941176,1.176470588,2.745098039,3.921568627,5.490196078,3.137254902,3.137254902,7.058823529,5.098039216,0.784313725,1.176470588,1.960784314,4.705882353,3.137254902,3.137254902,1.176470588,1.568627451,3.921568627,1.176470588,3.921568627,5.098039216,1.176470588,2.745098039,1.176470588,1.568627451,2.352941176,2.352941176,1.176470588,1.176470588,3.137254902,0.784313725,1.568627451,1.568627451,1.176470588,3.921568627,1.568627451,1.176470588,1.176470588,1.176470588,4.705882353,1.568627451,8.235294118,5.490196078,1.176470588,5.098039216,1.960784314,25.09803922,6.274509804,4.705882353,4.31372549,18.03921569,3.921568627,1.960784314,5.882352941,3.921568627,6.274509804,7.058823529,3.529411765,3.529411765,7.450980392,5.882352941,3.921568627,0.784313725,10.58823529,3.921568627,1.568627451,4.705882353,5.098039216,3.137254902,9.803921569,1.176470588,5.882352941,4.31372549,3.529411765,0.784313725,0.784313725,1.568627451,1.176470588,3.137254902,1.568627451,5.882352941,3.529411765,6.666666667,1.176470588,0.784313725,1.176470588,1.176470588,1.568627451,0.784313725,0.784313725,0.784313725,1.176470588,5.490196078,1.960784314,1.568627451,1.960784314,1.960784314,1.176470588,1.176470588,0.784313725,1.176470588,1.568627451,4.31372549,5.098039216,4.705882353,3.529411765,2.745098039,1.176470588,0.784313725,1.176470588,0.784313725,0.784313725,12.94117647,0.784313725,1.568627451,1.176470588,5.882352941,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,1.960784314,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,3.137254902,7.450980392,5.490196078,6.274509804,3.921568627,3.921568627,1.568627451,5.882352941,1.176470588,0.784313725,1.176470588,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,1.176470588,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,2.352941176,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,1.176470588,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,1.176470588,0.784313725,1.176470588,0.784313725,1.176470588,0.784313725,0.784313725,2.745098039,10.19607843,4.705882353,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725,0.784313725" at position 0

In [77]:
type(data['ActivityCounts'])

pandas.core.series.Series

# Export data

In [203]:
output_path = '/'.join(path.split('/')[:-1]) + '/Output'

In [204]:
data.to_csv(output_path + '/combined_data.csv')

In [232]:
data_unnested.to_csv(output_path + '/combined_data_unnested.csv')