# UT2000 Data Processing
For the MADS framework paper, we need to present the data. This notebooks helps reorganize the data so that it should be clear for anyone that picks it up.

In [219]:
import warnings
warnings.filterwarnings('ignore')

In [220]:
import os
import os.path
from os import path

from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates

import pandas as pd
import numpy as np

# ID Crossover
We need the IDs from the multiply modalities to cross-reference the participants.

In [221]:
ids_1000 = pd.read_csv('../data/raw/ut1000/admin/id_crossover.csv')
# limiting so we don't have repeats with ut2000
ids_1000 = ids_1000[ids_1000['record'] < 2000]
ids_1000.head()

Unnamed: 0,record,beiwe,beacon
0,1025,2qki3fim,
1,1041,ygny19ey,
2,1063,wozr2hv8,
3,1065,11i3mr4n,
4,1083,5buzzcr7,


In [222]:
ids_2000 = pd.read_csv('../data/raw/ut2000/admin/id_crossover.csv')
ids_2000.head()

Unnamed: 0,record,beiwe,beacon
0,2000,wsv7bqh4,
1,2001,vcdohuvw,
2,2002,7zo5b6ma,
3,2003,gy1mxy8v,
4,2004,qbrarqn9,


In [223]:
# combining ut1000 and 2000 records
ids = ids_1000.append(ids_2000)

# Fitbit Data
The Fitbit data is combined into a single CSV that contains all participants for a certain datatype. I need to import each file, separate by beiwe ID, and then export again.

In [224]:
data_dir = '../data/raw/ut2000/fitbit/'
export_dir = '../data/processed/MADS_framework_data/'
# looping through all the files
for file in os.listdir(data_dir):
    if file[-1] == 'v': # checking if csv file
        df = pd.read_csv(data_dir+file,index_col=0)
        fitbit_label = file.split('merged')[0][:-1]
        fitbit_ids = df.index.unique().values
        for fid in fitbit_ids:
            df_byid = df[df.index.values == fid]
            
            try:
                bid = ids[ids['record'] == fid]['beiwe'].values[0]
                df_byid['beiwe'] = bid
                df_byid['fitbit'] = fid
                if path.isdir(export_dir + bid + '/Fitbit/'):
                    df_byid.to_csv(export_dir+bid+'/Fitbit/'+fitbit_label+'.csv')
                else:
                    os.mkdir(f'../data/processed/MADS_framework_data/{bid}')
                    os.mkdir(f'../data/processed/MADS_framework_data/{bid}/Fitbit')
                    df_byid.to_csv(export_dir+bid+'/Fitbit/'+fitbit_label+'.csv')
            except:
                print(f'ID {fid} not in study')
                

ID 1 not in study
ID 2 not in study
ID 7001 not in study
ID 7002 not in study
ID 7003 not in study
ID 7005 not in study
ID 7006 not in study
ID 7007 not in study
ID 7008 not in study
ID 7009 not in study
ID 7010 not in study
ID 7011 not in study
ID 799999 not in study


# Beacon Data
The beacon data might not as easy to coax into a format that works. The beacon data in the UT1000 file does not present any useful data so we can skip straight to the UT2000 data.

In [225]:
# headers for datafiles - pms, sgp, sht
headers = [['unix_time','pm1','pm2p5','pm10',
           'std1','std2p5','std10',
           'pc0p3','pc0p5','pc1','pc2p5','pc5','pc10'],
           ['unix_time','eco2','tvoc'],
           ['unix_time','rh','tc']
          ]
dirs = ['PM','TVOC','TRH']

In [256]:
data_dir = '../data/raw/ut2000/beacon'
export_dir = '../data/processed/MADS_framework_data'
for beacon in os.listdir(data_dir):
    print(f'Reading for beacon {beacon}')
    for sensor, header, sensor_dir in zip(['pms5003','sgp30','sht31d'], headers, dirs):
        for file in os.listdir(f'{data_dir}/{beacon}/bevo/{sensor}/'):
            df = pd.read_csv(f'{data_dir}/{beacon}/bevo/{sensor}/{file}',names=header)
            beacon_no = int(beacon.split('-')[2])
            try:
                bid = ids_2000[ids_2000['beacon'] == beacon_no]
                bid = bid['beiwe'].values[0]
                df['beiwe'] = bid
                df['beacon'] = beacon_no
                df.set_index('unix_time',inplace=True)

                if path.isdir(f'{export_dir}/{bid}/BEVO/{sensor_dir}/'):
                    df.to_csv(f'{export_dir}/{bid}/BEVO/{sensor_dir}/{file}')
                elif not path.isdir(f'{export_dir}/{bid}'):
                    os.mkdir(f'{export_dir}/{bid}/')
                    os.mkdir(f'{export_dir}/{bid}/BEVO/')
                    os.mkdir(f'{export_dir}/{bid}/BEVO/{sensor_dir}/')
                    df.to_csv(f'{export_dir}/{bid}/BEVO/{sensor_dir}/{file}')
                elif not path.isdir(f'{export_dir}/{bid}/BEVO/'):
                    os.mkdir(f'{export_dir}/{bid}/BEVO/')
                    os.mkdir(f'{export_dir}/{bid}/BEVO/{sensor_dir}/')
                    df.to_csv(f'{export_dir}/{bid}/BEVO/{sensor_dir}/{file}')
                else:
                    os.mkdir(f'{export_dir}/{bid}/BEVO/{sensor_dir}/')
                    df.to_csv(f'{export_dir}/{bid}/BEVO/{sensor_dir}/{file}')
            except:
                print(f'Beacon {beacon_no} not deployed')

Reading for beacon beacon-d3-06
Reading for beacon beacon-d3-01
Reading for beacon beacon-d3-08
Reading for beacon beacon-d3-09
Reading for beacon beacon-d3-00
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beacon 0 not deployed
Beac

Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 not deployed
Beacon 20 n