# Lambda Function Module
This notebook includes all the functions needed for the **lambda operations**. Each of these functions performs three operations:
1. Imports the data
2. Cleans and/or reorganizes the data in a usable way
3. Writes the data to a csv file

## Thermal Comfort
The output from the thermal comfort data includes:
1. Timestamp
2. Hourly-averaged temperatures by individual
3. Hourly-averaged relative humidity by individual
4. Hourly-averaged temperature by group
5. Hourly-averaged relative humidity by group

In [17]:
def lambdaThermalComfort(starting='03/11/2019', ending='04/15/2019'):
    '''
    Imports the sht31d readings of every participant folder found in Data/, restricts them to the
    requested date range, averages them by hour, and writes one csv file per participant to Files/.

    Inputs:
        - starting: string ('%m/%d/%Y') representing the first date to use in the data range
        - ending: string ('%m/%d/%Y') representing the last date to use in the data range. When it differs
          from starting, readings are kept up to and including midnight (00:00:00) at the start of this
          date; when it equals starting, the whole day is kept.
    Returns a tuple (raw_data, temp):
        - raw_data: the individual readings in the range, indexed by local time, with columns
          RH, Temperature_C, ID, Temperature_F, Month, Day, and Hour
        - temp: the mean of the numeric columns grouped by ID, Month, Day, and Hour
    Side effects: writes Files/<ID>_ThermalConditions.csv for every participant with data in the range.
    '''
    columns = ['Time', 'RH', 'Temperature_C']
    frames = []   # One dataframe per csv file; concatenated once after the loop
    id_list = []

    # Importing the data
    for folder in sorted(os.listdir('Data/')):
        if folder[0] == '.':
            continue  # Skipping hidden entries such as .DS_Store
        id_list.append(folder)
        DIR = 'Data/' + folder + '/beacon_data/bevo/sht31d/'  # Location of files
        ## Looping through all the files in the sensor directory
        for file in os.listdir(DIR):
            if file.endswith('csv'):  # To ensure that we only read in csv files
                one_file = pd.read_csv(DIR + file, header=None, names=columns)
                one_file['ID'] = folder
                frames.append(one_file)

    if frames:
        raw_data = pd.concat(frames, axis=0, ignore_index=True)
    else:
        ## No csv files were found: continue with an empty, correctly typed frame
        raw_data = pd.DataFrame(columns=columns + ['ID']).astype(
            {'Time': 'float64', 'RH': 'float64', 'Temperature_C': 'float64'})

    ## Converting the utc timestamp (seconds) to local time with a fixed 5 hour offset
    raw_data = raw_data.dropna()  # Dropping any NaNs
    utc_seconds = raw_data['Time'].astype('int64')  # Truncating to whole seconds
    raw_data['Time'] = pd.to_datetime(utc_seconds, unit='s') - timedelta(hours=5)

    ## Re-indexing and sorting
    raw_data = raw_data.set_index('Time')  # Setting time as the dataframe index
    raw_data = raw_data.sort_index()

    ## Adding column for temperature in Fahrenheit
    raw_data['Temperature_F'] = raw_data['Temperature_C']*1.8+32

    ## Removing data from DF that isn't in the deployment range
    start_date = datetime.strptime(starting, '%m/%d/%Y')  # converting input to datetime
    end_date = datetime.strptime(ending, '%m/%d/%Y')  # converting input to datetime
    if raw_data.empty or raw_data.index[-1] < start_date:
        ### Nothing was recorded on or after the start date, so nothing is averaged or written
        print('\tNo data from this deployment range')
        raw_data = raw_data.iloc[0:0]
    elif start_date == end_date:
        ### One day's worth of data: compare the full calendar date (year included)
        raw_data = raw_data[raw_data.index.normalize() == start_date]
    else:
        ### Normal range of data: masking keeps every in-range row, including the last one
        in_range = (raw_data.index >= start_date) & (raw_data.index <= end_date)
        raw_data = raw_data[in_range]

    # Averaging Data by Hour
    raw_data = raw_data.copy()  # Own copy so the new columns are not written to a slice
    raw_data['Month'] = raw_data.index.month
    raw_data['Day'] = raw_data.index.day
    raw_data['Hour'] = raw_data.index.hour
    temp = raw_data.groupby(['ID', 'Month', 'Day', 'Hour']).mean()

    # Writing one file per participant; participants without data in the range are skipped
    ids_with_data = set(temp.index.get_level_values('ID'))
    for name in id_list:
        if name in ids_with_data:
            temp.loc[name].to_csv('Files/' + name + '_ThermalConditions.csv')
        else:
            print('\tNo data for ' + name + ' in this deployment range')

    return raw_data, temp

## Indoor Air Quality 

## Fitbit Sleep Quality

## Beiwe Sleep Quality