### To make the merged dataset
merger = DataMerger(r"C:\Users\ballj\OneDrive\바탕 화면\EEG_jm.csv", r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_JM") <br>
dataset = merger.merge_data()<br>

### Changing hyperparameters to make the dataset
You can change the hyperparameters by overriding the default arguments of the `DataMerger` constructor: the group length (time_interval), the threshold used when handling error values inside each time-interval group (eeg_remove_time_in_group), and the window over which body movement is accumulated (BM_sum_minutes).<br>

### Reference 
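time_interval : length of each group in seconds (e.g. 10 s)<br>
remove_time_in_group : threshold, in seconds, used when handling error values in each group (e.g. 7 s). <br>
For example, a partial group of 8 s at the start or end of a recording (at or above the threshold) is extended to a full 10 s group, and one representative value is exported for that group; a partial group shorter than the threshold is removed.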

### EEG

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

class EEGProcessor:
    """Turn one experiment row of the EEG report CSV into per-interval features.

    time_interval : group length in seconds, unified with the Fitbit data
        (e.g. 10).
    remove_time_in_group : threshold in seconds (e.g. 7) used when deciding
        whether a partial group at the start/end of a recording is kept, and
        when scanning a group for error values.
    """

    # Layout of the start/finish timestamps stored in the report.
    _REPORT_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, file_path, time_interval, remove_time_in_group):
        """Store the hyperparameters and load the EEG report CSV at *file_path*."""
        self.time_interval = time_interval
        self.remove_time_in_group = remove_time_in_group
        # Lowercase 's' is the supported pandas alias for seconds; the
        # uppercase 'S' spelling is deprecated.
        self.time_interval_str = f'{time_interval}s'
        self.EEG_report = pd.read_csv(file_path)

    def parse_raw_data(self, dataframe, col_name):
        """Expand the bracketed, comma-separated string stored in the first row
        of *col_name* (e.g. brain waves) into a one-column DataFrame of floats.
        """
        col_str = dataframe.iloc[0][col_name].strip('[]')
        col_list = [float(val) for val in col_str.split(',')]
        return pd.DataFrame({col_name: col_list})

    def time_difference(self, dataframe, start_time_col, finish_time_col):
        """Return the experiment duration in seconds.

        Both columns are read from the first row of *dataframe* and parsed as
        'YYYY-MM-DD HH:MM:SS' strings.
        """
        first_row = dataframe.iloc[0]
        start_time = datetime.strptime(first_row[start_time_col], self._REPORT_TIME_FORMAT)
        # The finish time previously used '%0H', which is not a documented
        # strptime directive and made this method fail.
        finish_time = datetime.strptime(first_row[finish_time_col], self._REPORT_TIME_FORMAT)
        return (finish_time - start_time).total_seconds()

    def count_initial_same_values(self, series):
        """Count how many leading values of *series* equal its first value.

        Used to measure the initial recognition-error period of a recording.
        An empty series yields 0.
        """
        if len(series) == 0:
            return 0

        initial_value = series.iloc[0]
        count = 0
        for value in series:
            if value != initial_value:
                break
            count += 1
        return count

    def _remainder_or_keep(self, seconds_to_boundary):
        """Return False when the partial group is long enough to keep,
        otherwise the number of seconds that should be cut off.
        """
        remainder = seconds_to_boundary % float(self.time_interval)
        if self.remove_time_in_group <= remainder:
            return False
        return remainder

    def process_start_time_trash_sec(self, start_time):
        """Classify the partial group at the start of a recording.

        The remainder is measured from *start_time* up to the next full
        minute. Returns False (keep and round) or the seconds to remove.
        """
        next_minute = start_time + timedelta(minutes=1) - timedelta(seconds=start_time.second)
        return self._remainder_or_keep((next_minute - start_time).total_seconds())

    def process_finish_time_trash_sec(self, finish_time):
        """Classify the partial group at the end of a recording.

        The remainder is measured from the start of the current minute up to
        *finish_time*. Returns False (keep and round) or the seconds to remove.
        """
        minute_start = finish_time - timedelta(seconds=finish_time.second)
        return self._remainder_or_keep((finish_time - minute_start).total_seconds())

    def nearest_time_rounding(self, time):
        """Round *time* to the nearest second mark divisible by time_interval.

        For a 10 s interval the candidates are 0, 10, 20, 30, 40 and 50 s.
        Times at least remove_time_in_group seconds past the last candidate
        roll over to the next minute.
        """
        seconds = time.second
        time_points = list(range(0, 60, self.time_interval))
        nearest = min(time_points, key=lambda time_point: abs(time_point - seconds))

        last_point = time_points[-1]
        if nearest == last_point and seconds >= last_point + self.remove_time_in_group:
            return time.replace(second=0, microsecond=0) + timedelta(minutes=1)
        return time.replace(second=nearest, microsecond=0)

    def align_end_time(self, dataframe_1, dataframe_2):
        """Trim whichever frame ends later so both end at the same time."""
        end_1, end_2 = dataframe_1.index[-1], dataframe_2.index[-1]
        if end_1 > end_2:
            dataframe_1 = dataframe_1[dataframe_1.index <= end_2]
        elif end_1 < end_2:
            dataframe_2 = dataframe_2[dataframe_2.index <= end_1]
        return dataframe_1, dataframe_2

    def adjust_time_index(self, process_type, dataframe, func):
        """Align the first or last timestamp of *dataframe* with the grouping grid.

        process_type : 0 for start-time processing, -1 for finish-time processing.
        func : process_start_time_trash_sec for the start,
            process_finish_time_trash_sec for the finish.

        When the partial group is kept, the edge timestamp is relabelled on
        the passed frame itself; otherwise a trimmed frame is returned.
        """
        edge_time = dataframe.index[process_type]
        remainder = func(edge_time)

        # The finish side is moved back one second so groups end on 9, 19, 29...
        one_sec = timedelta(seconds=1)

        # A remainder of exactly 0.0 compares equal to False, so (as before)
        # an already aligned edge also takes the relabelling path.
        if remainder is False or remainder == 0:
            time = self.nearest_time_rounding(edge_time)
            if process_type != 0:
                time = time - one_sec
            new_index = dataframe.index.tolist()
            new_index[process_type] = time
            dataframe.index = new_index
            return dataframe

        # The partial group is shorter than remove_time_in_group: remove it.
        cutting_time = timedelta(seconds=remainder)
        if process_type == 0:
            return dataframe[dataframe.index >= edge_time + cutting_time]
        return dataframe[dataframe.index <= dataframe.index[-1] - cutting_time - one_sec]

    def _has_long_invalid_duration(self, invalid_timestamps):
        """Return True when two consecutive flagged timestamps lie more than
        remove_time_in_group seconds apart.

        NOTE(review): this measures the gap between flagged samples rather
        than the length of a run of flagged samples - confirm the intent.
        """
        return any(
            (later - earlier).seconds > self.remove_time_in_group
            for earlier, later in zip(invalid_timestamps, invalid_timestamps[1:])
        )

    def check_invalid_values(self, group):
        """Average one group of brain-wave/attention rows, ignoring error values.

        A sample is treated as an error when a wave value repeats the previous
        one or the attention score is 0. The whole group becomes NaN when the
        flagged alpha or attention samples fail the duration check.
        """
        alpha_invalid_series = group['α_wave_raw_data'].diff().eq(0)
        alpha_invalid_timestamps = group.index[alpha_invalid_series.to_numpy()].tolist()

        attention_invalid_series = group['attention_raw_data'] == 0
        attention_invalid_timestamps = group.index[attention_invalid_series.to_numpy()].tolist()

        if (self._has_long_invalid_duration(alpha_invalid_timestamps)
                or self._has_long_invalid_duration(attention_invalid_timestamps)):
            # make error values to missing values
            return pd.Series([np.nan] * group.shape[1], index=group.columns)

        # calculate average except error value
        valid_conditions = (
            (group['α_wave_raw_data'].diff() != 0) &
            (group['β_wave_raw_data'].diff() != 0) &
            (group['θ_wave_raw_data'].diff() != 0) &
            (group['δ_wave_raw_data'].diff() != 0) &
            (group['γ_wave_raw_data'].diff() != 0) &
            (group['attention_raw_data'] != 0)
        )
        return group[valid_conditions].mean()

    def check_invalid_values_other(self, group):
        """Average one group of hrv/hr/coherence rows, ignoring rows whose hr is 0.

        The whole group becomes NaN when the flagged hr samples fail the
        duration check.
        """
        hr_invalid_series = group['hr_raw_data'] == 0
        hr_invalid_timestamps = group.index[hr_invalid_series.to_numpy()].tolist()

        if self._has_long_invalid_duration(hr_invalid_timestamps):
            # make error values to missing values
            return pd.Series([np.nan] * group.shape[1], index=group.columns)

        # calculate average except error value
        return group[group['hr_raw_data'] != 0].mean()

    def process_eeg_data(self, experiment_id):
        """Build the per-interval EEG feature table for one experiment.

        Returns None when *experiment_id* is not in the report index, or when
        no samples remain after the initial error period is removed.
        Otherwise returns a DataFrame indexed by group start time with the
        columns alpha_wave, beta_wave, theta_wave, delta_wave, gamma_wave,
        attention, hrv, hr, coherence and 'SP ratio' (beta / theta).
        """
        if experiment_id not in self.EEG_report.index:
            return None

        # all experiments in one df
        EEG_report_sample = self.EEG_report.loc[[experiment_id], :]

        # one dataframe for one column
        cols = ['α_wave_raw_data', 'β_wave_raw_data', 'θ_wave_raw_data', 'δ_wave_raw_data', 'γ_wave_raw_data', 'attention_raw_data', 'hrv_raw_data', 'hr_raw_data', 'coherence_flag_raw_data']
        parsed_dfs = [self.parse_raw_data(EEG_report_sample, col) for col in cols]

        # The first six columns and the last three are sampled at different
        # rates, so each set gets its own sample spacing.
        duration = self.time_difference(EEG_report_sample, 'meditation_start_time', 'meditation_finish_time')
        interval_sec = timedelta(seconds=round(duration / len(parsed_dfs[0]), 2))
        interval_sec_other = timedelta(seconds=round(duration / len(parsed_dfs[6]), 2))

        # make two merged dataframe
        merged_df = parsed_dfs[0].join(parsed_dfs[1:6])
        merged_df_other = parsed_dfs[6].join(parsed_dfs[7:])

        # experiment start time
        start_time = datetime.strptime(EEG_report_sample.iloc[0]['meditation_start_time'], self._REPORT_TIME_FORMAT)

        # change index to time index based on interval second
        merged_df['time'] = [start_time + step * interval_sec for step in range(len(merged_df))]
        merged_df_other['time'] = [start_time + step * interval_sec_other for step in range(len(merged_df_other))]
        merged_df, merged_df_other = merged_df.set_index('time'), merged_df_other.set_index('time')

        # The longest run of repeated initial values across all signals marks
        # the initial error period of the experiment.
        initial_error_times = [
            self.count_initial_same_values(merged_df[col]) * interval_sec.total_seconds()
            for col in cols[:6]
        ]
        initial_error_times.append(
            self.count_initial_same_values(merged_df_other['hr_raw_data']) * interval_sec_other.total_seconds()
        )
        initial_error_time = timedelta(seconds=max(initial_error_times))

        # dataset start time
        real_start_time = start_time + initial_error_time
        merged_df = merged_df[merged_df.index > real_start_time]
        merged_df_other = merged_df_other[merged_df_other.index > real_start_time]
        if merged_df.empty or merged_df_other.empty:
            # Nothing usable is left; callers already skip None results.
            return None
        merged_df.index, merged_df_other.index = merged_df.index.round('s'), merged_df_other.index.round('s')

        # make the experiment end time same
        merged_df, merged_df_other = self.align_end_time(merged_df, merged_df_other)

        # start time process -> 0 , finish time process -> -1
        merged_df = self.adjust_time_index(0, merged_df, self.process_start_time_trash_sec)
        merged_df_other = self.adjust_time_index(0, merged_df_other, self.process_start_time_trash_sec)
        merged_df = self.adjust_time_index(-1, merged_df, self.process_finish_time_trash_sec)
        merged_df_other = self.adjust_time_index(-1, merged_df_other, self.process_finish_time_trash_sec)

        # grouping
        grouped = merged_df.groupby(merged_df.index.floor(self.time_interval_str))
        grouped_other = merged_df_other.groupby(merged_df_other.index.floor(self.time_interval_str))

        result = grouped.apply(self.check_invalid_values)
        result_other = grouped_other.apply(self.check_invalid_values_other)

        # final EEG dataset including β/θ SP ratio
        EEG_data_per_time_interval = result.merge(result_other, left_index=True, right_index=True)
        EEG_data_per_time_interval['β/θ SP'] = EEG_data_per_time_interval['β_wave_raw_data'] / EEG_data_per_time_interval['θ_wave_raw_data']

        EEG_data_per_time_interval = EEG_data_per_time_interval.rename(columns={
            'α_wave_raw_data': 'alpha_wave',
            'β_wave_raw_data': 'beta_wave',
            'θ_wave_raw_data': 'theta_wave',
            'δ_wave_raw_data': 'delta_wave',
            'γ_wave_raw_data': 'gamma_wave',
            'attention_raw_data': 'attention',
            'hrv_raw_data': 'hrv',
            'hr_raw_data': 'hr',
            'coherence_flag_raw_data': 'coherence',
            'β/θ SP': 'SP ratio',
        })

        return EEG_data_per_time_interval

## Fitbit

In [2]:
import datetime as dt
import matplotlib.pyplot as plt
import glob

class FitbitProcessor:
    '''
    Build one regularly sampled feature table from a Fitbit export folder.

    time_interval : Unified seconds with EEG dataset (ex. 10secs)
    BM_sum_minutes
    The Body Movement feature was meaningless because the experiment was usually conducted while sitting on a chair.
    So a new body movement feature is created as the Body Movement value accumulated over the preceding time.
    The length of that preceding time is the BM_sum_minutes variable, in minutes (ex. 180 for 3 hours).
    '''
    def __init__(self, folder_path, time_interval, BM_sum_minutes):
        '''Store the hyperparameters and locate the CSV files of each feature under folder_path.'''
        self.folder_path = folder_path
        self.BM_sum_minutes = BM_sum_minutes
        # 'min' and 's' are the supported pandas aliases; 'T' and 'S' are deprecated.
        self.BM_sum_minutes_str = f'{BM_sum_minutes}min'
        self.time_interval = time_interval
        self.time_interval_str = f'{time_interval}s'
        folder_patterns = [
            "Active Zone Minutes (AZM)/*",
            "Sleep Score/*",
            "Stress Journal/CEDA*",
            "Temperature/Wrist Temperature - *"
        ]
        # One glob per feature, reused for both the paths and the counts so the two always agree.
        matches = [glob.glob(f"{folder_path}/{pattern}") for pattern in folder_patterns]
        self.things_path = [item for sublist in matches for item in sublist]
        self.azm_count, self.sleep_count, self.eda_count, self.wt_count = (len(found) for found in matches)
        # original fitbit dataset's time interval is 1 min
        self.original_interval = 60
        self.num_timestamps = self.original_interval // self.time_interval
        self.half_point = self.num_timestamps // 2

    def read_filtered_csv(self, path, columns):
        '''Read the CSV at path, keep only columns, and parse the first of them as datetimes.'''
        # Copy so the assignment below works on an independent frame, not on a slice.
        frame = pd.read_csv(path)[columns].copy()
        frame[columns[0]] = pd.to_datetime(frame[columns[0]])
        return frame

    def round_seconds(self, obj):
        '''Round obj down to the previous second mark divisible by time_interval.'''
        if obj.second % self.time_interval == 0:
            return obj
        return obj - timedelta(seconds=obj.second % self.time_interval)

    def round_zero(self, datetime_obj):
        '''Return datetime_obj with its seconds set to 0.'''
        return datetime_obj.replace(second=0)

    # Process whole fitbit data
    def process_fitbit_data(self):
        '''
        Read every located CSV and return one DataFrame indexed by timestamp.

        Features whose files are missing are added as all-NaN columns
        (BM, sleep, eda, temperature). Raises ValueError when no file of any
        feature was found.
        '''
        # (output column, CSV columns to read, number of files, processing method),
        # in the same order as the paths collected in __init__.
        sources = [
            ('BM', ['date_time', 'total_minutes'], self.azm_count, self.process_azm),
            ('sleep', ['timestamp', 'deep_sleep_in_minutes'], self.sleep_count, self.process_sleep),
            ('eda', ['timestamp', 'eda_level_real'], self.eda_count, self.process_eda),
            ('temperature', ['recorded_time', 'temperature'], self.wt_count, self.process_temperature),
        ]

        things_col = []
        for _, columns, count, _ in sources:
            things_col.extend([columns] * count)
        things = [self.read_filtered_csv(path, col) for path, col in zip(self.things_path, things_col)]

        if not things:
            raise ValueError(f"No Fitbit CSV files found under {self.folder_path!r}")

        processed = []
        missing = []
        offset = 0
        for feature, _, count, handler in sources:
            if count:
                # Every handler receives the list of frames belonging to its feature.
                processed.append(handler(things[offset:offset + count]))
            else:
                missing.append(feature)
            offset += count

        Min_Time, Max_Time = self.find_time_bounds(processed)

        # make final dataframe
        df = self.create_final_df(processed, Min_Time, Max_Time)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df = df.set_index('timestamp')

        # Missing features are appended last (temperature before eda, as before).
        for feature in reversed(missing):
            df = df.assign(**{feature: np.nan})

        return df

    # Process Active Zone Minutes data
    def process_azm(self, azm):
        '''
        Spread each AZM record over the time_interval grid and accumulate it.

        Returns a DataFrame with 'timestamp' and 'BM', where BM is the rolling
        sum of the spread values over the last BM_sum_minutes minutes.
        '''
        azm = pd.concat(azm, axis=0)
        azm = azm.rename(columns={'date_time': 'timestamp', 'total_minutes': 'BM'})

        # Offsets of the generated timestamps relative to each record's own timestamp.
        offsets = [
            timedelta(seconds=(number - self.half_point) * self.time_interval)
            for number in range(self.num_timestamps)
        ]
        time_list = [time + offset for time in azm['timestamp'] for offset in offsets]
        body_movement = [bm for bm in azm['BM'] for _ in range(self.num_timestamps)]

        azm_final = pd.DataFrame({'timestamp': time_list, 'BM': body_movement})
        azm_final['timestamp'] = pd.to_datetime(azm_final['timestamp'])
        azm_final.set_index('timestamp', inplace=True)
        # Fill the gaps between records with 0 so the rolling window sees a regular grid.
        azm_final = azm_final.resample(self.time_interval_str).asfreq().fillna(0)
        azm_final['new_BM'] = azm_final['BM'].rolling(self.BM_sum_minutes_str, closed='right').sum()
        azm_final = azm_final.drop(['BM'], axis=1)
        azm_final = azm_final.rename(columns={'new_BM': 'BM'})
        azm_final = azm_final.astype({'BM': 'int'})
        azm_final.reset_index(inplace=True)

        return azm_final

    # Process sleep data (Deep sleep in minutes)
    def process_sleep(self, sleep):
        '''Concatenate the sleep frames, zero the seconds of each timestamp and rename the value column to 'sleep'.'''
        sleep = pd.concat(sleep, axis=0)
        sleep['timestamp'] = [
            self.round_zero(datetime.strptime(str(time)[0:19], '%Y-%m-%d %H:%M:%S'))
            for time in sleep['timestamp']
        ]
        sleep = sleep.rename(columns={'deep_sleep_in_minutes': 'sleep'})
        return sleep

    def _interpolate_to_interval(self, frame, value_name, decimals):
        '''
        Linearly interpolate the second column of frame onto the time_interval grid.

        Column 0 of frame holds the timestamps and column 1 the values. The
        first and last rows only serve as neighbours and produce no output.
        Returns a DataFrame with 'timestamp' and value_name.
        '''
        time_list = []
        value_list = []
        lead = timedelta(seconds=self.half_point * self.time_interval)

        for row in range(1, len(frame) - 1):
            previous = frame.iloc[row - 1, 1]
            current = frame.iloc[row, 1]
            following = frame.iloc[row + 1, 1]
            start_timestamp = frame.iloc[row, 0] - lead

            for num in range(self.num_timestamps):
                time_list.append(start_timestamp + timedelta(seconds=num * self.time_interval))

                if num < self.half_point:
                    # Before the record: blend towards the previous value.
                    weight = (self.half_point - num) / self.num_timestamps
                    value = current - ((current - previous) * weight)
                elif num == self.half_point:
                    value = current
                else:
                    # After the record: blend towards the next value.
                    weight = (num - self.half_point) / self.num_timestamps
                    value = current + ((following - current) * weight)

                value_list.append(round(value, decimals))

        return pd.DataFrame({'timestamp': time_list, value_name: value_list})

    # Process eda data
    def process_eda(self, eda):
        '''
        Concatenate the eda frames, shift and round their timestamps, and
        interpolate the values onto the time_interval grid (2 decimals).
        '''
        eda = pd.concat(eda, axis=0)
        # NOTE(review): the 4 hour shift is presumably a time-zone correction - confirm.
        eda['timestamp'] = [
            self.round_seconds(
                datetime.strptime(str(time)[0:19], '%Y-%m-%d %H:%M:%S') - dt.timedelta(hours=4)
            ) for time in eda['timestamp']]

        return self._interpolate_to_interval(eda, 'eda', 2)

    # Process temperature data
    def process_temperature(self, temperature):
        '''
        Concatenate the temperature frames and interpolate the values onto the
        time_interval grid (6 decimals).

        The interpolation now runs for every time_interval; previously it
        silently produced no rows whenever half_point was even.
        '''
        temp = pd.concat(temperature, axis=0)
        temp = temp.rename(columns={'recorded_time': 'timestamp'})

        return self._interpolate_to_interval(temp, 'temperature', 6)

    # find minimum and maximum time of whole feature
    def find_time_bounds(self, dataframes):
        '''
        Return the earliest and latest 'timestamp' over all non-empty frames.

        When every frame is empty, both bounds fall back to the current UTC time.
        '''
        populated = [df for df in dataframes if not df.empty]
        if not populated:
            return pd.Timestamp.now(tz='UTC'), pd.Timestamp.now(tz='UTC')

        Min_Time = min(df['timestamp'].min() for df in populated)
        Max_Time = max(df['timestamp'].max() for df in populated)
        return Min_Time, Max_Time

    # create dataframe from Min_time to Max_time
    def create_final_df(self, datasets, Min_Time, Max_Time):
        '''
        Outer-merge every dataset onto a regular timestamp grid from Min_Time to Max_Time.

        When present, missing BM values become 0 and sleep values are carried
        forward. Datasets without those columns are accepted as well.
        '''
        fitbit = pd.date_range(start=Min_Time, end=Max_Time, freq=self.time_interval_str, name='timestamp')
        fitbit = pd.DataFrame(fitbit)

        for dataset in datasets:
            fitbit = pd.merge(fitbit, dataset, how='outer', on='timestamp')

        # A feature may be absent from the export, so only post-process the columns that exist.
        if 'BM' in fitbit.columns:
            fitbit['BM'] = fitbit['BM'].fillna(0)
        if 'sleep' in fitbit.columns:
            fitbit['sleep'] = fitbit['sleep'].ffill()

        return fitbit

### Merging EEG and Fitbit dataset

In [3]:
class DataMerger(EEGProcessor, FitbitProcessor):
    '''
    Merge the processed EEG experiments with the processed Fitbit features.

    We've tested some hyperparameters, and "time_interval=10, eeg_remove_time_in_group=7, BM_sum_minutes=180" have shown the best R-squared score.

    [Tested hyperparameters]
    1. Time interval : 10, 12, 15, 20sec & eeg_remove_time_in_group : 7, 9, 12, 16sec (70~80% proportion of time interval)
    R-squared score was best when we split the dataset into 10 seconds group.

    2. BM (Body Movement) sum minutes : 1h, 1h 30m, 2h, 2h 30m, 3h
    R-squared score was best when we set up the BM (Body Movement) sum minutes as 3 hours.
    '''

    def __init__(self, eeg_filepath, fitbit_folderpath, time_interval=10, eeg_remove_time_in_group=7, BM_sum_minutes=180):
        '''
        Initialise both parent processors with a shared time interval.

        eeg_filepath : path of the EEG csv file.
        fitbit_folderpath : path handed to FitbitProcessor.
        time_interval : group length in seconds, passed to both parents.
        eeg_remove_time_in_group : error-handling threshold passed to EEGProcessor.
        BM_sum_minutes : body-movement accumulation window passed to FitbitProcessor.
        '''
        # The parents take different constructor arguments, so each one is
        # initialised explicitly instead of going through super().
        EEGProcessor.__init__(self, eeg_filepath, time_interval, eeg_remove_time_in_group)
        FitbitProcessor.__init__(self, fitbit_folderpath, time_interval, BM_sum_minutes)

        self.eeg_filepath = eeg_filepath

    # Merge EEG and Fitbit data
    def merge_data(self):
        '''
        Process the EEG and Fitbit data and join them on their datetime index.

        Returns the EEG frame left-joined with the Fitbit frame (every EEG row
        is kept), or None when no EEG experiment produced data or the Fitbit
        frame is empty.
        '''
        # processing EEG data
        eeg_data = pd.read_csv(self.eeg_filepath)

        # NOTE(review): the loop starts at row 3, so rows 0-2 of the csv are
        # never processed -- confirm that skipping them is intentional.
        result_dfs = []
        for exp_id in range(3, len(eeg_data)):
            processed_data = self.process_eeg_data(exp_id)
            if processed_data is not None:
                result_dfs.append(processed_data)

        # Explicit sentinel instead of probing locals() for the variable name.
        combined_eeg = None
        if result_dfs:
            combined_eeg = pd.concat(result_dfs)
            combined_eeg.index = pd.to_datetime(combined_eeg.index)

        # processing Fitbit data
        fitbit_data = self.process_fitbit_data()
        fitbit_data.index = pd.to_datetime(fitbit_data.index)

        # merging two dataframes
        if combined_eeg is None or fitbit_data.empty:
            return None

        return combined_eeg.merge(fitbit_data, left_index=True, right_index=True, how='left')

### JM

In [4]:
# Build the merged EEG + Fitbit frame for the JM files using the default hyperparameters.
merger = DataMerger(r"C:\Users\ballj\OneDrive\바탕 화면\EEG_jm.csv",
                    r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_JM")

# merge_data() returns None when there is no EEG data or the Fitbit frame is empty.
final_jm = merger.merge_data()

In [5]:
# Preview the first 50 rows of the JM result.
final_jm.head(50)

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,eda,temperature
2023-11-14 15:57:20,93.073867,101.2298,91.487475,85.287567,93.953042,75.166667,0.0,74.727273,0.0,1.106488,180.0,77.0,1.75,-2.644591
2023-11-14 15:57:30,92.174644,101.052081,88.4127,81.636656,93.337656,71.3125,0.0,67.071429,0.0,1.142959,180.0,77.0,1.75,-2.642925
2023-11-14 15:57:40,91.430813,101.817931,88.213431,81.221506,93.895269,74.0,0.0,69.333333,0.0,1.154223,180.0,77.0,1.75,-2.641258
2023-11-14 15:57:50,90.187181,102.2657,86.477213,79.674694,94.265081,79.1875,0.0,69.733333,0.0,1.182574,180.0,77.0,1.75,-2.639591
2023-11-14 15:58:00,89.896237,101.705987,85.428069,78.610544,94.130119,85.625,0.0,66.428571,0.0,1.190545,180.0,77.0,1.75,-2.637925
2023-11-14 15:58:10,90.786747,101.160007,87.130247,79.28902,93.433473,82.733333,0.0,64.533333,0.0,1.161021,180.0,77.0,1.75,-2.634591
2023-11-14 15:58:20,91.792859,101.106918,90.829182,83.228265,92.7373,70.882353,3.333333,65.8,0.0,1.113155,180.0,77.0,1.75,-2.631258
2023-11-14 15:58:30,91.349262,102.254669,89.975431,83.009019,93.308462,59.25,41.666667,71.4,0.0,1.136473,179.0,77.0,1.74,-2.627925
2023-11-14 15:58:40,90.460027,103.160947,86.89842,78.993053,94.175553,83.866667,53.357143,77.142857,0.0,1.187144,178.0,77.0,1.74,-2.624591
2023-11-14 15:58:50,90.136375,102.453244,85.976169,77.766006,93.956113,86.8125,57.2,68.2,0.0,1.191647,177.0,77.0,1.74,-2.621258


### YH

In [6]:
# Build the merged EEG + Fitbit frame for the YH files using the default hyperparameters.
merger = DataMerger(r"C:\Users\ballj\OneDrive\바탕 화면\EEG_yh.csv",
                    r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_YH")

# merge_data() returns None when there is no EEG data or the Fitbit frame is empty.
final_yh = merger.merge_data()

In [7]:
# Preview the last 50 rows of the YH result.
final_yh.tail(50)

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,eda,temperature
2023-11-06 15:46:40,95.304375,98.099838,96.32665,89.476206,89.956044,59.375,49.466667,88.8,0.0,1.018408,0.0,,1.65,
2023-11-06 15:46:50,95.765263,99.745037,96.954356,90.108063,91.002238,84.5,50.0,83.625,0.0,1.028783,0.0,,1.66,
2023-11-06 15:47:00,95.717163,100.862525,97.179644,90.435656,91.682081,79.8125,68.066667,76.866667,0.0,1.037898,0.0,,1.67,
2023-11-06 15:47:10,94.526138,101.847019,96.254163,89.936031,92.452419,84.1875,79.733333,84.466667,0.0,1.058105,0.0,,1.68,
2023-11-06 15:47:20,94.222631,102.529056,96.222612,90.042556,93.008538,92.8125,76.533333,82.666667,0.0,1.06554,0.0,,1.7,
2023-11-06 15:47:30,95.482413,102.924544,97.001231,90.633844,92.99355,86.5,63.533333,81.933333,0.0,1.061064,0.0,,1.71,
2023-11-06 15:47:40,96.533229,102.939688,97.887024,91.872871,93.011159,88.764706,39.333333,84.666667,0.6,1.051617,0.0,,1.72,
2023-11-06 15:47:50,96.54565,102.790919,97.679388,92.104675,93.236775,88.5625,30.5,85.125,1.0,1.05233,0.0,,1.74,
2023-11-06 15:48:00,95.622,102.422825,96.957712,90.680906,93.190831,88.0,29.866667,85.0,1.0,1.056366,0.0,,1.75,
2023-11-06 15:48:10,94.940138,102.447513,95.240763,88.270006,93.178094,76.875,28.933333,91.8,0.2,1.075669,0.0,,1.76,


### SJ

In [8]:
# Build the merged EEG + Fitbit frame for the SJ files using the default hyperparameters.
merger = DataMerger(r"C:\Users\ballj\OneDrive\바탕 화면\EEG_sj.csv",
                    r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_SJ")

# merge_data() returns None when there is no EEG data or the Fitbit frame is empty.
final_sj = merger.merge_data()

In [9]:
# Display the SJ result.
final_sj

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,eda,temperature
2023-12-04 18:03:10,100.641719,104.055731,98.839319,91.340231,90.580894,59.312500,0.000000,96.400000,0.0,1.052777,0.0,25.0,,-3.44852
2023-12-04 18:03:20,100.424041,104.037747,98.898876,91.422094,90.936035,53.000000,0.000000,99.428571,0.0,1.051961,0.0,25.0,,-3.48352
2023-12-04 18:03:30,100.372906,102.556144,99.175506,91.914456,90.845019,67.937500,0.000000,96.866667,0.0,1.034087,0.0,25.0,,-3.51852
2023-12-04 18:03:40,100.313256,101.751350,99.703950,91.643969,91.333881,77.875000,0.000000,96.928571,0.0,1.020535,0.0,25.0,,-3.55352
2023-12-04 18:03:50,100.118694,100.874988,99.881831,92.174181,91.786656,80.250000,0.000000,101.666667,0.0,1.009943,0.0,25.0,,-3.58852
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-26 15:49:00,88.533725,91.830050,91.784025,86.792563,83.557844,71.562500,23.800000,84.133333,0.0,1.000501,144.0,,,
2023-10-26 15:49:10,88.279559,91.665329,91.247653,85.769071,82.060547,52.294118,23.642857,83.214286,0.0,1.004577,144.0,,,
2023-10-26 15:49:20,88.249319,91.194156,91.339025,85.600575,81.334037,56.125000,23.666667,84.066667,0.0,0.998414,144.0,,,
2023-10-26 15:49:30,88.498756,91.265400,91.343244,85.459506,81.624012,41.500000,25.400000,86.066667,0.0,0.999148,144.0,,,


### SA

In [10]:
# Build the merged EEG + Fitbit frame for the SA files using the default hyperparameters.
merger = DataMerger(r"C:\Users\ballj\OneDrive\바탕 화면\EEG_sa.csv",
                    r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_SA")

# merge_data() returns None when there is no EEG data or the Fitbit frame is empty.
final_sa = merger.merge_data()

In [11]:
# Display the SA result.
final_sa

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,temperature,eda
2023-10-25 20:46:00,93.575000,97.275000,90.775000,84.525000,87.712500,70.900000,0.000000,92.454545,0.0000,1.071606,,,,
2023-10-25 20:46:10,91.460000,95.300000,88.820000,81.240000,85.320000,66.460000,0.000000,94.875000,0.0000,1.072957,,,,
2023-10-25 20:46:20,91.166667,93.866667,88.400000,79.933333,83.900000,58.466667,0.000000,99.875000,0.0000,1.061840,,,,
2023-10-25 20:46:30,91.300000,94.000000,89.266667,80.866667,84.233333,58.000000,0.000000,100.812500,0.9375,1.053025,,,,
2023-10-25 20:46:40,90.675000,93.700000,88.350000,80.725000,83.500000,56.125000,0.000000,99.125000,1.0000,1.060555,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-22 23:13:20,93.566667,101.433333,93.633333,85.066667,93.066667,68.866667,28.111765,88.705882,0.0000,1.083304,,,,
2023-10-22 23:13:30,92.600000,101.200000,93.600000,84.800000,93.200000,71.400000,27.587500,81.750000,0.0000,1.081197,,,,
2023-10-22 23:13:40,91.675000,100.625000,93.075000,84.100000,92.800000,75.425000,33.456250,80.750000,0.0000,1.081117,,,,
2023-10-22 23:13:50,91.300000,100.600000,92.700000,83.450000,93.050000,79.450000,35.706250,87.000000,0.0000,1.085221,,,,


### BS

In [12]:
# Build the merged EEG + Fitbit frame for the BS files using the default hyperparameters.
merger = DataMerger(r"C:\Users\ballj\OneDrive\바탕 화면\EEG_bs.csv",
                    r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_BS")

# merge_data() returns None when there is no EEG data or the Fitbit frame is empty.
final_bs = merger.merge_data()

In [13]:
# Display the BS result.
final_bs

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,eda,temperature
2023-11-17 12:10:30,93.019312,93.617219,92.697050,84.242675,80.213156,48.500000,0.000000,73.000000,0.0,1.009927,0.0,109.0,,-1.011603
2023-11-17 12:10:40,92.829937,93.757231,92.225469,83.678362,80.836500,50.187500,0.000000,74.200000,0.0,1.016609,0.0,109.0,,-0.991603
2023-11-17 12:10:50,92.416144,93.843019,92.094938,83.677144,81.030619,54.062500,0.000000,74.785714,0.0,1.018981,0.0,109.0,,-0.971603
2023-11-17 12:11:00,92.147275,93.505738,92.654281,83.939119,80.195300,40.687500,0.000000,74.000000,0.0,1.009190,0.0,109.0,,-0.951603
2023-11-17 12:11:10,92.351737,93.435869,92.162013,83.354862,80.550087,40.187500,0.000000,75.428571,0.0,1.013822,0.0,109.0,,-0.946603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-01 09:21:10,94.775150,102.985619,95.263031,88.816587,95.419300,70.437500,41.400000,82.400000,0.0,1.081066,132.0,93.0,15.69,
2023-11-01 09:21:20,95.397287,102.349556,95.283419,87.917800,94.635475,73.875000,41.071429,78.285714,0.0,1.074159,132.0,93.0,15.79,
2023-11-01 09:21:30,95.002125,101.265862,94.431525,86.806162,93.304162,64.375000,42.400000,82.266667,0.0,1.072373,132.0,93.0,15.90,
2023-11-01 09:21:40,93.216906,100.529582,92.097306,85.104882,91.898318,73.882353,46.533333,90.400000,0.0,1.091558,132.0,93.0,16.00,


### MJ

In [14]:
# Build the merged EEG + Fitbit frame for the MJ files using the default hyperparameters.
merger = DataMerger(r"C:\Users\ballj\OneDrive\바탕 화면\EEG_mj.csv",
                    r"C:\Users\ballj\OneDrive\바탕 화면\Fitbit_MJ")

# merge_data() returns None when there is no EEG data or the Fitbit frame is empty.
final_mj = merger.merge_data()

In [15]:
# Display the MJ result.
final_mj

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,temperature,eda
2023-11-28 06:35:00,99.321762,104.989377,99.192715,93.391423,95.731000,65.923077,0.000000,86.000000,0.0,1.058438,608,82.0,,
2023-11-28 06:35:10,98.575943,105.100657,98.355886,93.271264,95.321229,63.142857,0.000000,88.000000,0.0,1.068575,607,82.0,,
2023-11-28 06:35:20,97.275273,104.858807,97.556647,91.331913,95.139713,62.466667,0.000000,94.538462,0.0,1.074850,606,82.0,,
2023-11-28 06:35:30,96.520993,104.305133,96.896500,89.907907,95.445147,65.066667,0.000000,98.785714,0.0,1.076459,605,82.0,,
2023-11-28 06:35:40,96.582727,104.390060,96.300400,88.761467,95.372240,66.000000,0.000000,97.769231,0.0,1.084004,604,82.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-17 21:43:20,96.315144,103.291975,94.035750,86.910163,94.625831,65.875000,38.866667,116.666667,0.0,1.098433,0,,,
2023-10-17 21:43:30,93.948044,102.910219,93.174344,86.244975,93.519894,76.375000,44.142857,118.214286,0.0,1.104491,0,,,
2023-10-17 21:43:40,93.530106,103.214356,92.222706,85.301488,93.937281,85.375000,48.857143,118.500000,0.0,1.119186,0,,,
2023-10-17 21:43:50,94.769312,103.681287,92.163050,85.526931,94.443725,75.187500,43.200000,116.200000,0.0,1.124977,0,,,


### Concatenate all participants' merged datasets

In [17]:
# Stack the six merged frames and order the rows chronologically by index.
dataset = pd.concat([final_jm, final_sj, final_bs, final_yh, final_mj, final_sa]).sort_index()
dataset

Unnamed: 0,alpha_wave,beta_wave,theta_wave,delta_wave,gamma_wave,attention,hrv,hr,coherence,SP ratio,BM,sleep,eda,temperature
2023-10-13 14:37:50,94.475168,94.363024,89.191624,81.660980,89.852968,56.040000,0.000000,87.454545,0.0,1.057981,0.0,,15.44,
2023-10-13 14:38:00,97.985212,99.036385,89.636236,81.682403,92.746733,74.363636,0.000000,87.500000,0.0,1.104870,0.0,,15.38,
2023-10-13 14:38:10,99.778753,100.436269,90.163825,81.577762,94.082919,75.562500,0.000000,84.500000,0.0,1.113931,0.0,,15.33,
2023-10-13 14:38:20,100.100510,100.489161,90.710826,81.809868,94.274471,73.612903,0.000000,82.428571,0.0,1.107797,0.0,,15.28,
2023-10-13 14:38:30,100.019000,101.285213,91.774919,83.064909,93.262953,79.843750,0.000000,84.214286,0.0,1.103626,0.0,,15.23,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-04 18:21:50,91.772825,95.562744,92.988800,88.081600,88.064787,60.000000,14.333333,91.266667,1.0,1.027680,,,,
2023-12-04 18:22:00,90.664781,95.058044,92.667394,88.362131,87.163969,72.875000,11.785714,91.142857,1.0,1.025798,,,,
2023-12-04 18:22:10,90.599100,95.196150,92.522212,88.088725,86.696719,54.875000,9.733333,92.333333,1.0,1.028900,,,,
2023-12-04 18:22:20,90.525512,95.285562,93.186975,89.072631,86.904881,53.437500,9.785714,93.785714,1.0,1.022520,,,,


In [None]:
# Despite its name, start_time holds the LAST index label of the sorted
# dataset, rendered as text with ':' swapped for '_' so it can go in a filename.
start_time = dataset.index[-1].strftime('%Y-%m-%d %H:%M:%S').replace(":", "_")

In [None]:
# Write the combined dataset to a CSV whose filename embeds the start_time string.
dataset.to_csv(r'C:\Users\ballj\OneDrive\바탕 화면\dataset_{}.csv'.format(start_time))