In [1]:
from datetime import datetime
from pathlib import Path
import pandas as pd

In [2]:

def get_start_end_time_logfile(log_file_path, date):
    '''
    This function gets the start time and end time from the log file.

    The start time is the first tab-separated field of the third line and
    the end time is the first tab-separated field of the second last line.

    Input: log file path and date (string matching '%d-%m-%Y')
    Returns: tuple (start time, end time) of the log file in datetime format
    Raises: ValueError if the log file has fewer than three lines or if a
            time field does not match '%H:%M:%S.%f'
    '''
    #read all lines once; indexing the list avoids seeking in a text-mode
    #file by character count, which lands on the wrong offset when the
    #file uses '\r\n' line endings or contains multi-byte characters
    with open(log_file_path, 'r') as file:
        lines = file.readlines()

    if len(lines) < 3:
        raise ValueError(
            "log file " + str(log_file_path) + " has fewer than three lines"
        )

    #the time of day is the first tab-separated field of the line
    log_start_time = date + " " + lines[2].split('\t')[0]
    log_end_time = date + " " + lines[-2].split('\t')[0]

    #convert to datetime format
    exp_start_time = datetime.strptime(log_start_time, '%d-%m-%Y %H:%M:%S.%f')
    exp_end_time = datetime.strptime(log_end_time, '%d-%m-%Y %H:%M:%S.%f')
    return exp_start_time, exp_end_time

In [3]:
# create a dataframe from the data file
def remove_timestamp(data_file_path,exp_start_time, exp_end_time, sampling_rate=250):
    '''
    This function removes the data outside the experiment
    start and end time using the timestamp.

    Rows before the first 'Sample Index' equal to 0 are dropped, a
    'Changed Sample Index' column counting 0 .. sampling_rate-1 periodically
    is inserted as the second column, and the frame is cut from the first
    period start at or after exp_start_time to the first period start at or
    after exp_end_time (both rows included).

    Input: data file path, start time and end time in datetime format,
           sampling_rate (length of one period of the new sample index,
           default 250)
    Returns: dataframe with data within the experiment time
    Raises: IndexError if no 'Sample Index' equals 0, or if no period start
            exists at or after the start time or the end time
    '''
    #the column names are on the fifth line of the data file
    df = pd.read_csv(data_file_path, header=4)
    df[' Timestamp (Formatted)'] = pd.to_datetime(df[' Timestamp (Formatted)'])

    # check for first sample index equal to 0
    zero_rows = df.index[df['Sample Index'] == 0.0]
    if len(zero_rows) == 0:
        raise IndexError("no row with 'Sample Index' equal to 0 in " + str(data_file_path))
    #copy so the new column is added to an independent frame, not to a slice
    df = df.loc[zero_rows[0]:].copy()

    #since the sampling frequency was 250 Hz, the sample index should be periodic with 250 values and not 255
    #create a new column having periodic values from 0 to sampling_rate - 1;
    #the modulo works for recordings of any length
    df.insert(1, 'Changed Sample Index', [i % sampling_rate for i in range(len(df))])

    #remove the data outside the experiment time
    is_period_start = df['Changed Sample Index'] == 0
    start_rows = df.index[(df[' Timestamp (Formatted)'] >= exp_start_time) & is_period_start]
    if len(start_rows) == 0:
        raise IndexError("no period start at or after the experiment start time " + str(exp_start_time))
    end_rows = df.index[(df[' Timestamp (Formatted)'] >= exp_end_time) & is_period_start]
    if len(end_rows) == 0:
        raise IndexError("no period start at or after the experiment end time " + str(exp_end_time))

    #label-based slice: the end row is included
    return df.loc[start_rows[0]:end_rows[0]]


In [4]:
User_data_path = Path.cwd() / 'User_data'

#trim every session recording that has not been converted to a .csv file yet
for folder in User_data_path.iterdir():
    #stray files next to the user folders cannot be iterated, skip them
    if not folder.is_dir():
        continue
    #iterate on every session in the folder
    for session in folder.iterdir():
        if not session.is_dir():
            continue
        #list the session once and reuse it for both checks below
        session_files = list(session.iterdir())
        #check if there is a .csv file present, i.e. the session is already processed
        if any(file.suffix == '.csv' for file in session_files):
            continue

        log_file_path = None
        data_file_path = None
        for file in session_files:
            #check if the file is a log file
            if file.suffix == '.log':
                log_file_path = file
            #check for file with .txt extension and not pulse data and track data
            if file.suffix == '.txt' and 'pulse' not in str(file) and 'track' not in str(file):
                data_file_path = file

        #without both files there is nothing to trim; report and move on
        #instead of failing inside pd.read_csv with an empty path
        if log_file_path is None or data_file_path is None:
            print(f'Skipping {session}: missing log file or data file')
            continue

        # NOTE(review): the index 6 counts every '_' in the full path (including
        # 'User_data' and the working directory), so it presumably only holds
        # for the original directory layout -- confirm before moving the data
        date = str(log_file_path).split('_')[6]
        exp_start_time, exp_end_time = get_start_end_time_logfile(log_file_path, date)

        df1 = remove_timestamp(data_file_path,exp_start_time, exp_end_time)
        #write the trimmed data next to the raw file, same name with .csv suffix
        df1.to_csv(data_file_path.with_suffix('.csv'), index=False)

In [5]:
#report every .csv file under User_data/<folder>/<session> that is
#smaller than 1000 bytes
small_csv_files = (
    candidate
    for user_dir in User_data_path.iterdir()
    for session_dir in user_dir.iterdir()
    for candidate in session_dir.iterdir()
    #only .csv files whose size on disk is below 1000 bytes
    if candidate.suffix == '.csv' and candidate.stat().st_size < 1000
)
for candidate in small_csv_files:
    print(candidate)
