In [1]:
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
from sqlalchemy import create_engine, inspect

### Extract

In [2]:
# Extract: read every subject's per-session events file and combine them into
# a single DataFrame, master_df. Files that are not found are listed and counted.
filepaths = []  # paths of the event files that were found and loaded
missing = 0  # number of expected session files that were not found
sub_total = 24 #24 is the default amount
ses_total = 2  # there are max 2 sessions per subject
columns = ['onset', 'duration', 'trial_type', 'response_time', 'sample', 'value','subject_id','session']

# Collect the per-session frames and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop copies all earlier rows every time.
session_frames = []
for subject in range(1, sub_total + 1):
    sub = f'{subject:03d}'  # zero-padded to three digits, e.g. 7 -> '007'
    for session in range(1, ses_total + 1):
        ses = f'{session:02d}'
        filepath = f'../ds001787-1.0.2/sub-{sub}/ses-{ses}/eeg/sub-{sub}_ses-{ses}_task-meditation_events.tsv'
        try:
            eeg_df = pd.read_csv(filepath, sep='\t', header=0)
        except FileNotFoundError:
            print(filepath.rsplit('/', 1)[-1],"does not exist.")
            missing += 1
            # Skip this session. Without the skip, the frame left over from the
            # previous iteration would be relabelled and appended a second time.
            continue
        eeg_df['subject_id'] = sub
        eeg_df['session'] = ses
        filepaths.append(filepath)
        session_frames.append(eeg_df)

if session_frames:
    master_df = pd.concat(session_frames)
    # Expected columns first (created as NaN if a file lacks one), then any extras.
    extra_columns = [col for col in master_df.columns if col not in columns]
    master_df = master_df.reindex(columns=columns + extra_columns)
else:
    # pd.concat raises on an empty list, so fall back to an empty frame.
    master_df = pd.DataFrame(columns=columns)

total_ses = sub_total * ses_total - missing
print(f"Finished concatenating {total_ses} sessions.")

sub-008_ses-02_task-meditation_events.tsv does not exist.
sub-012_ses-02_task-meditation_events.tsv does not exist.
sub-013_ses-02_task-meditation_events.tsv does not exist.
sub-014_ses-02_task-meditation_events.tsv does not exist.
sub-015_ses-02_task-meditation_events.tsv does not exist.
sub-019_ses-02_task-meditation_events.tsv does not exist.
sub-020_ses-02_task-meditation_events.tsv does not exist.
sub-021_ses-02_task-meditation_events.tsv does not exist.
sub-024_ses-02_task-meditation_events.tsv does not exist.
Finished concatenating 39 sessions.


### Transform

In [3]:
# Put the identifying columns first, then drop 'duration' and 'response_time'
# when they hold no data at all (every value is NaN).
master_df = master_df[['subject_id','session','onset','duration','trial_type','response_time','sample','value']]
# DataFrame.drop returns a new frame rather than modifying the caller, so the
# result must be assigned back; otherwise the columns are never removed.
empty_columns = [col for col in ('duration', 'response_time') if master_df[col].isnull().all()]
master_df = master_df.drop(columns=empty_columns)
master_df.head(10)

Unnamed: 0,subject_id,session,onset,duration,trial_type,response_time,sample,value
0,1,1,71.386719,,stimulus,,18275.0,128
1,1,1,75.730469,,response,,19387.0,2
2,1,1,79.773438,,response,,20422.0,2
3,1,1,125.609375,,stimulus,,32156.0,128
4,1,1,179.800781,,stimulus,,46029.0,128
5,1,1,183.097656,,response,,46873.0,2
6,1,1,185.632812,,response,,47522.0,4
7,1,1,284.859375,,stimulus,,72924.0,128
8,1,1,287.757812,,response,,73666.0,2
9,1,1,289.960938,,response,,74230.0,2


In [4]:
# Index the events by subject, session and onset time.
index_columns = ['subject_id', 'session', 'onset']
master_df = master_df.set_index(index_columns)

In [5]:
# Preview the first ten rows of the indexed frame.
master_df.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,duration,trial_type,response_time,sample,value
subject_id,session,onset,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1,71.386719,,stimulus,,18275.0,128
1,1,75.730469,,response,,19387.0,2
1,1,79.773438,,response,,20422.0,2
1,1,125.609375,,stimulus,,32156.0,128
1,1,179.800781,,stimulus,,46029.0,128
1,1,183.097656,,response,,46873.0,2
1,1,185.632812,,response,,47522.0,4
1,1,284.859375,,stimulus,,72924.0,128
1,1,287.757812,,response,,73666.0,2
1,1,289.960938,,response,,74230.0,2


### Create database connection

In [7]:
# Build the database URL from environment variables so that no credentials are
# stored in the notebook. EEG_DB_PASSWORD is required; the remaining settings
# fall back to the local defaults.
db_password = os.environ.get("EEG_DB_PASSWORD")
if db_password is None:
    raise RuntimeError("Set the EEG_DB_PASSWORD environment variable before creating the engine.")
db_user = os.environ.get("EEG_DB_USER", "postgres")
db_host = os.environ.get("EEG_DB_HOST", "localhost")
db_port = os.environ.get("EEG_DB_PORT", "5432")
db_name = os.environ.get("EEG_DB_NAME", "eeg_db")
# Percent-encode the credentials so characters such as '@' or '/' cannot break the URL.
connection_string = f"{quote(db_user, safe='')}:{quote(db_password, safe='')}@{db_host}:{db_port}/{db_name}"
engine = create_engine(f'postgresql://{connection_string}')

In [8]:
# Confirm tables: list the tables that currently exist in the database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of names.
inspect(engine).get_table_names()

[]

### Load data into PostgreSQL

In [9]:
# Write master_df to the 'master_eeg' table, replacing any existing table;
# the index levels are stored as columns.
master_df.to_sql('master_eeg', con=engine, index=True, if_exists='replace')

In [11]:
# Confirm the load by reading a few rows back from the new table.
# LIMIT keeps the database from sending the whole table just to show five rows.
pd.read_sql_query('select * from master_eeg limit 5', con=engine)

Unnamed: 0,subject_id,session,onset,duration,trial_type,response_time,sample,value
0,1,1,71.386719,,stimulus,,18275.0,128
1,1,1,75.730469,,response,,19387.0,2
2,1,1,79.773438,,response,,20422.0,2
3,1,1,125.609375,,stimulus,,32156.0,128
4,1,1,179.800781,,stimulus,,46029.0,128
