In [8]:
from pathlib import Path
import csv

def extract_subject_and_date_from_first_file(base_folders: list, output_csv: str):
    """Write one (subject_id, fmri_date) row per subject folder to a CSV file.

    Every immediate sub-directory of each folder in ``base_folders`` is treated
    as one subject, and the directory name is used as ``subject_id``. Only the
    first regular file found beneath that directory (in ``glob('**/*')`` order)
    is inspected. Its name is split on ``.`` and the fields at indices 5, 6 and
    7 are used as year, month and day, joined as ``"year.month.day"``.

    Subjects with no files, or whose first file name has fewer than 8
    dot-separated fields, are left out of the CSV.

    Args:
        base_folders: Directories whose immediate children are subject folders.
        output_csv: Path of the CSV file to create (overwritten if present).

    Returns:
        None. The result is written to ``output_csv`` and a confirmation
        message is printed.
    """
    # Index 7 (the day field) is read below, so at least 8 fields must exist.
    # The previous guard (>= 7) let 7-field names through and raised IndexError.
    min_name_fields = 8

    # Accumulated [subject_id, date] rows.
    data = []

    for base_folder in base_folders:
        for subject_folder in Path(base_folder).iterdir():
            if not subject_folder.is_dir():
                continue

            # The subject id comes from the folder name.
            subject_id = subject_folder.name

            # Only the first regular file below the subject folder is consulted.
            first_file = next(
                (entry for entry in subject_folder.glob('**/*') if entry.is_file()),
                None,
            )
            if first_file is None:
                continue

            file_name_parts = first_file.name.split('.')
            if len(file_name_parts) < min_name_fields:
                continue

            # Fields 5, 6, 7 of the dot-separated file name: year, month, day.
            year, month, day = file_name_parts[5:8]
            data.append([subject_id, f"{year}.{month}.{day}"])

    # Save as CSV: header row followed by the collected rows.
    with open(output_csv, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["subject_id", "fmri_date"])
        writer.writerows(data)

    print(f"CSV 파일이 {output_csv}로 저장되었습니다.")

# Data roots on the two NAS mounts. Both already end with a slash, so the
# joined paths below contain a doubled "//"; that is kept as-is.
mnt1_dir, mnt2_dir = "/mnt/NAS2/data/", "/mnt/NAS2-2/data/"

# DICOM trees to scan; each immediate sub-folder is one subject.
base_folder_1 = mnt1_dir + "/SAD_gangnam_resting/Dicom/"
base_folder_2 = mnt2_dir + "/SAD_gangnam_resting_2/Dicom2_prep/"
base_folders = [base_folder_1, base_folder_2]

# Digital-phenotype directory that receives the generated CSV.
dp_dir = mnt2_dir + "/SAD_gangnam_DP/"
output_csv = dp_dir + "subject_fmri_date.csv"  # path of the CSV file to write

extract_subject_and_date_from_first_file(base_folders=base_folders, output_csv=output_csv)

CSV 파일이 /mnt/NAS2-2/data//SAD_gangnam_DP/subject_fmri_date.csv로 저장되었습니다.


In [9]:
import pandas as pd
from pathlib import Path
# Directory layout for the digital-phenotype data on the NAS.
mnt_dir = "/mnt/NAS2-2/data/"
dp_dir = mnt_dir + "/SAD_gangnam_DP/"
feature_dir = dp_dir + "/dp_features/"

# Path objects for the same locations.
dp_path = Path(dp_dir)
feature_path = Path(feature_dir)

# Load the three tables that are joined further down.
id_lookup_table = pd.read_csv(dp_path.joinpath("id_lookup_table.csv"))
dp_subject_table = pd.read_csv(dp_path.joinpath("digital_phenotype_subject.csv"))
subject_fmri_date = pd.read_csv(dp_path.joinpath("subject_fmri_date.csv"))

In [10]:
# Show the ID lookup table read from id_lookup_table.csv.
# NOTE(review): the rendered output includes participant names and account
# IDs — clear this cell's output before sharing the notebook.
id_lookup_table

Unnamed: 0.1,Unnamed: 0,fmri_code,Screening #,Enrollment #,HAID ID,이름
0,0,s0007,1,EXP-0001,0827jck,장찬경
1,1,s0005,2,EXP-0002,na0840,윤나예
2,2,s0004,3,EXP-0003,spwls915,조예진
3,3,c0009,4,HC-0001,wkddbswns,장윤준
4,4,s0012,5,EXP-0004,bje5409,박정은
...,...,...,...,...,...,...
154,161,s0583,162,EXP-0088,goun4238,최고운
155,163,s0597,164,EXP-0090,john9987,최광묵
156,164,s0608,165,EXP-0091,jungminji57,정민지
157,165,c0609,166,HC-0075,gemma9em,김경미


In [11]:
# Show the subject table read from digital_phenotype_subject.csv.
# NOTE(review): the nickname column holds the same account IDs as the lookup
# table's 'HAID ID' column — treat the rendered output as identifying data.
dp_subject_table

Unnamed: 0,group,nickname,pid,subjNum
0,SAD,0827jck,1,7
1,SAD,na0840,2,5
2,SAD,spwls915,3,4
3,SAD,bje5409,5,12
4,SAD,stevenliu,8,8
...,...,...,...,...
68,HC,dryflowersoo,56,90
69,HC,anna05077,57,89
70,HC,cholebera,60,91
71,HC,ffddss1448,74,170


In [12]:
# Show the per-subject fMRI dates read from subject_fmri_date.csv.
subject_fmri_date

Unnamed: 0,subject_id,fmri_date
0,c0034,2022.02.26
1,c0292,2023.11.04
2,s0128,2023.02.25
3,c0053,2022.04.09
4,c0064,2022.05.28
...,...,...
163,s0407,2024.02.17
164,s0316,2023.11.04
165,s0440,2024.03.16
166,s0368,2024.05.11


In [13]:
# Step 1: attach each subject's fmri_code by matching 'nickname' against the
# lookup table's 'HAID ID'. A left join keeps every row of dp_subject_table.
# NOTE(review): neither merge checks that the right-hand keys are unique;
# duplicate 'HAID ID' or 'subject_id' values would multiply rows. Consider
# validate="many_to_one" once the inputs are known to be clean.
merged_df = dp_subject_table.merge(
    id_lookup_table[['HAID ID', 'fmri_code']],
    how='left',
    left_on='nickname',
    right_on='HAID ID',
)

# Step 2: attach the scan date by matching 'fmri_code' against 'subject_id',
# then (Step 3) keep only the columns that go into the exported table.
final_df = (
    merged_df
    .merge(
        subject_fmri_date[['subject_id', 'fmri_date']],
        how='left',
        left_on='fmri_code',
        right_on='subject_id',
    )
    [['group', 'nickname', 'pid', 'subjNum', 'fmri_code', 'fmri_date']]
)

# Write the combined table next to the input CSVs, without the index column.
final_df.to_csv(dp_path / "dp_fmri_subject.csv", index=False)