In [7]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm

# GS

In [8]:
# Cohort label; it is interpolated into the relative-pair file path in the next cell.
cohort = "GS"

In [9]:
# Tab-separated table of relative pairs for the selected cohort.
# The columns used later in this notebook are DOR, volid and relid.
pair_info_fn = f"/data/jerrylee/pjt/BIGFAM.v.2.0/data/{cohort}/relationship_information/relatives.formatted.info"
df_pair = pd.read_csv(pair_info_fn, sep='\t')
df_pair

Unnamed: 0,DOR,rcode,relationship,volid,relid,volage,relage,volsex,relsex,Erx
0,1,SB,daughter-sister,18826,21244,50,36,F,F,0.750000
1,1,SB,different-sex-sibling,34422,23884,33,35,F,M,0.353553
2,1,PC,daughter-mother,79198,67531,66,44,F,F,0.500000
3,1,SB,daughter-sister,20399,67531,38,44,F,F,0.750000
4,1,SB,daughter-sister,67267,67531,43,44,F,F,0.750000
...,...,...,...,...,...,...,...,...,...,...
38001,3,HAV,son-mother-father-daughter,34570,78069,50,39,F,M,0.353553
38002,3,HAV,son-mother-father-daughter,97449,79361,40,29,F,M,0.353553
38003,3,HAV,son-mother-father-daughter,97449,5360,40,22,F,M,0.353553
38004,3,HAV,son-mother-father-daughter,15442,83545,35,29,F,M,0.353553


In [10]:
# Load GRM
# Path of the GRM ID file; the next cell reads it to fix the row/column order.
grm_id_fn = "/data/jerrylee/data/GS/GRM/GS_GWAS.grm.id"
# The matching binary file is left commented out: only the ID list is read here.
# grm_bin_fn = "/data/jerrylee/data/GS/GRM/GS_GWAS.grm.bin"

In [11]:
# Read the sample IDs listed in the GRM ID file (stored as byte strings of up to 10 bytes)
ids = np.loadtxt(grm_id_fn, dtype='S10')
if ids.ndim > 1:
    # Multi-column file: keep only the second column
    ids = ids[:, 1]
N = len(ids)
# Map every decoded ID string to its position in the ID file
id_dict = {raw_id.decode("utf-8"): pos for pos, raw_id in enumerate(ids)}

In [33]:
# Expected additive relationship matrix built from the degree of relatedness only
# (no genotypes): identity on the diagonal, 0.5**DOR for every listed pair.
N = len(ids)
eR = np.eye(N)

# Walk the pair table; each pair is written at (i, j) and mirrored at (j, i)
for vol, rel, degree in zip(df_pair['volid'], df_pair['relid'], df_pair['DOR']):
    vol_pos = id_dict[str(vol)]
    rel_pos = id_dict[str(rel)]
    coef = 0.5**degree
    eR[vol_pos, rel_pos] = coef
    eR[rel_pos, vol_pos] = coef

# Save the matrix as a binary GRM file
output_bin_fn = "/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/GS.eA.grm.bin"  # output file name

# Keep only the lower triangle, diagonal included; np.tril_indices walks it row by row
tri_rows, tri_cols = np.tril_indices(N)
lower_tri = eR[tri_rows, tri_cols]

# Values are written as float32
lower_tri.astype(np.float32).tofile(output_bin_fn)

# Save the ID file: each ID is written twice per line, tab-separated
output_id_fn = "/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/GS.eA.grm.id"
with open(output_id_fn, 'w') as f:
    for raw_id in ids:
        sample = raw_id.decode('utf-8')
        f.write(f"{sample}\t{sample}\n")

In [6]:
# Shared-environment indicator matrices: one matrix per degree of relatedness,
# with 1 on the diagonal and 1 for every pair of that degree.
# NOTE(review): these files are written under /data/jerrylee/data/GS/GRM/, whereas the
# check cell at the end of the notebook reads .../other-methods/A/GS.S3.grm.* — confirm
# which location is intended.
for dor in [1, 2, 3]:
    S = np.eye(N)
    # Pairs of exactly this degree
    tmp_pair = df_pair[df_pair['DOR'] == dor].copy()
    pair_ids = zip(tmp_pair['volid'], tmp_pair['relid'])
    for vol, rel in tqdm(pair_ids, total=len(tmp_pair), desc=f'DOR {dor}'):
        vol_pos = id_dict[str(vol)]
        rel_pos = id_dict[str(rel)]
        # Mark the pair symmetrically
        S[vol_pos, rel_pos] = 1
        S[rel_pos, vol_pos] = 1

    # Save the matrix in GRM layout
    output_bin_fn = f"/data/jerrylee/data/GS/GRM/GS_S{dor}.grm.bin"  # output file name

    # Lower triangle including the diagonal, walked row by row
    tri_rows, tri_cols = np.tril_indices(N)
    lower_tri = S[tri_rows, tri_cols]

    # Written as float32
    lower_tri.astype(np.float32).tofile(output_bin_fn)

    # ID file: the same ID in both tab-separated columns
    output_id_fn = f"/data/jerrylee/data/GS/GRM/GS_S{dor}.grm.id"
    with open(output_id_fn, 'w') as f:
        for raw_id in ids:
            sample = raw_id.decode('utf-8')
            f.write(f"{sample}\t{sample}\n")


DOR 1: 100%|██████████| 18258/18258 [00:01<00:00, 16370.30it/s]
DOR 2: 100%|██████████| 15114/15114 [00:00<00:00, 16259.45it/s]
DOR 3: 100%|██████████| 4634/4634 [00:00<00:00, 16557.19it/s]


single shared env matrix (w_s = 0.95)

In [17]:
# Single shared-environment matrix with weight 0.95:
# a pair of degree d receives 0.95 ** (d - 1), so degree-1 pairs get 1.
S = np.eye(N)
tmp_pair = df_pair.copy()
pair_rows = zip(tmp_pair['volid'], tmp_pair['relid'], tmp_pair['DOR'])
for vol, rel, degree in tqdm(pair_rows, total=len(tmp_pair)):
    vol_pos = id_dict[str(vol)]
    rel_pos = id_dict[str(rel)]

    # Same weight on both sides of the diagonal
    weight = 0.95 ** (degree - 1)
    S[vol_pos, rel_pos] = weight
    S[rel_pos, vol_pos] = weight

  0%|          | 0/38006 [00:00<?, ?it/s]

100%|██████████| 38006/38006 [00:02<00:00, 16565.88it/s]


In [20]:
# Spot check: print both mirrored entries of S for three randomly drawn pairs of one degree
dor = 3
# Draw three pairs (no seed is set, so the draw differs between runs)
sample_pairs = df_pair[df_pair['DOR'] == dor].sample(3)

# IDs of the two members of each sampled pair
vol_list = sample_pairs['volid'].tolist()
rel_list = sample_pairs['relid'].tolist()

# The same IDs as (volid, relid) tuples
pair_list = list(zip(vol_list, rel_list))

for vol, rel in pair_list:
    vol_pos = id_dict[str(vol)]
    rel_pos = id_dict[str(rel)]
    print(S[vol_pos, rel_pos], S[rel_pos, vol_pos])

0.9025 0.9025
0.9025 0.9025
0.9025 0.9025


In [21]:
# Display S as the cell output for a visual check.
S

array([[1.  , 0.  , 0.  , ..., 0.  , 0.  , 0.  ],
       [0.  , 1.  , 0.  , ..., 0.  , 0.  , 0.  ],
       [0.  , 0.  , 1.  , ..., 0.  , 0.  , 0.  ],
       ...,
       [0.  , 0.  , 0.  , ..., 1.  , 0.95, 1.  ],
       [0.  , 0.  , 0.  , ..., 0.95, 1.  , 1.  ],
       [0.  , 0.  , 0.  , ..., 1.  , 1.  , 1.  ]])

In [22]:
# Save the single shared-environment matrix S in GRM layout
output_bin_fn = f"/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/GS.S.grm.bin"  # output file name

# Lower triangle including the diagonal; np.tril_indices walks it row by row
tri_rows, tri_cols = np.tril_indices(N)
lower_tri = S[tri_rows, tri_cols]

# Written as float32
lower_tri.astype(np.float32).tofile(output_bin_fn)

# ID file: the same ID in both tab-separated columns
output_id_fn = f"/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/GS.S.grm.id"
with open(output_id_fn, 'w') as f:
    for raw_id in ids:
        sample = raw_id.decode('utf-8')
        f.write(f"{sample}\t{sample}\n")


# UKB

In [4]:
# Switch the cohort label; the next cell reloads df_pair from this cohort's directory.
cohort = "UKB"

In [5]:
# Tab-separated table of relative pairs for the selected cohort.
# This rebinds df_pair, replacing whatever table was loaded before.
pair_info_fn = f"/data/jerrylee/pjt/BIGFAM.v.2.0/data/{cohort}/relationship_information/relatives.formatted.info"
df_pair = pd.read_csv(pair_info_fn, sep='\t')
df_pair

Unnamed: 0,DOR,rcode,relationship,volid,relid,volage,relage,volsex,relsex,Erx
0,1,SB,daughter-sister,1000094,3653174,65,64,F,F,0.75
1,1,,,1000220,1691267,64,64,F,F,
2,1,,,1000286,1571411,53,70,F,F,
3,1,,,1000295,1045127,60,41,F,F,
4,1,,,1000476,3599303,50,51,F,M,
...,...,...,...,...,...,...,...,...,...,...
81321,3,1C,son-(father-brother)/(father-sister)-daughter,6023723,4863061,62,64,F,M,0.00
81322,3,,,6024211,1209127,53,60,M,F,
81323,3,,,6024384,1854265,62,44,M,M,
81324,3,1C,son-(father-sister)/(father-sister)/(mother-br...,6024486,3148753,58,56,M,M,0.00


In [86]:
for dor in [1, 2, 3]:
    # The IDs of the degree-specific GRM define the matrix order
    grm_id_fn = f"/data/jerrylee/data/UKB/grm_rel/DOR{dor}_chrALL.grm.id"
    ids = np.loadtxt(grm_id_fn, dtype='S10')
    if ids.ndim > 1:
        # Multi-column file: keep only the second column
        ids = ids[:, 1]
    N = len(ids)
    id_dict = {raw_id.decode("utf-8"): pos for pos, raw_id in enumerate(ids)}

    # Expected additive matrix without genotypes; starts as the identity
    eR = np.eye(N)

    # IDs present in this GRM, as strings
    valid_ids = set(id_dict.keys())

    # Keep pairs of this degree whose two members are both in the GRM
    both_in_grm = (
        df_pair['volid'].astype(str).isin(valid_ids)
        & df_pair['relid'].astype(str).isin(valid_ids)
    )
    filtered_df_pair = df_pair[both_in_grm & (df_pair["DOR"] == dor)]

    print(f"DOR {dor}", flush=True)
    print(filtered_df_pair.groupby("DOR").size(), flush=True)

    # Every remaining pair gets 0.5**DOR, mirrored across the diagonal
    pair_rows = zip(filtered_df_pair['volid'], filtered_df_pair['relid'], filtered_df_pair['DOR'])
    for vol, rel, degree in tqdm(pair_rows, total=len(filtered_df_pair), desc=f'DOR {dor}'):
        vol_pos = id_dict[str(vol)]
        rel_pos = id_dict[str(rel)]
        coef = 0.5**degree
        eR[vol_pos, rel_pos] = coef
        eR[rel_pos, vol_pos] = coef

    # Save the matrix as a binary GRM file
    output_bin_fn = f"/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/UKB.eA{dor}.grm.bin"  # output file name

    # Lower triangle including the diagonal; np.tril_indices walks it row by row
    tri_rows, tri_cols = np.tril_indices(N)
    lower_tri = eR[tri_rows, tri_cols]

    # Written as float32
    lower_tri.astype(np.float32).tofile(output_bin_fn)

    # ID file: the same ID in both tab-separated columns
    output_id_fn = f"/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/UKB.eA{dor}.grm.id"
    with open(output_id_fn, 'w') as f:
        for raw_id in ids:
            sample = raw_id.decode('utf-8')
            f.write(f"{sample}\t{sample}\n")

DOR 1
DOR
1    8349
dtype: int64


DOR 1: 100%|██████████| 8349/8349 [00:00<00:00, 14403.18it/s]


DOR 2
DOR
2    2468
dtype: int64


DOR 2: 100%|██████████| 2468/2468 [00:00<00:00, 14905.09it/s]


DOR 3
DOR
3    12159
dtype: int64


DOR 3: 100%|██████████| 12159/12159 [00:00<00:00, 13300.36it/s]


In [90]:
# Shared-environment indicator matrices for UKB, one per degree of relatedness
for dor in [1, 2, 3]:
    # The IDs of the degree-specific GRM define the matrix order
    grm_id_fn = f"/data/jerrylee/data/UKB/grm_rel/DOR{dor}_chrALL.grm.id"
    ids = np.loadtxt(grm_id_fn, dtype='S10')
    if ids.ndim > 1:
        # Multi-column file: keep only the second column
        ids = ids[:, 1]
    N = len(ids)
    id_dict = {raw_id.decode("utf-8"): pos for pos, raw_id in enumerate(ids)}

    # Shared-environment matrix; starts as the identity
    S = np.eye(N)

    # IDs present in this GRM, as strings
    valid_ids = set(id_dict.keys())

    # Keep pairs of this degree whose two members are both in the GRM
    both_in_grm = (
        df_pair['volid'].astype(str).isin(valid_ids)
        & df_pair['relid'].astype(str).isin(valid_ids)
    )
    filtered_df_pair = df_pair[both_in_grm & (df_pair["DOR"] == dor)]

    print(f"DOR {dor}", flush=True)
    print(filtered_df_pair.groupby("DOR").size(), flush=True)

    # The table is already restricted to this degree, so every row is marked
    pair_ids = zip(filtered_df_pair['volid'], filtered_df_pair['relid'])
    for vol, rel in tqdm(pair_ids, total=len(filtered_df_pair), desc=f'DOR {dor}'):
        vol_pos = id_dict[str(vol)]
        rel_pos = id_dict[str(rel)]
        S[vol_pos, rel_pos] = 1
        S[rel_pos, vol_pos] = 1

    # Save the matrix in GRM layout
    output_bin_fn = f"/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/UKB.S{dor}.grm.bin"  # output file name

    # Lower triangle including the diagonal; np.tril_indices walks it row by row
    tri_rows, tri_cols = np.tril_indices(N)
    lower_tri = S[tri_rows, tri_cols]

    # Written as float32
    lower_tri.astype(np.float32).tofile(output_bin_fn)

    # ID file: the same ID in both tab-separated columns
    output_id_fn = f"/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/UKB.S{dor}.grm.id"
    with open(output_id_fn, 'w') as f:
        for raw_id in ids:
            sample = raw_id.decode('utf-8')
            f.write(f"{sample}\t{sample}\n")


DOR 1
DOR
1    8349
dtype: int64


DOR 1: 100%|██████████| 8349/8349 [00:00<00:00, 14242.43it/s]


DOR 2
DOR
2    2468
dtype: int64


DOR 2: 100%|██████████| 2468/2468 [00:00<00:00, 14972.53it/s]


DOR 3
DOR
3    12159
dtype: int64


DOR 3: 100%|██████████| 12159/12159 [00:00<00:00, 14261.74it/s]


Small GRM combining DOR 1, 2, and 3 pairs

In [6]:
# Build a reduced DOR 1+2+3 pair table: up to 1000 pairs per DOR (at most 3000 rows).
# Each DOR group is sampled explicitly with the same seed and then concatenated.
# This avoids GroupBy.apply, whose handling of the grouping column depends on the
# pandas version, so the DOR column is always kept in the result.
df_123 = pd.concat(
    [
        dor_pairs.sample(n=min(len(dor_pairs), 1000), random_state=42)
        for _, dor_pairs in df_pair.groupby('DOR')
    ],
    ignore_index=True,
)

df_123

Unnamed: 0,DOR,rcode,relationship,volid,relid,volage,relage,volsex,relsex,Erx
0,1,PC,daughter-mother,1048482,1999157,65,41,F,F,0.500000
1,1,SB,daughter-sister,1205997,3926903,49,51,F,F,0.750000
2,1,,,3483091,4723547,64,59,F,F,
3,1,PC,daughter-mother,1083832,5441182,40,62,F,F,0.500000
4,1,SB,daughter-sister,2780693,4730772,45,43,F,F,0.750000
...,...,...,...,...,...,...,...,...,...,...
2995,3,1C,son-mother-brother-daughter,3861713,5147538,57,61,F,M,0.176777
2996,3,1C,son-(father-brother)/(father-sister)-daughter,5177760,5794582,58,57,F,M,0.000000
2997,3,,,1331079,1582951,61,52,F,M,
2998,3,,,1670968,5390636,60,63,F,F,


In [27]:
# Collect every ID that appears in df_123 (either member of a pair), sorted
unique_ids = np.sort(pd.concat([df_123['volid'], df_123['relid']]).unique())
N = len(unique_ids)

# New ID -> position mapping for the reduced sample
id_dict = dict(zip(map(str, unique_ids), range(N)))

# Expected additive matrix; starts as the identity
eA = np.eye(N)

# Every pair in df_123 gets 0.5**DOR, mirrored across the diagonal
pair_rows = zip(df_123['volid'], df_123['relid'], df_123['DOR'])
for vol, rel, degree in tqdm(pair_rows, total=len(df_123), desc='Creating eA matrix'):
    vol_pos = id_dict[str(vol)]
    rel_pos = id_dict[str(rel)]
    coef = 0.5**degree
    eA[vol_pos, rel_pos] = coef
    eA[rel_pos, vol_pos] = coef

# Save the matrix as a binary GRM file
output_bin_fn = "/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/UKB.eA_123.grm.bin"

# Lower triangle including the diagonal; np.tril_indices walks it row by row
tri_rows, tri_cols = np.tril_indices(N)
lower_tri = eA[tri_rows, tri_cols]

# Written as float32
lower_tri.astype(np.float32).tofile(output_bin_fn)

# ID file: the same ID in both tab-separated columns, in id_dict order
output_id_fn = "/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/UKB.eA_123.grm.id"
with open(output_id_fn, 'w') as f:
    for sample in id_dict:
        f.write(f"{sample}\t{sample}\n")

Creating eA matrix:   0%|          | 0/3000 [00:00<?, ?it/s]

Creating eA matrix: 100%|██████████| 3000/3000 [00:00<00:00, 15405.53it/s]


additive using SNP

In [22]:
# additive using SNP
def load_grm(base_path):
    """Load a GRM stored as ``<base_path>.grm.id`` and ``<base_path>.grm.bin``.

    The ID file is whitespace-separated text; when it has several columns the
    second one is used as the sample ID. The binary file holds the lower
    triangle of the matrix (diagonal included) as float32 values, row by row.

    Parameters
    ----------
    base_path : str
        Path prefix shared by the ``.grm.id`` and ``.grm.bin`` files.

    Returns
    -------
    id_dict : dict[str, int]
        Sample ID -> row/column position in ``R``.
    R : numpy.ndarray
        Symmetric ``(N, N)`` float32 matrix.

    Raises
    ------
    ValueError
        If the binary file does not hold exactly ``N * (N + 1) / 2`` values.
    """
    grm_id_fn = f"{base_path}.grm.id"
    grm_bin_fn = f"{base_path}.grm.bin"

    # ndmin=2 keeps a one-row ID file two-dimensional, so its two columns are not
    # mistaken for two samples. dtype=str avoids truncating IDs longer than 10 bytes.
    id_table = np.loadtxt(grm_id_fn, dtype=str, ndmin=2)
    ids = id_table[:, 1] if id_table.shape[1] > 1 else id_table[:, 0]

    # ID dictionary (ID : index)
    N = len(ids)
    id_dict = {str(sample_id): idx for idx, sample_id in enumerate(ids)}

    # Validate the size up front: NumPy would silently broadcast a one-value file
    values = np.fromfile(grm_bin_fn, dtype=np.float32)
    expected = N * (N + 1) // 2
    if values.size != expected:
        raise ValueError(
            f"{grm_bin_fn} holds {values.size} values but {N} IDs require {expected}"
        )

    # Fill the lower triangle, mirror it, then undo the doubling of the diagonal
    R = np.zeros((N, N), dtype=np.float32)
    R[np.tril_indices(N)] = values
    R = R + R.T
    R[np.diag_indices(N)] = np.diag(R) / 2.0

    return id_dict, R

# SNP-based additive relationship matrix for the IDs in id_dict.
# It relies on N and id_dict from the cell that built the df_123 ID mapping.
# It starts as the identity; entries are replaced by SNP GRM values wherever both
# individuals are present in the same degree-specific GRM. GRMs are applied in the
# order DOR 1, 2, 3, so a later GRM overwrites entries set by an earlier one.
A = np.eye(N)

# Target IDs and their positions in A, in id_dict order
target_ids = list(id_dict.keys())
target_pos = np.array(list(id_dict.values()), dtype=np.intp)

for dor in [1, 2, 3]:
    snp_grm_path = f"/data/jerrylee/data/UKB/grm_rel/DOR{dor}_chrALL"
    snp_ids, snp_grm = load_grm(snp_grm_path)

    # Target IDs that also exist in this SNP GRM
    shared = [k for k, id_ in enumerate(target_ids) if id_ in snp_ids]
    if not shared:
        continue

    dst = target_pos[shared]
    src = np.array([snp_ids[target_ids[k]] for k in shared], dtype=np.intp)

    # Copy the whole sub-block at once instead of visiting every ID pair in Python.
    # The loaded GRM is symmetric, so this matches the pairwise fill entry for entry.
    A[np.ix_(dst, dst)] = snp_grm[np.ix_(src, src)]

In [23]:
# Collect the non-zero entries of A into a Series
non_zero_values = pd.Series(A[np.nonzero(A)])

# Bin edges built with np.arange(-0.2, 1.1, 0.1), i.e. steps of 0.1 starting at -0.2
bins = np.arange(-0.2, 1.1, 0.1)

# Assign each value to a bin with pd.cut, then count per bin in bin order
binned = pd.cut(non_zero_values, bins=bins)
value_counts = binned.value_counts().sort_index()

# Print the per-bin counts
print(value_counts)

(-0.2, -0.1]          0
(-0.1, 0.0]     1201828
(0.0, 0.1]      1126816
(0.1, 0.2]          596
(0.2, 0.3]          564
(0.3, 0.4]           22
(0.4, 0.5]          350
(0.5, 0.6]          274
(0.6, 0.7]            2
(0.7, 0.8]            0
(0.8, 0.9]            0
(0.9, 1.0]         4855
dtype: int64


In [41]:
# Save the SNP-based matrix A as a binary GRM file
output_bin_fn = "/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/UKB.A_123.grm.bin"

# Lower triangle including the diagonal; np.tril_indices walks it row by row
tri_rows, tri_cols = np.tril_indices(N)
lower_tri = A[tri_rows, tri_cols]

# Written as float32
lower_tri.astype(np.float32).tofile(output_bin_fn)

# ID file: the same ID in both tab-separated columns, in id_dict order
output_id_fn = "/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/UKB.A_123.grm.id"
with open(output_id_fn, 'w') as f:
    for sample in id_dict:
        f.write(f"{sample}\t{sample}\n")

In [28]:
# Shared-environment indicator matrices for the reduced df_123 sample, one per degree
for dor in [1, 2, 3]:
    S = np.eye(N)
    # Pairs of exactly this degree
    tmp_pair = df_123[df_123['DOR'] == dor].copy()
    pair_ids = zip(tmp_pair['volid'], tmp_pair['relid'])
    for vol, rel in tqdm(pair_ids, total=len(tmp_pair), desc=f'DOR {dor}'):
        vol_pos = id_dict[str(vol)]
        rel_pos = id_dict[str(rel)]
        # Mark the pair symmetrically
        S[vol_pos, rel_pos] = 1
        S[rel_pos, vol_pos] = 1

    # Save the matrix in GRM layout
    output_bin_fn = f"/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/UKB.S{dor}_123.grm.bin"  # output file name

    # Lower triangle including the diagonal; np.tril_indices walks it row by row
    tri_rows, tri_cols = np.tril_indices(N)
    lower_tri = S[tri_rows, tri_cols]

    # Written as float32
    lower_tri.astype(np.float32).tofile(output_bin_fn)

    # ID file: the same ID in both tab-separated columns, in id_dict order
    output_id_fn = f"/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/UKB.S{dor}_123.grm.id"
    with open(output_id_fn, 'w') as f:
        for sample in id_dict:
            f.write(f"{sample}\t{sample}\n")

DOR 1: 100%|██████████| 1000/1000 [00:00<00:00, 13128.33it/s]
DOR 2: 100%|██████████| 1000/1000 [00:00<00:00, 15236.72it/s]
DOR 3: 100%|██████████| 1000/1000 [00:00<00:00, 15159.24it/s]


single shared env matrix (w_s = 1/100)

In [37]:
# Single shared-environment matrix with weight 1/100:
# a pair of degree d receives (1/100) ** (d - 1), so degree-1 pairs get 1.
S = np.eye(N)
tmp_pair = df_123.copy()
pair_rows = zip(tmp_pair['volid'], tmp_pair['relid'], tmp_pair['DOR'])
for vol, rel, degree in tqdm(pair_rows, total=len(tmp_pair)):
    vol_pos = id_dict[str(vol)]
    rel_pos = id_dict[str(rel)]

    # Same weight on both sides of the diagonal
    weight = (1/100) ** (degree - 1)
    S[vol_pos, rel_pos] = weight
    S[rel_pos, vol_pos] = weight

100%|██████████| 3000/3000 [00:00<00:00, 15599.11it/s]


In [38]:
# Spot check: print both mirrored entries of S for three randomly drawn pairs of one degree
dor = 3
# Draw three pairs (no seed is set, so the draw differs between runs)
sample_pairs = tmp_pair[tmp_pair['DOR'] == dor].sample(3)

# IDs of the two members of each sampled pair
vol_list = sample_pairs['volid'].tolist()
rel_list = sample_pairs['relid'].tolist()

# The same IDs as (volid, relid) tuples
pair_list = list(zip(vol_list, rel_list))

for vol, rel in pair_list:
    vol_pos = id_dict[str(vol)]
    rel_pos = id_dict[str(rel)]
    print(S[vol_pos, rel_pos], S[rel_pos, vol_pos])

0.0001 0.0001
0.0001 0.0001
0.0001 0.0001


In [39]:
# Display S as the cell output for a visual check.
S

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [40]:
# Save the weighted shared-environment matrix S in GRM layout
output_bin_fn = f"/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/UKB.S_123_w_100.grm.bin"  # output file name

# Lower triangle including the diagonal; np.tril_indices walks it row by row
tri_rows, tri_cols = np.tril_indices(N)
lower_tri = S[tri_rows, tri_cols]

# Written as float32
lower_tri.astype(np.float32).tofile(output_bin_fn)

# ID file: the same ID in both tab-separated columns, in id_dict order
output_id_fn = f"/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/UKB.S_123_w_100.grm.id"

with open(output_id_fn, 'w') as f:
    for sample in id_dict:
        f.write(f"{sample}\t{sample}\n")

# Check

In [62]:
# Read a saved matrix back from disk to verify the written files.
# NOTE(review): the GS shared-environment cell in this notebook writes
# /data/jerrylee/data/GS/GRM/GS_S{dor}.grm.*, not the files below — confirm that
# these paths point at the intended, current output.
grm_id_fn = "/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/GS.S3.grm.id"
grm_bin_fn = "/data/jerrylee/pjt/BIGFAM.v.2.0/data/other-methods/A/GS.S3.grm.bin"

# IDs in file order; keep only the second column when the file has several
ids = np.loadtxt(grm_id_fn, dtype='S10')
if ids.ndim > 1:
    ids = ids[:, 1]
N = len(ids)
id_dict = {raw_id.decode("utf-8"): pos for pos, raw_id in enumerate(ids)}

# Rebuild the full symmetric matrix from the stored lower triangle
packed = np.fromfile(grm_bin_fn, dtype=np.float32)
R = np.zeros((N, N), dtype=np.float32)
R[np.tril_indices(N)] = packed
# Mirroring doubles the diagonal, so it is halved afterwards
R = R + R.T
R[np.diag_indices(N)] = np.diag(R) / 2.0

In [63]:
# Display the reloaded matrix R as the cell output.
R

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [66]:
# Spot check on the reloaded matrix: print both mirrored entries of R
# for three randomly drawn pairs of one degree
dor = 1
# Draw three pairs (no seed is set, so the draw differs between runs)
sample_pairs = df_pair[df_pair['DOR'] == dor].sample(3)

# IDs of the two members of each sampled pair
vol_list = sample_pairs['volid'].tolist()
rel_list = sample_pairs['relid'].tolist()

# The same IDs as (volid, relid) tuples
pair_list = list(zip(vol_list, rel_list))

for vol, rel in pair_list:
    vol_pos = id_dict[str(vol)]
    rel_pos = id_dict[str(rel)]
    print(R[vol_pos, rel_pos], R[rel_pos, vol_pos])

0.0 0.0
0.0 0.0
0.0 0.0
