In [17]:
import time
from pathlib import Path
import numpy as np
from bed_reader import create_bed, open_bed

In [16]:
# Benchmark configuration: dimensions of the matrix to write.
iid_count = 250_000
sid_count = 1_000_000

# Output location; the file name records the dimensions.
ssd_path = Path("m:/deldir/bench2")
file_path = ssd_path / f"{iid_count}x{sid_count}mode0.bed"

# One row of values cycling 0, 1, 2, 0, 1, 2, ... with one entry per sid.
# np.arange builds this directly instead of converting a Python range.
snp_row = (np.arange(sid_count) % 3).astype(np.uint8)

start_time = time.time()
with create_bed(file_path, iid_count=iid_count, sid_count=sid_count, major="individual") as bed_writer:
    for iid_index in range(iid_count):
        # Report progress (and a linear time estimate) every 1,000 rows.
        if iid_index % 1_000 == 0:
            current_time = time.time()
            elapsed_time = current_time - start_time
            if iid_index > 0:
                estimated_total_time = elapsed_time / iid_index * iid_count
                time_remaining = estimated_total_time - elapsed_time
                print(f"iid_index={iid_index:_}, Time elapsed: {elapsed_time:.2f} s, Estimated total time: {estimated_total_time:.2f} s, Time remaining: {time_remaining:.2f} s")
            else:
                print(f"Starting processing at iid_index={iid_index:_}")
        bed_writer.write(snp_row)
        # Change one entry after each write so consecutive rows differ.
        # The index wraps so that a configuration with iid_count > sid_count
        # does not run off the end of the row; for iid_count <= sid_count
        # this is identical to indexing by iid_index.
        flip_index = iid_index % sid_count
        snp_row[flip_index] = (snp_row[flip_index] + 1) % 3
total_time = time.time() - start_time
print(f"Processing complete. Total time taken: {total_time:.2f} s")


Starting processing at iid_index=0
iid_index=1_000, Time elapsed: 3.67 s, Estimated total time: 916.69 s, Time remaining: 913.02 s
iid_index=2_000, Time elapsed: 4.93 s, Estimated total time: 616.84 s, Time remaining: 611.91 s
iid_index=3_000, Time elapsed: 6.20 s, Estimated total time: 516.71 s, Time remaining: 510.51 s
iid_index=4_000, Time elapsed: 7.45 s, Estimated total time: 465.92 s, Time remaining: 458.46 s
iid_index=5_000, Time elapsed: 8.71 s, Estimated total time: 435.52 s, Time remaining: 426.81 s
iid_index=6_000, Time elapsed: 9.97 s, Estimated total time: 415.45 s, Time remaining: 405.48 s
iid_index=7_000, Time elapsed: 11.24 s, Estimated total time: 401.40 s, Time remaining: 390.16 s
iid_index=8_000, Time elapsed: 12.50 s, Estimated total time: 390.52 s, Time remaining: 378.02 s
iid_index=9_000, Time elapsed: 13.76 s, Estimated total time: 382.14 s, Time remaining: 368.38 s
iid_index=10_000, Time elapsed: 15.02 s, Estimated total time: 375.41 s, Time remaining: 360.39 s


In [19]:
# Sanity check: reopen the file just written and read its top-left
# 10x10 corner, then display it as the cell's result.
corner_index = np.s_[:10, :10]
with open_bed(file_path) as bed_reader:
    val = bed_reader.read(corner_index)
val

array([[0., 1., 2., 0., 1., 2., 0., 1., 2., 0.],
       [1., 1., 2., 0., 1., 2., 0., 1., 2., 0.],
       [1., 2., 2., 0., 1., 2., 0., 1., 2., 0.],
       [1., 2., 0., 0., 1., 2., 0., 1., 2., 0.],
       [1., 2., 0., 1., 1., 2., 0., 1., 2., 0.],
       [1., 2., 0., 1., 2., 2., 0., 1., 2., 0.],
       [1., 2., 0., 1., 2., 0., 0., 1., 2., 0.],
       [1., 2., 0., 1., 2., 0., 1., 1., 2., 0.],
       [1., 2., 0., 1., 2., 0., 1., 2., 2., 0.],
       [1., 2., 0., 1., 2., 0., 1., 2., 0., 0.]], dtype=float32)

In [32]:
# Open `file_path` without a `with` block so the handle stays open and can be
# reused by the cells that follow. Nothing here closes it.
bed_reader = open_bed(file_path) # open and keep open

In [40]:
# Display the dimensions of the opened file (expected: (iid_count, sid_count)).
bed_reader.shape

(250000, 1000000)

In [41]:
# Read the first 1,000 sid columns for every iid as int8.
val = bed_reader.read(np.s_[:, 0:1000], dtype=np.int8)
# Rows of the transpose are the columns of `val`; take the first one and
# print its length to confirm it has one entry per iid.
# (A bare `val.shape` mid-cell was dropped: only a cell's last expression is
# displayed, so it had no effect.)
column = val.T[0]
print(len(column))

250000


In [42]:
# Copy the data from the already-open `bed_reader` into a new file written
# with major="SNP", reading `sid_at_a_time` sid columns per read call.
sid_at_a_time = 1000
file_path1 = ssd_path / f"{iid_count}x{sid_count}mode1.bed"
start_time = time.time()

with create_bed(file_path1, iid_count=bed_reader.iid_count, sid_count=bed_reader.sid_count, major="SNP") as bed_writer:
    for sid_index in range(0, bed_reader.sid_count, sid_at_a_time):
        # Report progress once per chunk. (The former `sid_index % 1 == 0`
        # guard was always true, so it has been removed.)
        current_time = time.time()
        elapsed_time = current_time - start_time
        if sid_index > 0:
            # Linear extrapolation over the reader's sid count, the same
            # count that bounds this loop and sizes the output file.
            estimated_total_time = elapsed_time / sid_index * bed_reader.sid_count
            time_remaining = estimated_total_time - elapsed_time
            print(f"sid_index={sid_index:_}, Time elapsed: {elapsed_time:.2f} s, Estimated total time: {estimated_total_time:.2f} s, Time remaining: {time_remaining:.2f} s")
        else:
            print(f"Starting processing at sid_index={sid_index:_}")
        # Read all iids for this chunk of sids, then write one column at a
        # time (rows of the transpose are the columns of the chunk).
        iid_column_by_chunk = bed_reader.read(np.s_[:, sid_index:sid_index + sid_at_a_time], dtype=np.int8)
        for iid_column in iid_column_by_chunk.T:
            bed_writer.write(iid_column)
total_time = time.time() - start_time
print(f"Processing complete. Total time taken: {total_time:.2f} s")


Starting processing at sid_index=0
sid_index=1_000, Time elapsed: 24.70 s, Estimated total time: 24702.03 s, Time remaining: 24677.32 s
sid_index=2_000, Time elapsed: 28.25 s, Estimated total time: 14124.00 s, Time remaining: 14095.75 s
sid_index=3_000, Time elapsed: 31.91 s, Estimated total time: 10635.47 s, Time remaining: 10603.56 s
sid_index=4_000, Time elapsed: 35.71 s, Estimated total time: 8928.68 s, Time remaining: 8892.97 s
sid_index=5_000, Time elapsed: 39.40 s, Estimated total time: 7879.06 s, Time remaining: 7839.66 s
sid_index=6_000, Time elapsed: 43.14 s, Estimated total time: 7189.40 s, Time remaining: 7146.26 s
sid_index=7_000, Time elapsed: 46.75 s, Estimated total time: 6678.53 s, Time remaining: 6631.78 s
sid_index=8_000, Time elapsed: 50.56 s, Estimated total time: 6320.46 s, Time remaining: 6269.90 s
sid_index=9_000, Time elapsed: 54.34 s, Estimated total time: 6037.75 s, Time remaining: 5983.41 s
sid_index=10_000, Time elapsed: 58.09 s, Estimated total time: 5809.