In [6]:
import pydicom
from pathlib import Path
import re
import numpy as np

loc = '/run/media/anton/Elements/UKB/20204/Bulk/Liver MRI/ShMOLLI/'
sub_dcms = dict()
count = 0

for i in range(10,33):
    for ent in Path(f'{loc}/{i}').iterdir():
        if not ent.is_dir():
            continue
        mtch = re.match('(\\d+)_20204_2_0', ent.name)
        if not mtch:
            continue
        sub = mtch.group(1)
        
        dcm_paths = []
        for dcm_cand in Path(f'{loc}/{i}/{sub}_20204_2_0').iterdir():
            if re.match('.*\\.dcm$', dcm_cand.name):
                dcm_paths.append(dcm_cand)

        dcms = []
        for path in dcm_paths:
            d = pydicom.dcmread(path)
            dcms.append(d)
            
        series_numbers = set()
        for dcm in dcms:
            series_numbers.add(int(dcm.SeriesNumber))
        series_numbers = sorted(list(series_numbers))
    
        first_dcms = []
        for dcm in dcms:
            if int(dcm.SeriesNumber) == series_numbers[0]:
                first_dcms.append(dcm)
    
        first_dcms.sort(key=lambda dcm: int(dcm.InstanceNumber))
    
        for dcm in dcms:
            if int(dcm.SeriesNumber) == series_numbers[0] and int(dcm.InstanceNumber) == 1:
                if dcm.pixel_array.shape == (288, 384) and not np.any(np.isnan(dcm.pixel_array)):
                    sub_dcms[sub] = dcm.pixel_array
                break

        if count % 50 == 0:
            print(f'Done {count} {sub} {len(sub_dcms)}')

        count += 1

print('Done')

Done 0 1099545 1
Done 50 1038346 51
Done 100 1091560 101
Done 150 1043419 151
Done 200 1071592 201
Done 250 1080525 251
Done 300 1094644 301
Done 350 1011673 351
Done 400 1035018 401
Done 450 1089534 451
Done 500 1053612 501
Done 550 1027758 551
Done 600 1092854 601
Done 650 1040138 651
Done 700 1076172 701
Done 750 1058108 751
Done 800 1015842 801
Done 850 1081979 851
Done 900 1008451 901
Done 950 1025654 951
Done 1000 1000177 1001
Done 1050 1008856 1051
Done 1100 1016863 1101
Done 1150 1024503 1151
Done 1200 1031614 1201
Done 1250 1040358 1251
Done 1300 1048962 1301
Done 1350 1056228 1351
Done 1400 1063659 1401
Done 1450 1069569 1451
Done 1500 1077746 1501
Done 1550 1084937 1551
Done 1600 1091027 1601
Done 1650 1097497 1651
Done 1700 1115680 1701
Done 1750 1160228 1751
Done 1800 1106282 1801
Done 1850 1103333 1851
Done 1900 1110237 1901
Done 1950 1189395 1951
Done 2000 1113497 2001
Done 2050 1135628 2051
Done 2100 1180484 2101
Done 2150 1168602 2151
Done 2200 1158635 2201
Done 2250 1

KeyboardInterrupt: 

In [7]:
import pickle

pickle.dump(sub_dcms, open('/run/media/anton/Elements/UKB/liver10-15.pkl', 'wb'))

print('Done')

Done


In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FeatureExtractor(nn.Module):
    def __init__(self):
        super(FeatureExtractor, self).__init__()
        self.conv1 = nn.Conv2d(1,10,(7,7),stride=3).float().cuda()
        self.conv2 = nn.Conv2d(10,10,(7,7),stride=3).float().cuda()
        self.conv3 = nn.Conv2d(10,10,(7,7),stride=3).float().cuda()
        self.conv4 = nn.Conv2d(10,10,(7,7),stride=3).float().cuda()
        self.mp = nn.MaxPool2d((1,2)).float().cuda()

    def forward(self, x):
        N = x.shape[0]
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.mp(x)
        x = x.reshape(N,-1)
        return x

ext = FeatureExtractor()
ext.load_state_dict(torch.load('/run/media/anton/Elements/UKB/liver_feature_extract2.torch'))

print('Done')

  ext.load_state_dict(torch.load('/run/media/anton/Elements/UKB/liver_feature_extract2.torch'))


Done


In [11]:
liver_feats = dict()
count = 0

for sub,img in sub_dcms.items():
    x = torch.from_numpy(img).float().cuda().unsqueeze(0).unsqueeze(0)
    with torch.no_grad():
        z = ext(x)
        feat = z[0].cpu().numpy()
        liver_feats[sub] = feat
    if count % 500 == 0:
        print(f'Done {count} {len(liver_feats)}')
    count += 1

Done 0 1
Done 500 501
Done 1000 1001
Done 1500 1501
Done 2000 2001
Done 2500 2501
Done 3000 3001
Done 3500 3501
Done 4000 4001
Done 4500 4501
Done 5000 5001
Done 5500 5501
Done 6000 6001
Done 6500 6501
Done 7000 7001
Done 7500 7501
Done 8000 8001
Done 8500 8501
Done 9000 9001
Done 9500 9501
Done 10000 10001


In [12]:
import pickle

pickle.dump(liver_feats, open('/run/media/anton/Elements/UKB/liver_feats10-15.pkl', 'wb'))

print('Done')

Done
