In [1]:
import json
import os
import re
from sys import stdin

import dicom2nifti
import numpy as np
import pandas as pd
import pydicom
import skimage
import skimage.io
from lxml.html import parse

In [323]:
# Radiopaedia case slug; used to build the case URL and the local file/folder names.
case = "covid-19-pneumonia-40"

In [324]:
# Download the Radiopaedia case page HTML to case-<case>.htm.
!wget -O case-{case}.htm  https://radiopaedia.org/cases/{case}

--2020-03-30 19:15:59--  https://radiopaedia.org/cases/covid-19-pneumonia-40
Resolving radiopaedia.org (radiopaedia.org)... 104.26.8.61, 104.26.9.61
Connecting to radiopaedia.org (radiopaedia.org)|104.26.8.61|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘case-covid-19-pneumonia-40.htm’

case-covid-19-pneum     [  <=>               ]  44.95K   121KB/s    in 0.4s    

2020-03-30 19:16:00 (121 KB/s) - ‘case-covid-19-pneumonia-40.htm’ saved [46024]



In [325]:
# Print the HTML lines carrying a data-study-id attribute; one of these ids is copied by hand into `stackid`.
!grep "data-study-id" case-{case}.htm

</div></div></div><div class="well case-section case-study" data-ref="study-86625" data-study-id="86625" data-study-stacks-url="/studies/86625/stacks?lang=us"><div class="main-study-desc">
<div class="sub-section study-findings body"><p>There are pronounced bilateral confluent ground-glass opacities, mostly in the periphery of the lungs.</p><p>No pleural effusion or adenopathy.</p><p>The 360° VRT shows extensive bilateral lung damage.</p></div></div><div class="well case-section case-study" data-ref="study-86629" data-study-id="86629" data-study-stacks-url="/studies/86629/stacks?lang=us"><div class="main-study-desc">


In [326]:
# Study id taken from a data-study-id attribute of the case page.
# Despite the name, it is used as the <id> in the /studies/<id>/stacks URL.
stackid = "86625"

In [327]:
# Download the stacks JSON of the chosen study to stacks-<stackid>.json.
!wget -O stacks-{stackid}.json https://radiopaedia.org/studies/{stackid}/stacks

--2020-03-30 19:16:39--  https://radiopaedia.org/studies/86625/stacks
Resolving radiopaedia.org (radiopaedia.org)... 104.26.9.61, 104.26.8.61
Connecting to radiopaedia.org (radiopaedia.org)|104.26.9.61|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/json]
Saving to: ‘stacks-86625.json’

stacks-86625.json       [ <=>                ] 206.52K  --.-KB/s    in 0.08s   

2020-03-30 19:16:40 (2.40 MB/s) - ‘stacks-86625.json’ saved [211474]



In [328]:
# Parse the downloaded stacks JSON. The context manager closes the file handle
# deterministically, and UTF-8 is requested explicitly so parsing does not
# depend on the platform's default locale encoding.
with open("stacks-{}.json".format(stackid), encoding="utf-8") as stacks_file:
    y = json.load(stacks_file)

In [329]:
# Number of top-level entries in the parsed stacks JSON.
len(y)

5

In [330]:
# Modality reported by each entry of the stacks JSON, in file order.
[stack["modality"] for stack in y]

['CT', 'CT', 'CT', 'CT', 'CT']

In [331]:
# Position in `y` of the stack to download and convert.
index = 0

In [332]:
# Gather, for every image of the selected stack, its full-resolution URL and
# its position value (same order in both lists).
images = y[index]["images"]
urls = [image["fullscreen_filename"] for image in images]
positions = [image["position"] for image in images]

In [333]:
# Image URLs of the selected stack ordered by their "position" value.
# Built directly from `y[index]` instead of re-indexing the existing `urls`
# variable, so re-running this cell cannot apply the permutation twice and
# scramble the order. The result is still a NumPy array of strings.
urls = np.asarray(
    [
        image["fullscreen_filename"]
        for image in sorted(y[index]["images"], key=lambda image: image["position"])
    ]
)

In [334]:
# Local folder name for the downloaded slices: radiopaedia_org_<case>_<stackid>_<index>.
folder = "_".join(["radiopaedia_org", case, str(stackid), str(index)])
folder

'radiopaedia_org_covid-19-pneumonia-40_86625_0'

In [335]:
# Download every slice image into `folder`, naming each file
# <4-digit index in `urls`>_<folder>.jpeg.
!mkdir -p {folder}
for i, url in enumerate(urls):
    # The zero-padded prefix is what the DICOM conversion cell later parses as the slice position.
    # NOTE(review): `url` is interpolated into the shell command unquoted; a URL containing
    # shell metacharacters (e.g. `&`) would not be passed to wget intact.
    !wget -q -O {folder + "/" + str(i).zfill(4) + "_" + folder}.jpeg {url}

In [341]:
import pydicom
from pydicom.dataset import Dataset, FileDataset
from pydicom.uid import ExplicitVRLittleEndian
import pydicom._storage_sopclass_uids


def convert(img, filename, DCM_SliceLocation, length):
    """Write one 2-D image as a synthetic, uncompressed CT DICOM slice.

    Parameters
    ----------
    img : numpy.ndarray
        Pixel data for one slice. It is cast to ``uint16``; ``shape[0]`` and
        ``shape[1]`` become Rows and Columns, so a 2-D array is expected.
    filename : str
        Path of the DICOM file to write.
    DCM_SliceLocation : int or float
        Slice index. Stored as SliceLocation and, multiplied by 4 and negated,
        as the z component of ImagePositionPatient.
    length : int
        Stored in the ScanLength element (the caller passes the slice count).

    Notes
    -----
    Patient identity, geometry (origin, orientation, pixel spacing) and
    rescale values are hard-coded placeholders, not values read from the image.

    NOTE(review): Study/Series/FrameOfReference UIDs are regenerated on every
    call, so slices written by separate calls do not share them.
    """
    image2d = img.astype(np.uint16)

    # File meta information header: CT Image Storage, explicit VR little endian.
    meta = pydicom.Dataset()
    meta.MediaStorageSOPClassUID = pydicom._storage_sopclass_uids.CTImageStorage
    meta.MediaStorageSOPInstanceUID = pydicom.uid.generate_uid()
    meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian

    # Associate the dataset with the real output path instead of a placeholder name.
    ds = pydicom.dataset.FileDataset(filename, {},
                                     file_meta=meta, preamble=b"\0" * 128)

    ds.is_little_endian = True
    ds.is_implicit_VR = False

    # The dataset's SOP class/instance must match the file meta header. The
    # original wrote MR Image Storage here although the header and Modality
    # say CT, and never set SOPInstanceUID at all.
    ds.SOPClassUID = meta.MediaStorageSOPClassUID
    ds.SOPInstanceUID = meta.MediaStorageSOPInstanceUID
    ds.PatientName = "Test^Firstname"
    ds.PatientID = "123456"

    ds.Modality = "CT"
    ds.SeriesInstanceUID = pydicom.uid.generate_uid()
    ds.StudyInstanceUID = pydicom.uid.generate_uid()
    ds.FrameOfReferenceUID = pydicom.uid.generate_uid()

    ds.BitsStored = 16
    ds.BitsAllocated = 16
    ds.SamplesPerPixel = 1
    ds.HighBit = 15
    ds.SliceLocation = DCM_SliceLocation
    ds.SpacingBetweenSlices = 1
    ds.ScanLength = length

    ds.ImagesInAcquisition = "1"

    ds.Rows = image2d.shape[0]
    ds.Columns = image2d.shape[1]
    ds.InstanceNumber = 1

    # Fixed in-plane origin; z is -(slice index * 4), i.e. 4 units between slices.
    # Original author's note on the factor: "default of 6, sometimes 1".
    ds.ImagePositionPatient = r"-159\-174" + "\\-" + str(DCM_SliceLocation * 4)
    ds.ImageOrientationPatient = r"1\0\0\0\-1\0"
    ds.ImageType = r"ORIGINAL\PRIMARY\AXIAL"

    ds.RescaleIntercept = "0"
    ds.RescaleSlope = "1"
    ds.PixelSpacing = r"0.683594\0.683594"  # alternative noted by the author: r"1\1"
    ds.PhotometricInterpretation = "MONOCHROME2"
    # NOTE(review): 1 declares signed pixels while the buffer is uint16. The two
    # agree only for values below 32768 -- confirm the intended representation.
    ds.PixelRepresentation = 1

    pydicom.dataset.validate_file_meta(ds.file_meta, enforce_standard=True)

    ds.PixelData = image2d.tobytes()

    ds.save_as(filename)

In [344]:
# Convert every downloaded slice image in `folder` into a DICOM file inside
# the sibling directory `<folder>-dcm`.
dcm_folder = folder + "-dcm"
os.makedirs(dcm_folder, exist_ok=True)

# List the directory once rather than on every iteration, and keep only the
# `.jpeg` files written by the download step so stray entries (e.g. `.DS_Store`)
# cannot crash the integer parse below.
slice_files = sorted(name for name in os.listdir(folder) if name.endswith(".jpeg"))

for f in slice_files:
    # File names start with the zero-padded slice index followed by "_".
    position = int(f.split("_")[0])
    img = skimage.io.imread(os.path.join(folder, f))
    if img.ndim > 2:
        # Multi-channel image: keep only the first channel.
        img = img[:, :, 0]
    convert(img, os.path.join(dcm_folder, f + ".dcm"), position, len(slice_files))
    

In [345]:
# Assemble the DICOM slices in `<folder>-dcm` into one reoriented NIfTI volume
# written next to that directory as `<folder>-dcm.nii.gz`.
dcm_dir = folder + "-dcm"
dicom2nifti.convert_dicom.dicom_series_to_nifti(
    dcm_dir,
    dcm_dir + ".nii.gz",
    reorient_nifti=True,
)


{'NII_FILE': 'radiopaedia_org_covid-19-pneumonia-40_86625_0-dcm.nii.gz',
 'NII': <nibabel.nifti1.Nifti1Image at 0x112693790>,
 'MAX_SLICE_INCREMENT': 4.0}