In [None]:
!pip install torchxrayvision

In [None]:
# First, authentication is needed to access the database
from google.cloud import bigquery
from google.colab import auth, drive
auth.authenticate_user()
# Mount Google Drive at a mount point without spaces whose parent directory
# already exists. The previous mount point ('gdrive/My Drive') contained a
# space and sat inside a not-yet-created folder, both of which Colab rejects.
# Drive's own "My Drive" folder appears inside the mount point.
drive.mount('/content/gdrive')

Authenticated


In [None]:
# Load the necessary modules
import os
import pandas as pd

from IPython.display import clear_output

# Identifiers of the BigQuery project and of the source data
project_id = "coherent-code-395904"
db = 'physionet-data'
dataset = 'mimiciv'
# Publish the project ID through the environment as well
os.environ["GOOGLE_CLOUD_PROJECT"] = project_id

# Limit how many rows / columns pandas renders when a frame is displayed
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 50)

# Defining the function to retrieve a dataframe using SQL query
def run_query(query: str, project_id: str=project_id) -> pd.DataFrame:
  r"""
  Execute a SQL statement on BigQuery and hand back the resulting table.

  Params
  ---
  - `query`: SQL text to execute (standard SQL dialect)
  - `project_id`: BigQuery project ID forwarded to the reader; defaults to
    the notebook-level `project_id`

  Returns
  ---
  df: the query result as a `pandas.DataFrame`
  """
  reader_options = {'project_id': project_id, 'dialect': 'standard'}
  return pd.io.gbq.read_gbq(query, **reader_options)

In [None]:
# Subjects whose chest X-ray records are looked up.
# NOTE(review): this cell previously interpolated a literal `{...}`, which an
# f-string renders as the text "Ellipsis" and therefore yields invalid SQL.
# 10000032 is the subject whose images are downloaded further down in this
# notebook — replace the list with the intended cohort.
subject_ids = [10000032]
assert subject_ids, "subject_ids must contain at least one ID (`IN ()` is not valid SQL)"
# int() keeps anything non-numeric out of the SQL text
subject_id_list = ", ".join(str(int(subject_id)) for subject_id in subject_ids)

cxr = run_query(f"""
SELECT subject_id, path
  FROM `physionet-data.mimic_cxr.record_list`
  WHERE subject_id IN ({subject_id_list})
""")
cxr

In [None]:
!wget -r -N -c -np --user tuankhoin --password D!tc0nmemay https://physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg img.jpg
!wget -r -N -c -np --user tuankhoin --password D!tc0nmemay https://physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s50414267/174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962.jpg img2.jpg

--2023-10-08 07:45:49--  https://physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg
Resolving physionet.org (physionet.org)... 18.18.42.54
Connecting to physionet.org (physionet.org)|18.18.42.54|:443... connected.
HTTP request sent, awaiting response... 401 Unauthorized
Authentication selected: Basic realm="PhysioNet", charset="UTF-8"
Reusing existing connection to physionet.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 1430084 (1.4M) [image/jpeg]
Saving to: ‘physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg’


2023-10-08 07:45:50 (1.36 MB/s) - ‘physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg’ saved [1430084/1430084]

--2023-10-08 07:45:50--  http://img.jpg/
Resolving img.jpg (img.jpg)... failed: Name or service not known.
wget: unable to resolve host address ‘img

In [None]:
import torchxrayvision as xrv
import skimage
import cv2
import matplotlib.pyplot as plt
import torch, torchvision
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Label names (not referenced by the cells visible in this part of the notebook).
# NOTE(review): 'Tuberculoisis' looks like a misspelling of 'Tuberculosis' —
# confirm before relying on this label.
classes = ['Tuberculoisis', 'Pneumonia', 'Bacteria']

# Folder holding the downloaded study (note the trailing slash)
path = '/content/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s50414267/'

# The two image files of that study
img_path = path + '02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg'
img_path2 = path + '174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962.jpg'

# Quick visual check, left disabled:
# img = cv2.imread(img_path).mean(2)#[:,:,::-1]
# plt.imshow(img)

In [None]:
# `glob` is imported here because this cell runs before the cell that
# otherwise imports it; without this line a fresh kernel raises NameError.
import glob

# Recursively list every JPEG below the downloaded MIMIC-CXR files directory
glob.glob('/content/physionet.org/files/mimic-cxr-jpg/2.0.0/files/**/*.jpg', recursive=True)

['/content/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s50414267/02aa804e-bde0afdd-112c0b34-7bc16630-4e384014.jpg',
 '/content/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s50414267/174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962.jpg']

[Constructing the dataset in PyTorch](https://pytorch.org/tutorials/beginner/basics/data_tutorial.html#creating-a-custom-dataset-for-your-files)

In [None]:
import os
import glob
from torch.utils.data import Dataset

class MIMICDataset(Dataset):
    """Dataset over the image files found directly inside one directory.

    Each item is one image, read with `skimage.io.imread`, rescaled with
    `xrv.datasets.normalize(image, 255)` and returned as a `torch.Tensor`.
    The dataset carries no labels.

    Params
    ---
    - `img_dir`: directory to scan (not recursive); a trailing slash is optional
    - `transform`: optional callable applied to the image after a leading
      axis has been added to it
    - `target_transform`: stored for API compatibility only; unused because
      there are no targets
    """

    def __init__(self, img_dir, transform=None, target_transform=None):
        self.img_dir = img_dir
        # os.path.join makes the pattern independent of a trailing slash, and
        # sorting keeps the index -> file mapping stable between runs (glob
        # itself returns entries in arbitrary order).
        self.dir = sorted(glob.glob(os.path.join(img_dir, '*')))
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        # The dataset length is the number of files found in `img_dir`.
        return len(self.dir)

    def __getitem__(self, idx):
        """Load, normalise and (optionally) transform the image at `idx`.

        Raises `IndexError` when `idx` is out of range, which is also what
        ends a plain `for item in dataset` loop.
        """
        image = skimage.io.imread(self.dir[idx])
        image = xrv.datasets.normalize(image, 255)
        if self.transform:
            # The leading axis is only added on this path, so an untransformed
            # item keeps the shape returned by imread.
            image = self.transform(image[None, ...])
        return torch.from_numpy(image)

# Preprocessing applied to every image; 512 is the resolution named in the
# "res512" weights loaded later in the notebook.
transform = torchvision.transforms.Compose([
    xrv.datasets.XRayCenterCrop(),
    xrv.datasets.XRayResizer(512),
])
data = MIMICDataset(img_dir=path, transform=transform)
# Show the first preprocessed image tensor as a sanity check
data[0]

Setting XRayResizer engine to cv2 could increase performance.


tensor([[[ -823.3372,  -918.1315,  -918.2033,  ...,  -918.2033,
           -918.1315,  -823.3372],
         [ -918.1315, -1023.8400, -1023.9200,  ..., -1023.9200,
          -1023.8400,  -918.1315],
         [ -918.2033, -1023.9200, -1024.0000,  ..., -1024.0000,
          -1023.9200,  -918.2033],
         ...,
         [ -918.2033, -1023.9200, -1024.0000,  ..., -1024.0000,
          -1023.9200,  -918.2033],
         [ -918.1315, -1023.8400, -1023.9200,  ..., -1023.9200,
          -1023.8400,  -918.1315],
         [ -823.3372,  -918.1315,  -918.2033,  ...,  -918.2033,
           -918.1315,  -823.3372]]])

In [None]:
# Batches of one image each, without shuffling
loader = torch.utils.data.DataLoader(
    dataset=data,
    batch_size=1,
    shuffle=False,
)

In [None]:
# Pretrained torchxrayvision ResNet ("resnet50-res512-all" weights)
model = xrv.models.ResNet(weights="resnet50-res512-all")
model.to(device)
model.eval()  # inference only; has no effect if the model is already in eval mode
activation = {}

def get_activation(name):
    """Return a forward hook that records the hooked module's output.

    The detached output is written to the notebook-level `activation` dict
    under `name`, replacing whatever the previous forward pass stored there.
    """
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook
model.model.fc.register_forward_hook(get_activation('fc'))

latent = []
# No gradients are needed to read activations
with torch.no_grad():
  for image in data:
    # Tensor.to() returns the moved tensor instead of moving it in place, so
    # the result has to be kept; otherwise the input stays on the CPU while
    # the model may be on the GPU.
    image = image.to(device)
    output = model(image[None, ...])  # leading axis = batch of one
    latent.append(activation['fc'])

# One row of 'fc' activations per image
torch.stack(latent)

tensor([[[-3.4002e+00, -3.8694e+00, -1.7422e+00, -5.9851e+00, -4.8911e+00,
          -6.0759e+00, -5.8155e+00, -2.3434e+00, -4.5558e+00, -3.8003e+00,
          -6.5281e+00, -1.4260e+00, -3.5992e+00, -1.0810e+01,  5.1805e-40,
          -2.4969e+00, -3.9474e+00,  1.9564e-39]],

        [[-2.0501e+00, -2.7452e+00, -9.8228e-01, -2.1348e+00, -5.5666e+00,
          -3.5782e+00, -3.8323e+00, -2.7561e+00, -2.4668e+00, -3.4668e+00,
          -6.9342e+00, -3.5645e+00, -4.3610e+00, -6.9339e+00,  2.3493e-40,
          -2.5121e+00, -1.6962e+00,  1.2481e-39]]])

In [None]:
from torchsummary import summary
model = xrv.models.DenseNet(weights="densenet121-res224-all")
# torchsummary creates its dummy input on CUDA by default when a GPU is
# available, so the model must live on the same device as that input.
model.to(device)
# NOTE(review): the weights are the "res224" variant while the summary input
# is 512x512 — confirm which input size is intended.
summary(model, (1,512,512), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           3,136
       BatchNorm2d-2         [-1, 64, 256, 256]             128
              ReLU-3         [-1, 64, 256, 256]               0
         MaxPool2d-4         [-1, 64, 128, 128]               0
            Conv2d-5         [-1, 64, 128, 128]           4,096
       BatchNorm2d-6         [-1, 64, 128, 128]             128
              ReLU-7         [-1, 64, 128, 128]               0
            Conv2d-8         [-1, 64, 128, 128]          36,864
       BatchNorm2d-9         [-1, 64, 128, 128]             128
             ReLU-10         [-1, 64, 128, 128]               0
           Conv2d-11        [-1, 256, 128, 128]          16,384
      BatchNorm2d-12        [-1, 256, 128, 128]             512
           Conv2d-13        [-1, 256, 128, 128]          16,384
      BatchNorm2d-14        [-1, 256, 1

In [None]:
import torchxrayvision as xrv
import skimage, torch, torchvision

# Prepare the image:
img = skimage.io.imread(img_path)
# Rescale exactly as MIMICDataset does in this notebook; with this step
# disabled the model would be fed raw 0-255 pixel values.
img = xrv.datasets.normalize(img, 255) # convert 8-bit image to [-1024, 1024] range
img = img[None, ...] # Make single color channel

transform = torchvision.transforms.Compose([xrv.datasets.XRayCenterCrop(),xrv.datasets.XRayResizer(224)])

img = transform(img)
img = torch.from_numpy(img)

# Load model and process image
model = xrv.models.DenseNet(weights="densenet121-res224-all")
# Inference only, so skip gradient tracking
with torch.no_grad():
    outputs = model(img[None,...]) # or model.features(img[None,...])

# Print results: one score per pathology name exposed by the model
dict(zip(model.pathologies,outputs[0].detach().numpy()))