In [None]:
from google.colab import drive
drive.mount('/content/drive')
!pip install torchxrayvision
!cp ./drive/MyDrive/mimic-cxr/rrs-mimic-cxr.zip rrs-mimic-cxr.zip
!unzip -q rrs-mimic-cxr.zip

Mounted at /content/drive
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchxrayvision
  Downloading torchxrayvision-1.0.1-py3-none-any.whl (29.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.0/29.0 MB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torchxrayvision
Successfully installed torchxrayvision-1.0.1


In [None]:
import numpy as np
import pandas as pd
import torch, torchvision
import torchxrayvision as xrv
import skimage, skimage.io
from torch.utils.data import Dataset, DataLoader

class CXRDataset(Dataset):
    """Dataset of X-ray images listed in a text file of image paths.

    Every line of ``csv_file`` holds one or more comma-separated image paths.
    The lines are flattened so that each path becomes one dataset item, and
    items before ``start_idx`` are dropped.

    Args:
        csv_file: Path of the image-list file (read with tab as separator and
            no header, so each line must form a single column).
        start_idx: Position in the flattened path list at which the dataset
            starts. Defaults to 0, i.e. every image is included.
    """

    def __init__(self, csv_file, start_idx=0):
        image_lists = pd.read_csv(csv_file, sep='\t', header=None)
        image_lists.columns = ['images']
        # One line may list several images: split it and give each path its own row.
        image_lists['images'] = image_lists['images'].str.split(",")
        image_paths = image_lists.explode('images', ignore_index=True)
        # Re-index from 0 so that __getitem__ can look items up by position.
        self.data = image_paths.iloc[start_idx:, :].reset_index(drop=True)

        self.transform = torchvision.transforms.Compose([
            xrv.datasets.XRayCenterCrop(),
            xrv.datasets.XRayResizer(224),
        ])

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.data)

    @staticmethod
    def _as_single_channel(img):
        """Return ``img`` as a ``(1, H, W)`` array.

        A 2-D image only gains a leading channel axis. An image with a
        trailing channel axis is reduced to its first channel first, as in
        the torchxrayvision usage example.
        """
        if img.ndim > 2:
            img = img[:, :, 0]
        return img[None, :, :]

    def __getitem__(self, idx):
        """Load, normalize and transform the image at position ``idx``.

        Returns the transformed image, or the normalized ``(1, H, W)`` array
        unchanged when ``self.transform`` is ``None``.
        """
        img = skimage.io.imread(self.data.loc[idx, 'images'])
        # 255 is passed as the maximum input value for xrv's normalization.
        img = xrv.datasets.normalize(img, 255)
        img = self._as_single_channel(img)
        if self.transform is not None:
            img = self.transform(img)
        return img

In [None]:
# Build the dataset over the tail of the image list, starting at flattened index 230400.
# (The earlier embeddings file loaded further down has 230401 rows, so this pass
# appears to resume where a previous run stopped.)
img_dataset = CXRDataset(csv_file='/content/train.image.tok', start_idx=230400)

# Batches of 128 images, served in dataset order (no shuffling is requested).
dataloader = DataLoader(dataset=img_dataset, batch_size=128)

In [None]:
# Load the torchxrayvision DenseNet with the "densenet121-res224-chex" weights
# (downloaded on first use, as the cell output shows).
model = xrv.models.DenseNet(weights="densenet121-res224-chex")


Downloading weights...
If this fails you can run `wget https://github.com/mlmed/torchxrayvision/releases/download/v1/chex-densenet121-d121-tw-lr001-rot45-tr15-sc15-seed0-best.pt -O /root/.torchxrayvision/models_data/chex-densenet121-d121-tw-lr001-rot45-tr15-sc15-seed0-best.pt`
[██████████████████████████████████████████████████]


In [None]:
# Progress bar and functional API used by the feature-extraction loop.
import torch.nn.functional as F
from tqdm import tqdm

# Register tqdm's progress_* methods on pandas objects.
tqdm.pandas()

# Move the model to the GPU; as the last expression of the cell, the returned
# module is what the notebook displays.
model.to(device='cuda')

XRV-DenseNet121-densenet121-res224-chex

In [None]:
# Extract one 1024-dimensional feature vector per image.
#
# Per-batch results are collected in a list and stacked once at the end;
# stacking inside the loop would recopy the whole accumulated array every batch.
# Row 0 of `res` is an all-zero placeholder that later cells skip with `res[1:]`;
# it is kept so those cells continue to work unchanged.
feature_chunks = [np.zeros((1, 1024))]

i = 0  # number of batches processed (stays 0 if the dataloader is empty)
with torch.no_grad():
    for i, batch in enumerate(tqdm(dataloader), start=1):
        feats = model.features(batch.to(device='cuda'))
        feats = F.relu(feats, inplace=True)
        # Global average pooling: one value per feature map.
        feats = F.adaptive_avg_pool2d(feats, (1, 1))
        feature_chunks.append(feats.cpu().numpy().reshape(batch.shape[0], -1))

res = np.vstack(feature_chunks)

100%|██████████| 20/20 [00:47<00:00,  2.36s/it]


In [None]:
# Save the features from this pass (including the placeholder first row) as a
# compressed .npz archive, stored under the key 'a'.
np.savez_compressed('chexpert_embeddings_train_p2.npz',a=res)

In [None]:
# Copy the partial embeddings archive to Google Drive so it outlives the Colab runtime.
!cp chexpert_embeddings_train_p2.npz ./drive/MyDrive/chexpert_embeddings_train_p2.npz

In [None]:
# Fetch the embeddings archive already stored on Google Drive into the working directory.
!cp ./drive/MyDrive/chexpert_embeddings.npz chexpert_embeddings.npz 

In [None]:
# Read the array stored under key 'a'. The context manager closes the .npz
# archive as soon as the array has been loaded into memory.
with np.load('chexpert_embeddings.npz') as archive:
    prev_res = archive['a']
prev_res.shape

(230401, 1024)

In [None]:
# Skip the first row of each array (`res` starts with an all-zero placeholder;
# `prev_res` presumably does too, from its own run) and stack the earlier
# embeddings above the new ones.
final_embed = np.concatenate((prev_res[1:], res[1:]), axis=0)
final_embed.shape

(232855, 1024)

In [None]:
# Save the merged embeddings as a compressed .npz archive, stored under the key 'a'.
np.savez_compressed('chexpert_embeddings_train.npz',a=final_embed)

In [None]:
# Copy the merged embeddings archive to Google Drive so it outlives the Colab runtime.
!cp chexpert_embeddings_train.npz ./drive/MyDrive/chexpert_embeddings_train.npz

In [None]:
# Scratch check: 230400 (the start index used for the second embedding pass)
# divided by 3072. NOTE(review): 3072 is not defined anywhere in the visible
# notebook — presumably a chunk size from the earlier run; confirm or remove.
230400/3072

75.0

In [None]:
# Rebuild the flattened image list for inspection: one row per image path.
raw_lines = pd.read_csv('train.image.tok', sep='\t', header=None)
raw_lines.columns = ['images']
# Each line may hold several comma-separated paths; split, then explode to one per row.
df1 = raw_lines.assign(
    images=[entry.split(",") for entry in raw_lines['images']]
).explode('images', ignore_index=True)
df1

Unnamed: 0,images
0,mimic-cxr-images-512/files/p10/p10000032/s5041...
1,mimic-cxr-images-512/files/p10/p10000032/s5041...
2,mimic-cxr-images-512/files/p10/p10000032/s5318...
3,mimic-cxr-images-512/files/p10/p10000032/s5318...
4,mimic-cxr-images-512/files/p10/p10000032/s5391...
...,...
232850,mimic-cxr-images-512/files/p19/p19999733/s5713...
232851,mimic-cxr-images-512/files/p19/p19999733/s5713...
232852,mimic-cxr-images-512/files/p19/p19999733/s5713...
232853,mimic-cxr-images-512/files/p19/p19999987/s5862...
