In [1]:
### taking close look at One Image
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# torch dependence
import torch
from torch.utils.data.sampler import SubsetRandomSampler
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.autograd import Variable

In [2]:
import os, random, warnings, cv2, glob
import numpy as np
import numpy.linalg as la
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from skimage.measure import label # for labeling regions
from skimage.measure import regionprops # for shape analysis
from skimage.color import label2rgb # for making overlay plots
from skimage.io import imread # for reading images
from skimage.feature import greycomatrix, greycoprops

# Names of grey-level co-occurrence properties.
# NOTE(review): presumably passed to skimage's `greycoprops`; not used in the cells visible here — confirm.
greyco_prop_list = ['contrast', 'homogeneity', 'correlation', 'ASM']
import matplotlib.pyplot as plt
import plotly.express as px
import lib.func as func
# Plotly's sequential "Plasma" colour scale (a list of hex colour strings), printed for reference.
cs = px.colors.sequential.Plasma
print(cs)
import matplotlib as mpl
# Global matplotlib styling applied to every figure drawn after this cell.
# NOTE(review): the 'Avenir' font must be installed on the machine, otherwise matplotlib falls back to a default font — confirm.
mpl.rcParams['font.family'] = 'Avenir'
plt.rcParams['font.size'] = 18
plt.rcParams['axes.linewidth'] = 2

['#0d0887', '#46039f', '#7201a8', '#9c179e', '#bd3786', '#d8576b', '#ed7953', '#fb9f3a', '#fdca26', '#f0f921']


In [3]:
# Tissue class names; the order matches the integer column ids 0-4 that `aggr` renames to these names.
labels = ['emphysema', 'fibrosis', 'ground glass', 'healthy', 'micronodules']
# One matplotlib colour name per entry of `labels` (same order).
color = ['lightcoral', 'gold', 'turquoise', 'forestgreen', 'purple']

In [24]:
## functions
def aggr(df, n, labels = labels):
    """Aggregate per-slice class fractions into weighted means per lung region.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per slice. Columns ``0..4`` hold the class fractions, ``'weight'``
        holds the weight of the slice and ``'loc'`` the region name of the slice.
    n : hashable
        Patient identifier; it becomes the index value of every returned row.
    labels : list of str
        Class column names (after renaming) to aggregate.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'patient'``, with one row per distinct ``'loc'`` value plus
        an ``'overall'`` row. Columns are ``'loc'``, the weighted mean of every
        label, and the summed ``'weight'``.
    """
    df = df.rename(columns={0:'emphysema', 1:'fibrosis', 2:'ground glass',
                            3:'healthy', 4:'micronodules'}).reset_index(drop=True)

    # Weighted sum per region: sum(fraction * weight) for each label column.
    weighted = df[labels].mul(df['weight'], axis=0)
    per_loc = weighted.groupby(df['loc']).sum().join(df.groupby('loc')[['weight']].sum())

    # The 'overall' row pools the weighted sums and the weights of all regions.
    overall = per_loc.sum().to_frame().T
    overall.index = ['overall']
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    per_loc = pd.concat([per_loc, overall])

    # Turn weighted sums into weighted means.
    per_loc[labels] = per_loc[labels].div(per_loc['weight'], axis=0)

    per_loc['patient'] = n
    # Name the index explicitly so the region column is always called 'loc'.
    return per_loc.rename_axis('loc').reset_index().set_index('patient')

def get_loc(images):
    """Return the two indices that split ``images`` into three consecutive parts.

    The first value is ``len(images) // 3`` and the second is
    ``(2 * len(images)) // 3``.
    """
    n_slices = len(images)
    first_cut, second_cut = n_slices // 3, (2 * n_slices) // 3
    return first_cut, second_cut

def plot_result(df1, features, size = 20, scale = 3, ks=[]):
    """Show, for each selected column, a strip of patches ranked by that column.

    For every column ``k`` in ``ks`` the rows of ``df1`` are ordered by
    descending value of ``k``; every ``dn``-th row of that ordering is picked
    (at most six rows) and the matching entries of ``features`` are tiled side
    by side, separated by a one-pixel column of value 256.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Table whose row positions correspond to the entries of ``features``.
    features : sequence of 2-D arrays
        ``features[j]`` is drawn for row position ``j``; each entry must fit a
        ``size`` x ``size`` tile.
    size : int
        Edge length of one tile in pixels.
    scale : number
        Figure height in inches; the width is six times this value.
    ks : sequence of str
        Columns to plot. When empty, all columns of ``df1`` except the first
        one and the last two are used. The default list is never mutated.

    Side effects: prints the picked values together with the ``pred_cl``
    output for each picked patch, and shows one matplotlib figure per column.
    """
    n_tiles = 6
    dn = len(df1)//n_tiles + 1
    # `np.any(ks)` cannot reduce a list of strings on some NumPy versions, so test the length.
    if ks is None or len(ks) == 0:
        ks = list(df1.keys())[1:-2]

    for k in ks:
        # Positional order of the rows, largest value first.
        order = np.asarray(df1[k].argsort())[::-1]
        picks = order[::dn]
        data = []
        # Background of 256 lies above vmax, so separators render as white.
        display_grid = np.full((size, (size+1) * n_tiles - 1), 256.0)
        for ind, j in enumerate(picks):
            start = ind * (size + 1)
            display_grid[:, start : start + size] = features[j]
            data.append(pred_cl(features[j]))
        # `picks` holds positions, so index with .iloc rather than by label.
        print(df1[k].iloc[picks], data)

        fig, ax = plt.subplots(figsize=(scale * n_tiles, scale))
        ax.set_title(k)
        ax.grid(False)
        ax.imshow(display_grid, aspect='auto', cmap='gray', vmax=20, vmin=0)
        ax.set_xticks([])
        ax.set_yticks([])
        plt.show()
        # Close the figure explicitly; plt.clf() after show() would leave figures open.
        plt.close(fig)

## Object Detection pipeline
1. Load the model and the OSIC dataset.
2. Run object detection on the OSIC dataset with a sliding window.
3. Visualize and save the results.

In [245]:
# Use the GPU when one is available; inputs are moved to `device` explicitly before inference.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The former `if device == 'cuda':` branch was removed: a torch.device never
# compares equal to a str, so it was dead code, and the tensor type it named
# ('torch.cpu.FloatTensor') is not a valid default tensor type.
size_IMG = 32

# NOTE(review): torch.load unpickles a complete model object, which can execute
# arbitrary code — only load checkpoint files from a trusted source.
# NOTE(review): recent PyTorch releases may need `weights_only=False` to load a
# fully pickled model — confirm against the installed version.
model = torch.load("./ConvNet_06022021_aug_alldata_12120p8",
                   map_location=device) # load model
# Switch to inference mode (the call also returns the model, so the cell displays it).
model.eval()

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    1, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
  )
  (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        32, 8, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        8, 32, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        32, 16, kernel_size=

In [246]:
# Folder holding the pre-processed CT arrays.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
datadir = '/home/dfeng/Jiayi-tmp/pre-data/'

# Loading the image dictionary is slow, so reuse it when an earlier run of this
# cell already left a non-empty `imgs` in the kernel. Only NameError is caught,
# so real loading problems are no longer hidden by a bare `except`.
try:
    imgs_cached = len(imgs) > 0
except NameError:
    imgs_cached = False

if imgs_cached:
    print('avoiding reloading image files')
else:
    print('loading')
    # NOTE(review): allow_pickle=True can execute code stored in the file — load trusted files only.
    imgs = np.load(os.path.join(datadir, 'CT_img_tr-200-1000-HU-512-o.npy'), allow_pickle= True).item()
# imgs = np.load('test_imgs.npy', allow_pickle= True).item()
order_paitent = np.load(os.path.join(datadir, 'sorted_paitent.npy'), allow_pickle= True)


avoiding reloading image files


In [265]:
# Scoring mode for the per-slice class fractions computed in the inference loop:
#   True  -> sum the softmax probabilities of all patches in the slice
#   False -> count one vote per patch for its most probable class (argmax)
add_percentage = False

In [266]:
# Classify every patch of every slice and aggregate the class fractions per
# patient and per region ('upper' / 'middle' / 'bottom' by slice position).
dp = 32  # forwarded to func.get_patches as `dp`
patient_frames = []
for n in list(imgs.keys()):
    slice_rows = []
    l1, l2 = get_loc(imgs[n])
    for ind, image in enumerate(imgs[n]):
        ps = func.get_patches(image, dp = dp)
        if not ps:
            # No patches for this slice: nothing to classify.
            continue
        torchimgs = func.imgs_to_torch(ps).cpu().to(device)
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            cls = model(torchimgs)
        data = F.softmax(cls, dim = 1).detach().cpu().numpy()
        if add_percentage:
            # Soft scores: summed class probabilities, normalised to sum to 1.
            scores = np.sum(data, axis = 0)
        else:
            # Hard votes: each patch counts once for its most probable class.
            d = np.zeros(np.shape(data))
            d[np.arange(len(data)), np.argmax(data, axis = 1)] = 1
            scores = np.sum(d, axis = 0)
        # Each softmax row sums to 1, so np.sum(data) is (up to rounding) the patch count.
        dfi = pd.DataFrame(scores/np.sum(data)).T
        dfi['weight'] = len(data)
        if ind < l1:
            dfi['loc'] = 'upper'
        elif ind < l2:
            dfi['loc'] = 'middle'
        else:
            dfi['loc'] = 'bottom'
        slice_rows.append(dfi)
    # Build the per-patient frame once; DataFrame.append was removed in pandas 2.0
    # and growing a frame row by row is quadratic.
    ulbdf = pd.concat(slice_rows) if slice_rows else pd.DataFrame()
    if not ulbdf.empty:
        patient_frames.append(aggr(ulbdf, n))
        print(n)

DF = pd.concat(patient_frames) if patient_frames else pd.DataFrame()

ID00007637202177411956430
ID00009637202177434476278
ID00010637202177584971671
ID00012637202177665765362
ID00014637202177757139317
ID00015637202177877247924
ID00019637202178323708467
ID00020637202178344345685
ID00023637202179104603099
ID00025637202179541264076
ID00026637202179561894768
ID00027637202179689871102
ID00030637202181211009029
ID00032637202181710233084
ID00035637202182204917484
ID00038637202182690843176
ID00042637202184406822975
ID00047637202184938901501
ID00048637202185016727717
ID00051637202185848464638
ID00060637202187965290703
ID00061637202188184085559
ID00062637202188654068490
ID00067637202189903532242
ID00068637202190879923934
ID00072637202198161894406
ID00073637202198167792918
ID00075637202198610425520
ID00076637202199015035026
ID00077637202199102000916
ID00078637202199415319443
ID00082637202201836229724
ID00086637202203494931510
ID00089637202204675567570
ID00090637202204766623410
ID00093637202205278167493
ID00094637202205333947361
ID00099637202206203080121
ID0010263720

In [268]:
# Box plot of the per-patient class fractions, one colour per value of 'loc'.
box_columns = ['emphysema', 'fibrosis', 'ground glass', 'healthy', 'micronodules', 'loc']
px.box(DF[box_columns], color='loc', template='simple_white')

In [269]:
# Wide table: one row per patient, one column per (measure, loc) pair.
df = DF.pivot(columns = ['loc'])
# Flatten the two-level column index into single 'measure|loc' strings.
df.columns = ['|'.join(pair).strip('|') for pair in df.columns]
df

Unnamed: 0_level_0,emphysema|bottom,emphysema|middle,emphysema|overall,emphysema|upper,fibrosis|bottom,fibrosis|middle,fibrosis|overall,fibrosis|upper,ground glass|bottom,ground glass|middle,...,healthy|overall,healthy|upper,micronodules|bottom,micronodules|middle,micronodules|overall,micronodules|upper,weight|bottom,weight|middle,weight|overall,weight|upper
patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ID00007637202177411956430,0.000000,0.007519,0.003737,0.000000,0.655405,0.343716,0.352910,0.100402,0.141892,0.167562,...,0.418046,0.712851,0.042793,0.097744,0.105179,0.174699,444.0,931.0,1873.0,498.0
ID00009637202177434476278,0.002224,0.009304,0.011609,0.023432,0.047351,0.061245,0.056981,0.056990,0.000000,0.000000,...,0.000117,0.000106,0.950425,0.929277,0.931293,0.919472,15290.0,34501.0,68654.0,18863.0
ID00010637202177584971671,0.001401,0.036945,0.035211,0.058617,0.661064,0.234537,0.306338,0.212192,0.226891,0.205064,...,0.306338,0.248535,0.057423,0.121627,0.161720,0.362251,714.0,2409.0,3976.0,853.0
ID00012637202177665765362,0.007543,0.030099,0.017803,0.004643,0.103448,0.088739,0.081129,0.048282,0.473060,0.247535,...,0.633774,0.835655,0.004310,0.005708,0.011953,0.029712,928.0,1927.0,3932.0,1077.0
ID00014637202177757139317,0.047120,0.026022,0.029810,0.017422,0.384817,0.203222,0.214770,0.020906,0.117801,0.057001,...,0.554201,0.815331,0.170157,0.122677,0.135501,0.125436,382.0,807.0,1476.0,287.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ID00419637202311204720264,0.055696,0.079595,0.110145,0.255102,0.437975,0.160637,0.211594,0.027211,0.169620,0.156295,...,0.270290,0.343537,0.194937,0.290883,0.265942,0.302721,395.0,691.0,1380.0,294.0
ID00421637202311550012437,0.019580,0.021282,0.020160,0.018601,0.548718,0.207537,0.222824,0.093003,0.000000,0.002169,...,0.262288,0.263286,0.318881,0.463874,0.493518,0.624889,2145.0,7377.0,14038.0,4516.0
ID00422637202311677017371,0.024613,0.039399,0.038049,0.043470,0.586144,0.180933,0.205764,0.026626,0.109997,0.059713,...,0.399722,0.498279,0.113947,0.298753,0.289390,0.376019,3291.0,10584.0,19396.0,5521.0
ID00423637202312137826377,0.024842,0.024038,0.021811,0.012074,0.468294,0.155275,0.230002,0.085723,0.125082,0.030836,...,0.127393,0.109267,0.288080,0.636397,0.569428,0.792635,4589.0,8237.0,16139.0,3313.0


In [270]:
# Write the wide per-patient table (index included) to a CSV file in the working directory.
df.to_csv('pivot_area_b0_0602_aug_binary_12120p8.csv')