In [1]:
import os
import gc
import cv2
import math
import copy
import time
import random

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Albumentations for augmentations
import albumentations as A
from albumentations.pytorch import ToTensorV2


from sklearn.metrics import f1_score,roc_auc_score


import timm
from timm.models.efficientnet import *

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict


import warnings
warnings.filterwarnings("ignore")

from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
import scipy
from scipy import ndimage

import glob

In [2]:
# Collect the paths of all cropped slice images (pattern: <scan directory>/<file>).
# glob.glob already returns a list, so no extra list() copy is needed.
test_ct_all_list = glob.glob("work_test/test_crop/*/*")

In [3]:
# Sanity check: how many slice image paths were collected.
len(test_ct_all_list)

874235

In [4]:
def _hole_area(img, threshold=100, filter_size=5):
    """Count the dark pixels that are fully enclosed by bright pixels.

    Parameters
    ----------
    img : array with the channel on the last axis (indexed as ``[:, :, 0]``).
    threshold : values below this (after the minimum filter) count as dark.
    filter_size : side length of the minimum filter window.

    Returns
    -------
    float
        Number of pixels added by hole filling, i.e. the enclosed dark area.
    """
    # A scalar size is applied along every axis of the array, so the window
    # also spans the channel axis, exactly as in the unparameterised call.
    eroded = ndimage.minimum_filter(img, filter_size)
    # Binarise on the first channel only.
    foreground = eroded[:, :, 0] >= threshold
    filled = ndimage.binary_fill_holes(foreground)
    # Equivalent to sum(filled*255 - foreground*255) / 255.
    return float(np.count_nonzero(filled) - np.count_nonzero(foreground))


# One area value per entry of test_ct_all_list, in the same order.
test_area=[]
for path in tqdm(test_ct_all_list):
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail with the offending path instead of an opaque filter error.
        raise OSError(f"cv2.imread could not read image: {path}")
    test_area.append(_hole_area(img))

100%|██████████| 874235/874235 [2:25:30<00:00, 100.13it/s]  


In [5]:
# Pair every image path with its computed area, one row per slice image.
path_area_pairs = list(zip(test_ct_all_list, test_area))
test_area_df = pd.DataFrame(path_area_pairs, columns=['path', 'area'])

In [6]:
# Display the path/area frame (long frames are rendered truncated).
test_area_df

Unnamed: 0,path,area
0,work_test/test_crop/ct_scan_4475/73.jpg,3877.0
1,work_test/test_crop/ct_scan_4475/29.jpg,49009.0
2,work_test/test_crop/ct_scan_4475/4.jpg,306.0
3,work_test/test_crop/ct_scan_4475/23.jpg,39893.0
4,work_test/test_crop/ct_scan_4475/26.jpg,45906.0
...,...,...
874230,work_test/test_crop/ct_scan_3419/62.jpg,23720.0
874231,work_test/test_crop/ct_scan_3419/55.jpg,57899.0
874232,work_test/test_crop/ct_scan_3419/54.jpg,60696.0
874233,work_test/test_crop/ct_scan_3419/12.jpg,36171.0


In [7]:
# Persist the per-slice areas as CSV; the row index is not written.
test_area_df.to_csv(path_or_buf="work_test/test_area_df.csv", index=False)

In [8]:
# Split each image path at its last "/": the part before it is the scan
# directory, the part after it is the file name.
paths = test_area_df["path"]
test_area_df["ct_path"] = paths.map(lambda p: p.rpartition("/")[0])
# The slice number is the file name up to its first ".", parsed as an integer.
test_area_df["ct_slice"] = paths.map(lambda p: int(p.rpartition("/")[2].partition(".")[0]))

In [9]:
# Order rows by scan directory, then by numeric slice number within each scan.
test_area_df = test_area_df.sort_values(by=['ct_path', 'ct_slice'])

In [10]:
# Replace the index with a fresh 0..n-1 range; drop=True discards the old
# index instead of keeping it as a column.
test_area_df=test_area_df.reset_index(drop=True)

In [11]:
# Display the frame again, now including the ct_path and ct_slice columns.
test_area_df

Unnamed: 0,path,area,ct_path,ct_slice
0,work_test/test_crop/ct_scan_0/0.jpg,2089.0,work_test/test_crop/ct_scan_0,0
1,work_test/test_crop/ct_scan_0/1.jpg,2737.0,work_test/test_crop/ct_scan_0,1
2,work_test/test_crop/ct_scan_0/2.jpg,3532.0,work_test/test_crop/ct_scan_0,2
3,work_test/test_crop/ct_scan_0/3.jpg,5373.0,work_test/test_crop/ct_scan_0,3
4,work_test/test_crop/ct_scan_0/4.jpg,9076.0,work_test/test_crop/ct_scan_0,4
...,...,...,...,...
874230,work_test/test_crop/ct_scan_999/38.jpg,5596.0,work_test/test_crop/ct_scan_999,38
874231,work_test/test_crop/ct_scan_999/39.jpg,4850.0,work_test/test_crop/ct_scan_999,39
874232,work_test/test_crop/ct_scan_999/40.jpg,4397.0,work_test/test_crop/ct_scan_999,40
874233,work_test/test_crop/ct_scan_999/41.jpg,3720.0,work_test/test_crop/ct_scan_999,41


In [12]:
def sum_max(a,w=0.4):
    """Locate the contiguous window of ``a`` with the largest sum.

    The window length is ``ceil(len(a) * w)``. Windows are scanned left to
    right and a window only replaces the current best when its sum is
    strictly greater, so the earliest maximal window wins. The running
    maximum starts at 0, so when no window sum exceeds 0 the start stays 0.

    Parameters
    ----------
    a : sliceable sequence of numbers (e.g. a 1-D numpy array or a list).
    w : fraction of ``len(a)`` that the window should cover.

    Returns
    -------
    (start, end) : tuple of int
        Half-open bounds of the chosen window, i.e. ``a[start:end]``.
    """
    n_items = len(a)
    window = int(np.ceil(n_items * w))
    best_start, best_total = 0, 0
    for start in range(n_items - window + 1):
        total = np.sum(a[start:start + window])
        if total > best_total:
            best_start, best_total = start, total
    return best_start, best_start + window

In [13]:
# Distinct scan directories, in order of first appearance in the frame.
ct_path_list = pd.unique(test_area_df["ct_path"])

In [14]:
# For every scan directory, find the contiguous half (w=0.5) of its slices
# with the largest summed area. The stored [start, end] pair holds half-open
# positions within that scan's rows, in their current row order.
# groupby splits the frame in a single pass instead of re-filtering every row
# once per scan; sort=False keeps the scans in order of first appearance, so
# the dictionary is filled in the same order as ct_path_list.
test_dic={}
for ct_path, tmp_df in tqdm(test_area_df.groupby("ct_path", sort=False), total=len(ct_path_list)):
    test_dic[ct_path]=list(sum_max(tmp_df["area"].values,0.5))

100%|██████████| 5281/5281 [05:39<00:00, 15.55it/s]


In [15]:
import pickle

# Serialise the scan -> [start, end] mapping with the newest pickle protocol
# available in this interpreter.
with open('work_test/test_dic1_05.pickle', 'wb') as pickle_file:
    pickle.dump(test_dic, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

In [16]:
# ct_path_list=test_area_df["ct_path"].unique()
# for i in range(100):
    
#     tmp_df=test_area_df[test_area_df["ct_path"]==ct_path_list[i]].reset_index(drop=True)
#     a,b=test_dic[ct_path_list[i]]
#     print(ct_path_list[i])
#     plt.plot(tmp_df["area"])
#     plt.plot(tmp_df["area"][a:b])
#     print(a,b)
#     plt.show()