In [83]:
import numpy as np
import pandas as pd
import cv2
import csv
from PIL import Image
import torch
from torch import optim, nn
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data.dataset import Dataset
import matplotlib.pyplot as plt
import os.path
from os import path
from collections import OrderedDict
import time


In [84]:
# Print the kernel's current working directory; the relative file and folder
# names used by later cells are resolved against it.
print(os.getcwd())

/group/donut/Anupama/medical_ip/NIH_code/Preprocessing_Segmentation/images/images_002


Read the dataset CSV and extract the image-index and label columns

In [75]:
# Load the segmented-validation CSV (path is relative to the current working
# directory) and keep the two columns of interest as separate Series.
file_name = 'ValData_Segmented.csv'
a = pd.read_csv(file_name)
imgIdx, labelName = (a[column] for column in ('Image Index', 'Finding Labels'))

Preprocessing the image

In [76]:
def PreProcessing(image):
    """Normalise an image and derive a binary mask from it.

    The image is resized to 512 x 512, converted to grayscale when it still
    has a channel axis, Gaussian-blurred, histogram-equalised and finally
    thresholded.

    Parameters
    ----------
    image : numpy.ndarray
        Image array such as the one returned by ``cv2.imread``.
        NOTE(review): ``cv2.equalizeHist`` is documented for 8-bit
        single-channel input, so an 8-bit source is presumably expected.

    Returns
    -------
    hist : numpy.ndarray
        The blurred, histogram-equalised grayscale image.
    binImg : numpy.ndarray
        ``hist`` thresholded at 100 with ``cv2.THRESH_BINARY`` (values 0/255).

    Raises
    ------
    ValueError
        If ``image`` is ``None`` (``cv2.imread`` returns ``None`` for a file
        it cannot read).
    """
    if image is None:
        raise ValueError('PreProcessing received None instead of an image '
                         '(did cv2.imread fail to read the file?)')

    # step 1: resize the image to 512 x 512
    resized = cv2.resize(image, (512, 512))

    # step 2: convert to grayscale. The check is made on the array that is
    # actually converted, so it cannot disagree with the input's shape.
    if resized.ndim > 2:
        gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
    else:
        gray = resized

    # step 3: smooth out noise (5 x 5 Gaussian kernel, sigmaX = 6)
    filImg = cv2.GaussianBlur(gray, (5, 5), 6)

    # step 4: histogram equalisation
    hist = cv2.equalizeHist(filImg)

    # step 5: thresholding; the threshold value returned by OpenCV is unused
    _, binImg = cv2.threshold(hist, 100, 255, cv2.THRESH_BINARY)

    return hist, binImg
    

Extracting segments of the image

In [77]:
def Segments(crop):
    """Build a filled mask from the convex hulls of the edges in ``crop``.

    Canny edges are dilated, contours are extracted from the dilated edge
    map, and the convex hull of every contour is drawn filled in white onto
    that same edge map.

    Parameters
    ----------
    crop : numpy.ndarray
        Image passed straight to ``cv2.Canny``.
        NOTE(review): presumably the 8-bit binary image produced by
        ``PreProcessing`` — confirm against the caller.

    Returns
    -------
    numpy.ndarray
        The dilated edge map with all convex hulls filled with 255.
    """
    # finding edges
    edged = cv2.Canny(crop, 0, 1000)

    # Closing the gaps between the edges with a dilation. The structuring
    # element must contain non-zero entries: an all-zero kernel has no active
    # neighbourhood and therefore cannot widen the edges.
    kernel = np.ones((5, 5), np.uint8)
    edged = cv2.dilate(edged, kernel, iterations=10)

    # find contours. OpenCV 3.x returns (image, contours, hierarchy) while
    # 2.x and 4.x return (contours, hierarchy); index -2 is the contour list
    # in every version.
    contours = cv2.findContours(edged.copy(), cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Convex hull of each contour. All hulls are drawn filled, so their
    # order has no effect on the result and no sorting is needed.
    hull_list = [cv2.convexHull(contour) for contour in contours]

    # thickness -1 fills the hulls; contour index -1 draws all of them
    cv2.drawContours(edged, hull_list, -1, (255, 255, 255), -1)

    return edged

Locating an image within the dataset folders

In [78]:
def resolve_full_path(img_name):
    """Return the path of ``img_name`` inside the dataset's image folders.

    The candidates checked, in order, are
    ``~/data/kaggle/nih-chest-xrays/data/images_NNN/images/<img_name>``
    for ``NNN`` = 000 .. 012; the first one that exists is returned.

    Parameters
    ----------
    img_name : str
        File name of the image (no directory component).

    Returns
    -------
    str
        Path of the first existing candidate.

    Raises
    ------
    FileNotFoundError
        If the image is in none of the folders. The message contains the
        image name and the last candidate path that was tried.
    """
    folder_idx_range = 13

    # These pieces do not depend on the folder index, so build them once.
    path_prefix = path.expanduser("~/data/kaggle/nih-chest-xrays/data/images_")
    path_suffix = "images/"

    img_path = ''
    for folder_idx in range(folder_idx_range):
        cur_img_dir = path_prefix + str(folder_idx).zfill(3) + '/'
        img_path = path.join(cur_img_dir, path_suffix, img_name)
        if path.exists(img_path):
            return img_path

    # FileNotFoundError is a subclass of Exception, so callers that catch
    # Exception keep working.
    raise FileNotFoundError('Couldn\'t find: {} last:{}'.format(img_name, img_path))



In [87]:
# Move the working directory into "Preprocessing_Segmentation" (resolved
# relative to the current directory) and print the new location.
# NOTE(review): os.chdir changes process-wide state, so this cell is not
# idempotent — running it again from inside that folder looks for a nested
# folder of the same name.
os.chdir("Preprocessing_Segmentation")
print(os.getcwd())

/group/donut/Anupama/medical_ip/NIH_code/Preprocessing_Segmentation


Preprocess and segment each image, then store the result in the output folder

In [79]:
# Switch into the output folder so that files written with a bare file name
# end up inside it, then print the new working directory.
# NOTE(review): the folder must already exist relative to the current
# directory; os.chdir does not create it.
storePath = "Segmentation_Validation"
os.chdir(storePath)
print(os.getcwd())

/group/donut/Anupama/medical_ip/NIH_code/Preprocessing_Segmentation/Segmentation_Validation


In [80]:
# Segment every image listed in imgIdx and save the masked result under the
# image's own file name in the current working directory.
for idx, fileName in enumerate(imgIdx):
    imgPath = resolve_full_path(fileName)

    img = cv2.imread(imgPath)
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; fail here with the offending path rather than deeper in
        # the pipeline.
        raise IOError('Could not read image: {}'.format(imgPath))

    hist, binImg = PreProcessing(img)
    segments = Segments(binImg)

    # Keep the equalised pixels only where the segmentation mask is set.
    overlap = cv2.bitwise_and(hist, segments)

    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(fileName, overlap):
        raise IOError('Could not write image: {}'.format(fileName))

In [49]:
# Step back up to the parent of the current working directory.
os.chdir("../")

Writing the segmented images' names and labels to a new CSV

In [94]:
# Confirm the working directory before writing the CSV, since the output file
# name used in the next cell is relative.
print(os.getcwd())

/group/donut/Anupama/medical_ip/NIH_code/Preprocessing_Segmentation


In [96]:
# Copy the 'Image Index' / 'Finding Labels' columns of the validation CSV into
# ValData_Segmented.csv, which is created relative to the current working
# directory.
# NOTE(review): GenerateAugmentedDataCSV is defined in a cell further down the
# notebook, so that cell has to be executed before this one.
readfilename = '~/group/donut/medical_ip/Multi_Label_Dataloader_and_Classifier/valdata_paul.csv'
writefilename = 'ValData_Segmented.csv'
gen = GenerateAugmentedDataCSV(readfilename, writefilename)
gen.WriteToFile()

Class to generate csv with names and labels of the augmented images
Parameter 1: input csv file
Parameter 2: generated csv file with new image names and corresponding labels

In [95]:
class GenerateAugmentedDataCSV:
    """Copy the image-name and label columns of one CSV into another CSV.

    Parameters
    ----------
    readfilename : str
        Input CSV; must contain the columns 'Image Index' and
        'Finding Labels'.
    writefilename : str
        Output CSV that receives the image names and their labels.
    """

    def __init__(self, readfilename, writefilename):
        """Read the input CSV and remember the output file name."""
        self.data = pd.read_csv(readfilename)
        self.imageNames = self.data['Image Index']
        self.labelNames = self.data['Finding Labels']
        self.fileName = writefilename

    def WriteToFile(self):
        """Append one (image name, label) row per input row to the output CSV.

        The header row is written only when the output file does not exist
        yet or is empty, so repeated calls do not insert header rows in the
        middle of the data.
        """
        needs_header = not (os.path.exists(self.fileName)
                            and os.path.getsize(self.fileName) > 0)

        # newline='' lets the csv module control line endings itself, as its
        # documentation requires (otherwise blank lines appear on Windows).
        with open(self.fileName, 'a', newline='') as file:
            writer = csv.writer(file)
            if needs_header:
                writer.writerow(['Image Index', 'Finding Labels'])
            # Pair the two columns by position instead of by index label.
            writer.writerows(zip(self.imageNames, self.labelNames))