### GAN LIDC — code adapted from this script
https://github.com/chufangao/3D_GAN_Lung_Nodules/blob/a378ea2d4b960c2c828c334c26376112bdd43fcc/our_models/3DCNNaugmentedtraining2.py

In [3]:
!pip install pandas

Collecting pandas
  Downloading pandas-1.3.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.5 MB)
[K     |████████████████████████████████| 11.5 MB 7.0 MB/s eta 0:00:01
Installing collected packages: pandas
Successfully installed pandas-1.3.4


In [13]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.0.9-py2.py3-none-any.whl (242 kB)
[K     |████████████████████████████████| 242 kB 8.2 MB/s eta 0:00:01
[?25hCollecting et-xmlfile
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.0.9


In [4]:
import pickle
import gc
import pandas as pd
from collections import OrderedDict
import numpy as np
import random
random.seed(10)

In [5]:
# Global sample-box dimensions (40, 40, 18), read by the sampling helpers below.
# Xsize / Ysize: in-plane widths of the box, used as array-index extents when cropping.
# Zsize: number of consecutive slices stacked into one sample volume.
Xsize = 40
Ysize = 40
Zsize = 18

In [6]:
def createSample(listp, dictp, zcenter, zsize=None):
    """Cut a stack of 2-D crops out of a scan, centred on one slice.

    Parameters
    ----------
    listp : sequence
        ``[[a0, a1], [b0, b1]]`` index bounds; every slice is cropped as
        ``image[a0:a1, b0:b1]``.
    dictp : mapping
        Slice key (z position) -> 2-D image.  Iteration order decides which
        slices are neighbours, so the mapping is assumed to be ordered by z.
    zcenter
        Key of the slice the stack is centred on.
    zsize : int, optional
        Number of slices in the stack.  Defaults to the notebook-level
        ``Zsize`` so existing three-argument calls behave as before.

    Returns
    -------
    tuple
        ``(crops, [minz, maxz])``: ``crops`` is a list of numpy arrays,
        ``minz`` is the key of the first slice used and ``maxz`` the key of
        the slice just past the last one used, both converted to float.
        ``([], [])`` is returned when ``zcenter`` is not a key of ``dictp``
        or when the stack does not fit inside the scan.
    """
    depth = Zsize if zsize is None else zsize

    # An unknown centre key used to fall through and crash with a TypeError;
    # report it through the same empty result as every other failure.
    if zcenter not in dictp:
        print("Center slice not found")
        return ([], [])

    zlist = list(dictp.items())
    centerZIndex = list(dictp).index(zcenter)
    minZIndex = int(centerZIndex - depth / 2)
    maxZIndex = int(centerZIndex + depth / 2)

    # zlist[maxZIndex] is read below for the upper bound, so maxZIndex itself
    # has to be a valid index (hence >=, not >).
    if minZIndex < 0 or maxZIndex >= len(zlist):
        print("Slice out of range")
        return ([], [])

    minzbound = float(zlist[minZIndex][0])
    maxzbound = float(zlist[maxZIndex][0])

    a0, a1 = listp[0][0], listp[0][1]
    b0, b1 = listp[1][0], listp[1][1]
    listToReturn = [
        np.array(image)[a0:a1, b0:b1]
        for _, image in zlist[minZIndex:maxZIndex]
    ]
    return (listToReturn, [minzbound, maxzbound])

In [7]:
def CreateTranslatedPositive(listp, dictp, zcenter, zsize=None):
    """Cut a randomly translated stack of crops around a nodule.

    The crop box is shifted by a random offset in [-10, 10] along each
    in-plane axis and the centre slice index by a random offset in [-4, 4].

    Parameters
    ----------
    listp : sequence
        ``[[a0, a1], [b0, b1]]`` index bounds of the untranslated box.  The
        argument is not modified; the shift is applied to local copies, so
        repeated calls with the same box do not accumulate offsets.
    dictp : mapping
        Slice key (z position) -> 2-D image, assumed ordered by z.
    zcenter
        Key of the untranslated centre slice.
    zsize : int, optional
        Number of slices in the stack.  Defaults to the notebook-level
        ``Zsize`` so existing three-argument calls behave as before.

    Returns
    -------
    tuple
        ``(crops, [minz, maxz])`` with the same meaning as in
        ``createSample``, or ``([], [])`` when ``zcenter`` is not a key of
        ``dictp`` or the translated stack does not fit inside the scan.
    """
    depth = Zsize if zsize is None else zsize

    # Randomized translation; the draw order (x, y, z) is kept so seeded runs
    # consume the random stream exactly as before.
    xShift = random.randint(-10, 10)
    yShift = random.randint(-10, 10)
    zShift = random.randint(-4, 4)

    # Shift local copies of the bounds instead of writing into the caller's list.
    # NOTE(review): for boxes close to the image border a start index can drop
    # below 0; numpy then counts from the end, so the crop may come out empty
    # or mis-shaped. Callers should check crop shapes.
    a0, a1 = listp[0][0] + xShift, listp[0][1] + xShift
    b0, b1 = listp[1][0] + yShift, listp[1][1] + yShift

    # An unknown centre key used to crash with a TypeError further down.
    if zcenter not in dictp:
        print("Center slice not found")
        return ([], [])

    zlist = list(dictp.items())
    centerZIndex = list(dictp).index(zcenter) + zShift
    minZIndex = int(centerZIndex - depth / 2)
    maxZIndex = int(centerZIndex + depth / 2)

    # zlist[maxZIndex] is read for the upper bound, so it must be a valid index.
    if minZIndex < 0 or maxZIndex >= len(zlist):
        print("Slice out of range")
        return ([], [])

    minzbound = float(zlist[minZIndex][0])
    maxzbound = float(zlist[maxZIndex][0])

    # Stack the crops to create the volume.
    listToReturn = [
        np.array(image)[a0:a1, b0:b1]
        for _, image in zlist[minZIndex:maxZIndex]
    ]
    return (listToReturn, [minzbound, maxzbound])

In [8]:
def createNegative(listp, dictp, slthick, imgsize=512):
    """Draw a random box that overlaps no excluded box and cut it out of the scan.

    Candidate positions are drawn until one is found that intersects none of
    the boxes in ``listp``; that candidate is then cut with ``createSample``.

    Parameters
    ----------
    listp : sequence
        Excluded boxes, each ``[[xmin, xmax], [ymin, ymax], [zmin, zmax]]``.
        The x/y bounds are used with ``range`` and must be integers.
    dictp : mapping
        Slice key (z position) -> 2-D image.
    slthick : number
        Slice thickness, used to convert the box depth in slices into the
        units of the z keys.
    imgsize : int, optional
        In-plane image size used to bound the random x/y position.
        Defaults to 512, the value that was previously hard-coded.

    Returns
    -------
    tuple
        Whatever ``createSample`` returns for the accepted candidate, or
        ``([], [])`` when the scan has too few slices to hold a box.

    Notes
    -----
    The retry loop is unbounded: if the excluded boxes leave no free
    position, this function does not return.
    """
    # Slice keys that leave room for half a box on either side. These do not
    # change between attempts, so they are built once instead of per retry.
    allZs = list(dictp.keys())
    validZs = allZs[int(0 + .5*Zsize) : int(len(allZs) - (.5*Zsize))]
    if not validZs:
        # random.choice would raise on an empty list; report the failure the
        # same way createSample does.
        print("Not enough slices for a negative sample")
        return ([], [])

    while True:
        # Random lower corner of the candidate box.
        xmin = random.randint(0, imgsize - Xsize)
        ymin = random.randint(0, imgsize - Ysize)
        zcenter = random.choice(validZs)
        zmin = zcenter - slthick * .5 * Zsize

        # The candidate intersects an excluded box when its lower corner lies
        # within one box-size before the excluded box's start and its end.
        intersect = False
        for box in listp:
            xcoords, ycoords, zcoords = box[0], box[1], box[2]
            hits_x = xmin in range(xcoords[0] - Xsize, xcoords[1])
            hits_y = hits_x and ymin in range(ycoords[0] - Ysize, ycoords[1])
            if hits_y and zcoords[0] - slthick * Zsize <= zmin <= zcoords[1]:
                intersect = True
                break

        # No intersections: cut the negative sample at this position.
        if not intersect:
            inputBox = [[xmin, xmin + Xsize], [ymin, ymin + Ysize]]
            return createSample(inputBox, dictp, zcenter)

In [11]:
ls 3D_GAN_Lung_Nodules/data_preprocessing/

 CNNinputDataExtractionV3.py   SlidingPositives.py
'Code Documentation.pdf'       ValClippedInputDataExtraction.py
 CompareResults.py             Wholescanapplication2.1optimized.py
 FPCounter.py                  Wholescanapplication2.1test.py
 IC3DCNN4.2.py                 noduleDimensions.xlsx
 IC3DCNN4.2NoIntensity.py      normalize.py
 NegativeExampleMaker.py


In [14]:
# Directory holding the nodule-annotation workbook (see the listing above).
excel_path = '3D_GAN_Lung_Nodules/data_preprocessing/'
# excel_path may or may not end with a separator; strip it before joining so
# the resulting path never contains a doubled "//".
x1 = pd.ExcelFile(excel_path.rstrip("/") + "/noduleDimensions.xlsx")
# First sheet: one row per nodule (SeriesID, NoduleID, centre and min/max coordinates, ...).
allNodules = x1.parse(x1.sheet_names[0])

In [15]:
# Order the rows by scan. sort_values keeps the original index labels, so the
# label-based lookups used later (e.g. allNodules["NoduleID"][i]) still address
# the row that carried label i before sorting, not the i-th sorted row.
allNodules = allNodules.sort_values(['SeriesID'])

In [16]:
# Unique series IDs in row order.
IDs = list(allNodules["SeriesID"].drop_duplicates())
# The last 120 series are held out for validation. A single slice gives the
# same list as slicing up to -1 and appending the last element, and it also
# works when IDs is empty.
validation_set = IDs[-120:]

In [18]:
# Collect the NoduleID column of the workbook's third sheet into a set
# (presumably the nodules selected for use); the sheet itself is not kept.
noduleSet = set(x1.parse(x1.sheet_names[2])["NoduleID"])
# The workbook handle is not needed past this point.
del x1

In [24]:
print("There are {} unique nodules".format(len(noduleSet)))

There are 969 unique nodules


#### Generate samples

In [51]:
# Bookkeeping for the sample-generation pass.
counter = 0       # incremented by hand in a later cell (`counter+=1`)
counterzeta = 0   # incremented when neither centerZ nor -centerZ is a key of imageDict
# NOTE(review): absolute, machine-specific directory; no cell shown here reads it.
savePath = '/home/cc/Data/'

print ("Start")
# NOTE(review): none of the helper functions declares `global slicefail`, so
# nothing they do changes this flag; it stays False.
slicefail = False

Start


In [44]:
# Initial state for the per-nodule walk-through below.
prevID = None            # series ID handled on the previous step; None before the first one
exclude_set = []         # [boxX, boxY, boxZ] nodule boxes that negative samples must avoid
slicethickness = None    # presumably the slice thickness of the current scan; never assigned in the cells shown
# Result / failure accumulators; none of the cells shown appends to them.
positivelist = []
sfailedposlist  = []
pfailedposlist  = []
negativelist = []
sfailedneglist = []
pfailedneglist = []
tempdict = None
# Slice key -> image mapping expected by the sampling helpers.
# NOTE(review): it is never loaded in this notebook, which is why the later
# `in imageDict` cells fail with a TypeError.
imageDict = None
takeNegativeSample = True   # cleared when a nodule's centerZ cannot be matched to a slice key
seriesIDset = set()         # series IDs already seen
counterx = 0

In [66]:
#choose 4th the sample, it is in the nodule set
i = 4
print("Slice thinkness of nodule number {} is {}".format(i, allNodules["SliceThickness"][i]))
nodeID = allNodules["NoduleID"][i]
seriesID = allNodules["SeriesID"][i]
print("Series id for nodule {} is {} and node id is {}".format(i, seriesID, nodeID))

Slice thinkness of nodule number 4 is 2.5
Series id for nodule 4 is 1.3.6.1.4.1.14519.5.2.1.6279.6001.170706757615202213033480003264 and node id is 5


In [72]:
print("Is nodeID in the noduleSet? {}".format(nodeID in noduleSet))

Is nodeID in the noduleSet? True


In [74]:
# Build the fixed-size in-plane box around the nodule centre:
# half of Xsize / Ysize on either side of centerX / centerY.
centerX = allNodules["centerX"][i]
boxX = [centerX-int(.5*Xsize), centerX+int(.5*Xsize)]
centerY = allNodules["centerY"][i]
boxY = [centerY-int(.5*Ysize), centerY+int(.5*Ysize)]
# centerZ is a slice position (a float such as -166.5), not an index.
centerZ = allNodules["centerZ"][i]
# box layout: [[x1, x2], [y1, y2]]
boxXY = [boxX, boxY]
print("center = ({},{},{}), box = ({},{})".format(centerX,centerY,centerZ, boxX, boxY))

center = (221,218,-166.5), box = ([201, 241],[198, 238])


In [75]:
centerZ not in imageDict

TypeError: argument of type 'NoneType' is not iterable

In [48]:
#series id is not equal to previous id
prevID != seriesID

True

In [49]:
# series ID is not in validation set
seriesID in validation_set

False

In [52]:
counter

0

In [55]:
counter+=1

In [60]:
# Record the previously handled series ID and report it if it was seen before.
# NOTE(review): prevID is still None on the first pass, so None itself ends up
# in the set, and re-running this cell reports None as a repeat.
if prevID in seriesIDset:
    print ("Repeate Series ID: " + str(prevID)) 
else:
    seriesIDset.add(prevID)
print(seriesIDset)

Repeate Series ID: None
{None}


In [61]:
filestring = str(seriesID)
filestring

'1.3.6.1.4.1.14519.5.2.1.6279.6001.179049373636438705059720603192'

In [None]:
#first, we generate a positive sample
prevID = seriesID

In [37]:
# Annotated extent of the nodule along each axis, [minimum, maximum].
# This overwrites the fixed-size boxX / boxY built around the centre earlier;
# these boxes are what gets appended to exclude_set further down.
boxX = [allNodules["minimumX"][i], allNodules["maximumX"][i]]
boxY = [allNodules["minimumY"][i], allNodules["maximumY"][i]]
boxZ = [allNodules["minimumZ"][i], allNodules["maximumZ"][i]]
print("Box co-ordinates for nodule {} is ({},{},{})".format(i, boxX, boxY, boxZ))

Box co-ordinates for nodule 0 is ([297, 340],[340, 391],[-125.0, -105.0])


In [40]:
centerZ = allNodules["centerZ"][i]
print("centerZ for nodule {} is {}".format(i, centerZ))

centerZ for nodule 0 is -115.0


In [41]:
centerZ * -1 in imageDict

TypeError: argument of type 'NoneType' is not iterable

In [42]:
# Register the nodule's box as an exclusion zone for negative sampling,
# matching the sign convention of the slice keys in imageDict.
# NOTE(review): imageDict must be a loaded mapping here; while it is still
# None every `in imageDict` test raises TypeError.
if centerZ in imageDict:
    # Slice keys use the same sign as the annotation: take the box as-is.
    exclude_set.append([boxX, boxY, boxZ])
elif centerZ * -1 in imageDict:
    # Slice keys use the opposite sign: negate both z bounds and swap them,
    # so the negated maximum becomes the new lower bound.
    temp = boxZ[0]
    boxZ[0] = -1 * boxZ[1]
    boxZ[1] = -1 * temp
    exclude_set.append([boxX, boxY, boxZ])
else:
    # The centre slice is not in the scan under either sign: skip negative
    # sampling for this nodule and count the miss.
    takeNegativeSample = False
    print('centerz', centerZ)
    counterzeta += 1

TypeError: argument of type 'NoneType' is not iterable

In [32]:
#create a negative sample
#negtoadd, zholder = createNegative(exclude_set, imageDict, slicethickness)

AttributeError: 'NoneType' object has no attribute 'keys'