# Import Libraries (env = conda_pytorch_p37)

In [1]:
from pathlib import Path
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import tqdm
import os
import re
import cv2
import pandas as pd
from fastai.medical.imaging import *
from fastai.vision.all import *
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.patches as ptc
from tqdm import tqdm # for getting a progress bar on loops
import pymysql
import time
from PIL import Image
import pymysql
from sqlalchemy import create_engine


pd.set_option('display.max_columns', 500)

# Functions

In [2]:
%%time
# Bring in Dicom Metadata

# Read a Dicom Image
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Load a DICOM file and return its pixels as an 8-bit grayscale array.

    Args:
        path: Location of the DICOM file; handed straight to pydicom.
        voi_lut: When true, pass the raw pixels through ``apply_voi_lut``
            (the device-provided transform to a "human-friendly" view).
        fix_monochrome: When true, invert images whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"`` so they do not
            look inverted.

    Returns:
        A ``np.uint8`` array, min-max scaled to the 0..255 range. A constant
        image (no contrast at all) is returned as all zeros.
    """
    # dcmread is the current pydicom reader; read_file is its deprecated alias
    dicom = pydicom.dcmread(path)

    # VOI LUT (if available by DICOM device) is used to transform raw DICOM data to "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # shift so the darkest pixel is 0, then scale the brightest to 255
    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # every pixel has the same value: dividing would produce NaNs
        return np.zeros(data.shape, dtype=np.uint8)
    data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

def get_dcm_contents(file):
    """Collect the non-binary header values of one DICOM file into a dict.

    The file is looked up under the module-level ``f_path`` and read with
    ``Path(...).dcmread()``. Only attribute names for which the module-level
    ``prog`` pattern yields a non-empty match are kept, and values that are
    ``bytes``/``bytearray`` are left out.

    Args:
        file: File name relative to ``f_path``.

    Returns:
        A dict with a ``'file'`` entry (the name with ``'.dicom'`` removed)
        plus one entry per retained attribute.
    """
    dataset = Path(f_path + file).dcmread()
    record = {'file': file.replace('.dicom', '')}

    for attr in dir(dataset):
        # an empty match means the name does not start with what `prog` looks for
        if prog.match(attr).group(0) == '':
            continue
        value = dataset[attr].value
        # binary payloads are not useful as tabular metadata
        if not isinstance(value, (bytes, bytearray)):
            record[attr] = value

    return record


# Convert to JPG and resize to max 1024 pixels
def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to a given width or height, keeping the aspect ratio.

    Args:
        image: Array whose first two ``shape`` entries are (height, width).
        width: Target width in pixels. When given, it decides the scale and
            ``height`` is ignored.
        height: Target height in pixels; used only when ``width`` is None.
        inter: Interpolation flag forwarded to ``cv2.resize``.

    Returns:
        The resized image, or ``image`` itself when neither ``width`` nor
        ``height`` is given.
    """
    src_h, src_w = image.shape[:2]

    # nothing requested: hand the input back untouched
    if width is None and height is None:
        return image

    if width is not None:
        scale = width / float(src_w)
        target = (width, int(src_h * scale))
    else:
        scale = height / float(src_h)
        target = (int(src_w * scale), height)

    # cv2 expects the size as (width, height)
    return cv2.resize(image, target, interpolation=inter)





CPU times: user 4 µs, sys: 1 µs, total: 5 µs
Wall time: 10.3 µs


In [3]:
%%time
f_path = 'DemoDicomImages/'
files = [f for f in os.listdir(f_path) if os.path.isfile(os.path.join(f_path, f))]
prog = re.compile('^[A-Z]*')

# Bring in DICOM metadata: one row of header values per file
df = pd.DataFrame( [ get_dcm_contents(file) for file in files ] )
# df = val_files.append(val_files, ignore_index=True)

# Convert to JPG, resized to a height of 1024 pixels
val_outdir = 'DemoConversionToJPG/'

# Create the output folder before listing it, so a fresh run does not fail
os.makedirs(val_outdir, exist_ok=True)

# Empty the output folder. A dedicated loop name keeps `files` (the DICOM list) intact.
for stale_name in os.listdir(val_outdir):
    path = os.path.join(val_outdir, stale_name)
    try:
        shutil.rmtree(path)
    except OSError:
        # rmtree refuses plain files; remove those directly
        os.remove(path)

# Convert DICOM to JPG via openCV
val_list = [os.path.basename(x) for x in glob.glob(f_path + './*.dicom')]
# print(val_list)
for f in tqdm(val_list):
    jpg_name = f.replace('.dicom','.jpg')
    # look for the JPG where it is actually written (the output folder)
    if not os.path.exists(val_outdir + jpg_name):
        img = read_xray(f_path + f) # read dicom image
        img = resize(img,height=1024)
        cv2.imwrite(val_outdir + jpg_name,img) # write jpg image

val_jpg_files = glob.glob(f'{val_outdir}/*.jpg')
print (f'Number of val_jpg_test files in {val_outdir}: {len(val_jpg_files)}')

  f"The (0028,0101) 'Bits Stored' value ({bits_stored}-bit) "
  f"The (0028,0101) 'Bits Stored' value ({bits_stored}-bit) "
100%|██████████| 214/214 [03:40<00:00,  1.03s/it]

Number of val_jpg_test files in DemoConversionToJPG/: 214
CPU times: user 3min 21s, sys: 20.9 s, total: 3min 42s
Wall time: 3min 41s





In [5]:
df

Unnamed: 0,file,BitsAllocated,BitsStored,Columns,HighBit,LossyImageCompression,NumberOfFrames,PatientAge,PatientSex,PhotometricInterpretation,PixelAspectRatio,PixelRepresentation,PixelSpacing,Rows,SamplesPerPixel,WindowCenter,WindowWidth,RescaleIntercept,RescaleSlope,PatientSize,PatientWeight,LargestImagePixelValue,SmallestImagePixelValue,LossyImageCompressionMethod,LossyImageCompressionRatio
0,0d825340b08e816f7d8f28c7c7809a31,16,16,2330,15,00,1.0,000Y,M,MONOCHROME2,,0,"[0.141, 0.141]",2485,1,41367.0,65435.0,,,,,,,,
1,05711a72a0819422965daa93f30b6f75,16,12,1994,11,00,,,O,MONOCHROME2,,0,"[0.175, 0.175]",2430,1,2047.0,4096.0,0.0,1.0,,,,,,
2,0403dda5a9bf46457517b604869d530d,16,14,3072,13,,1.0,Y,O,MONOCHROME2,,0,"[0.139000, 0.139000]",3072,1,3507.0,4199.0,0.0,1.0,,,,,,
3,05e951c63e80999f13e6e09e7ec8439a,16,12,1994,11,00,,,O,MONOCHROME2,,0,"[0.175, 0.175]",2430,1,2047.0,4096.0,0.0,1.0,,,,,,
4,004f33259ee4aef671c2b95d54e4be68,16,12,2517,11,00,,060Y,F,MONOCHROME2,"[1, 1]",0,"[0.139, 0.139]",3028,1,2048.0,4096.0,0.0,1.0,,,4095.0,0.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,011295e0bcdc7636569ab73bfdcc4450,16,12,1994,11,00,,,M,MONOCHROME2,,0,"[0.175, 0.175]",2430,1,2047.0,4096.0,0.0,1.0,,,,,,
210,0168eb925aa6f28a78b16134792f5d0e,16,12,2336,11,00,,,O,MONOCHROME1,,0,"[0.15, 0.15]",2836,1,2047.0,4095.0,0.0,1.0,,,,,,
211,0c8cb29888d314a0a8cfd0f8278af74c,16,12,2642,11,00,,,,MONOCHROME2,,0,"[0.125, 0.125]",3170,1,2070.0,4579.0,0.0,1.0,,,,,,
212,0b4e8de571c66cf889f2beb35182974b,16,12,1994,11,00,,,O,MONOCHROME2,,0,"[0.175, 0.175]",2430,1,2047.0,4096.0,0.0,1.0,,,,,,


### Insert headers and jpg images to SingleStore

In [7]:
%%time
# The database password is read from the S2_PASSWORD environment variable so
# that no secret is stored in (or committed with) the notebook.
from urllib.parse import quote_plus

S2_PASSWORD = os.environ['S2_PASSWORD']
# quote_plus keeps characters such as '@' or '/' in the password from breaking the URL
s2conn = create_engine(f'mysql+pymysql://root:{quote_plus(S2_PASSWORD)}@172.31.62.112:3306/PatientRecords')
# Replace the ImageHeaderdf table with the DICOM header rows collected in `df`
df.to_sql('ImageHeaderdf', s2conn, if_exists='replace', index = False)
# df = pd.read_sql_table('ImageHeaderdf', s2conn)

def convertToBinaryData(ImageFile):
    """Return the full contents of ``ImageFile`` as a ``bytes`` object."""
    handle = open(ImageFile, 'rb')
    try:
        return handle.read()
    finally:
        # always release the file handle, even if the read fails
        handle.close()


def insertBLOB(ImageID, ImagePath, ImageFile):
    """Insert one image file into the ``JPGImages`` table as a BLOB.

    Uses the module-level ``s2conn`` connection, which must be open and must
    provide ``cursor()`` and ``commit()``. The statement is ``INSERT IGNORE``
    and the values are passed as query parameters.

    Args:
        ImageID: Value stored in the ``file`` column.
        ImagePath: Value stored in the ``ImagePath`` column.
        ImageFile: Path of the file whose bytes go into the ``Image`` column.

    Raises:
        Whatever reading the file or the database driver raises; the cursor is
        closed before the exception propagates.
    """
    sql_insert_blob_query = """ INSERT IGNORE INTO JPGImages
                      (file, ImagePath, Image) VALUES (%s,%s,%s)"""

    # Read the file first so an unreadable file never leaves a cursor open
    jpgImage = convertToBinaryData(ImageFile)

    # Convert data into tuple format
    insert_blob_tuple = (ImageID, ImagePath, jpgImage)

    mycursor = s2conn.cursor()
    try:
        mycursor.execute(sql_insert_blob_query, insert_blob_tuple)
        s2conn.commit()
    finally:
        # release the cursor whether or not the insert succeeded
        mycursor.close()
    
    
# Main
from datetime import datetime  # made explicit: the progress messages below use datetime.now()

directory = '/home/ubuntu/vinbigdata/DemoConversionToJPG/'
jpgCount = 0

startTime = datetime.now()
print("Starting to Inserted jpg files ",startTime)

# insertBLOB() reads this module-level connection, so the name must stay `s2conn`.
# The password comes from the S2_PASSWORD environment variable instead of the notebook.
s2conn = pymysql.connect(
    user='root',
    password=os.environ['S2_PASSWORD'],
    host='172.31.62.112',
    port=3306,
    database='Images')

try:
    # iterate over files in that directory
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)
        # image id = file name without its extension
        file = os.path.splitext(filename)[0]

        # checking if it is a file
        if os.path.isfile(f) and f.endswith(".jpg"):
            insertBLOB(file, directory, f)
            jpgCount += 1
            if jpgCount % 100 == 0:
                print (jpgCount, " Elapse Time", (datetime.now() - startTime))
finally:
    # close the connection even when an insert fails part-way through
    s2conn.close()

print("Inserted ",jpgCount, " End Time ",datetime.now(), " Elapse Time ",(datetime.now() - startTime))

Starting to Inserted jpg files  2021-12-10 02:44:12.714499
100  Elapse Time 0:00:01.489719
200  Elapse Time 0:00:03.087259
Inserted  214  End Time  2021-12-10 02:44:15.998968  Elapse Time  0:00:03.284480
CPU times: user 2.56 s, sys: 39.2 ms, total: 2.59 s
Wall time: 3.44 s


In [None]:
# columns = ['file',
#            'BitsAllocated',
#            'BitsStored',
#            'Columns',
#            'HighBit',
#            'LossyImageCompression',
#            'PatientSex',
#            'PhotometricInterpretation',
#            'PixelRepresentation',
#            'PixelSpacing',
#            'RescaleIntercept',
#            'RescaleSlope',
#            'Rows',
#            'SamplesPerPixel',
#            'WindowCenter',
#            'WindowWidth',
#            'PatientSize',
#            'PatientWeight',
#            'PixelAspectRatio',
#            'PatientAge',
#            'LossyImageCompressionRatio',
#            'LargestImagePixelValue',
#            'SmallestImagePixelValue',
#            'LossyImageCompressionMethod',
#            'NumberOfFrames']

In [None]:
# The two conda environments have conflicting Python packages, so the DICOM metadata is written to SingleStore (S2) above and read back in the Conda_Anaconda3 environment below.

# Model Inference on Validation images (env = Conda_Anaconda3)

In [1]:
# Imports
import numpy as np, pandas as pd
from glob import glob
import shutil, os
import matplotlib.pyplot as plt
from sklearn.model_selection import GroupKFold
from tqdm.notebook import tqdm
import seaborn as sns
import os
import torch
from IPython.display import Image, clear_output
import time
from tqdm.notebook import tqdm
import pymysql
from sqlalchemy import create_engine

In [2]:
# Check GPU
clear_output()
print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

Setup complete. Using torch 1.10.0+cu102 _CudaDeviceProperties(name='Tesla V100-SXM2-16GB', major=7, minor=0, total_memory=16160MB, multi_processor_count=80)


In [3]:
# Model format conversion
def yolo2voc(image_height, image_width, bboxes):
    """Convert normalized YOLO boxes to absolute corner boxes.

    yolo => [xmid, ymid, w, h] (normalized)
    voc  => [x1, y1, x2, y2]

    The input array is left untouched; a float copy is returned.
    """
    # work on a float copy, otherwise integer input would truncate every value
    boxes = bboxes.copy().astype(float)

    # scale the normalized values up to pixels
    x_cols, y_cols = [0, 2], [1, 3]
    boxes[..., x_cols] = image_width * boxes[..., x_cols]
    boxes[..., y_cols] = image_height * boxes[..., y_cols]

    # centre/size -> min corner, then max corner = min corner + size
    centre, size = [0, 1], [2, 3]
    corner_min = boxes[..., centre] - boxes[..., size] / 2
    boxes[..., centre] = corner_min
    boxes[..., size] = corner_min + boxes[..., size]

    return boxes

# Functions
def GeneralEnsemble(dets, iou_thresh = 0.5, weights=None):
    """Merge the detections of several detectors into one list of boxes.

    Every box is a sequence ``[xc, yc, w, h, class, confidence]``. For each
    box, the best not-yet-used box of the same class is picked from every
    *other* detector, provided its IoU with the box exceeds ``iou_thresh``.
    Matched boxes are combined into one box whose geometry is the
    weight-averaged geometry and whose confidence is the weighted sum of the
    matched confidences. A box no other detector agrees with is kept, with
    its confidence divided by the number of detectors.

    Detectors and boxes are tracked by position, not by value, so identical
    detections coming from different detectors are merged like any other
    overlapping pair.

    Args:
        dets: One list of boxes per detector.
        iou_thresh: Minimum IoU (exclusive) for two boxes to be matched;
            must be a ``float``.
        weights: Optional per-detector weights. They are normalized to sum to
            one on a private copy; the caller's list is not modified.
            Defaults to equal weights.

    Returns:
        A list of merged boxes in the same six-value layout. Empty when
        ``dets`` is empty.
    """
    assert(type(iou_thresh) == float)

    ndets = len(dets)
    if ndets == 0:
        # nothing to merge, and equal weights would divide by zero
        return []

    if weights is None:
        w = 1/float(ndets)
        weights = [w]*ndets
    else:
        assert(len(weights) == ndets)

        # normalize into a new list so the caller's weights stay untouched
        s = sum(weights)
        weights = [wt / s for wt in weights]

    out = list()
    # (detector index, box index) pairs that were already consumed
    used = set()

    for idet, det in enumerate(dets):
        for ibox, box in enumerate(det):
            if (idet, ibox) in used:
                continue

            used.add((idet, ibox))
            # Search the other detectors for overlapping box of same class
            found = []
            for iodet, odet in enumerate(dets):
                if iodet == idet:
                    continue

                best_idx = None
                bestiou = iou_thresh
                for jbox, obox in enumerate(odet):
                    if (iodet, jbox) in used:
                        continue
                    if box[4] != obox[4]:
                        # different class
                        continue
                    iou = computeIOU(box, obox)
                    if iou > bestiou:
                        bestiou = iou
                        best_idx = jbox

                if best_idx is not None:
                    found.append((odet[best_idx], weights[iodet]))
                    used.add((iodet, best_idx))

            # Now we've gone through all other detectors
            if len(found) == 0:
                # no agreement: keep the box but scale its confidence down
                new_box = list(box)
                new_box[5] /= ndets
                out.append(new_box)
            else:
                allboxes = [(box, weights[idet])]
                allboxes.extend(found)

                xc = 0.0
                yc = 0.0
                bw = 0.0
                bh = 0.0
                conf = 0.0

                wsum = 0.0
                for b, w in allboxes:
                    wsum += w
                    xc += w*b[0]
                    yc += w*b[1]
                    bw += w*b[2]
                    bh += w*b[3]
                    conf += w*b[5]

                # geometry is averaged over the matched detectors only;
                # confidence is deliberately left as the raw weighted sum
                xc /= wsum
                yc /= wsum
                bw /= wsum
                bh /= wsum

                new_box = [xc, yc, bw, bh, box[4], conf]
                out.append(new_box)
    return out
    
def getCoords(box):
    """Turn a centre/size box into its edges.

    ``box[0]``/``box[1]`` are read as the centre and ``box[2]``/``box[3]`` as
    width and height. Returns the tuple ``(x1, x2, y1, y2)`` of floats.
    """
    cx, cy, width, height = (float(box[k]) for k in range(4))
    half_w = width / 2
    half_h = height / 2
    return cx - half_w, cx + half_w, cy - half_h, cy + half_h
    
def computeIOU(box1, box2):
    """Return the intersection-over-union of two centre/size boxes.

    Both boxes are converted to edges with ``getCoords``. The result is
    ``0.0`` when the boxes do not overlap, and also when their union has no
    area (e.g. two zero-sized boxes), where the ratio is undefined.
    """
    x11, x12, y11, y12 = getCoords(box1)
    x21, x22, y21, y22 = getCoords(box2)

    # edges of the overlapping rectangle, if there is one
    x_left   = max(x11, x21)
    y_top    = max(y11, y21)
    x_right  = min(x12, x22)
    y_bottom = min(y12, y22)

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersect_area = (x_right - x_left) * (y_bottom - y_top)
    box1_area = (x12 - x11) * (y12 - y11)
    box2_area = (x22 - x21) * (y22 - y21)

    union_area = box1_area + box2_area - intersect_area
    if union_area <= 0:
        # degenerate boxes: avoid dividing by zero
        return 0.0

    return intersect_area / union_area

# Function to transform the YOLOV5 output to the format the Ensemble function expects. 

def transform_object(df,tmp,flag):
    """Regroup a flat prediction token list into boxes for the ensemble step.

    ``tmp`` is read in groups of six values
    ``class, confidence, xmin, ymin, xmax, ymax``; tokens left over after the
    last complete group are ignored. ``df`` is accepted but not used.

    Args:
        df: Unused.
        tmp: Iterable of values convertible with ``float``.
        flag: ``0`` to get the start offset of every group; anything else to
            get the boxes.

    Returns:
        For ``flag == 0`` the list of group start offsets; otherwise a list of
        ``[x_centre, y_centre, width, height, class, confidence]`` lists.
    """
    values = [float(item) for item in tmp]
    n_boxes = int(len(values)/6)

    offsets = []
    boxes = []
    for offset in range(0, n_boxes*6, 6):
        offsets.append(offset)
        cls_id, score, xmin, ymin, xmax, ymax = values[offset:offset+6]

        # corners -> centre/size
        box_w = xmax - xmin
        box_h = ymax - ymin
        boxes.append([xmin + (box_w/2), ymin + box_h/2, box_w, box_h, int(cls_id), score])

    return offsets if flag==0 else boxes
    
#https://www.kaggle.com/prashantkikani/vinbigdata-ensemble-post-processing?scriptVersionId=56245340

def divide(l, n):
    '''
    Lazily yield consecutive slices of ``l`` holding ``n`` items each
    (used to split a submission string into groups of 6); the final slice
    may be shorter.
    '''
    yield from (l[start:start + n] for start in range(0, len(l), n))

# Bring in Dicom header information for Demo Files from SingleStore

In [4]:
os.chdir('/home/ubuntu/vinbigdata/')

# The database password is read from the S2_PASSWORD environment variable so
# that no secret is stored in (or committed with) the notebook.
from urllib.parse import quote_plus

S2_PASSWORD = os.environ['S2_PASSWORD']
# quote_plus keeps characters such as '@' or '/' in the password from breaking the URL
s2conn = create_engine(f'mysql+pymysql://root:{quote_plus(S2_PASSWORD)}@172.31.62.112:3306/PatientRecords')

# DICOM header information for the demo files (must contain file, Columns and Rows)
# test_df = pd.read_csv('test.csv')
test_df = pd.read_sql_table('ImageHeaderdf', s2conn)

  "detect unicode returns: %r" % de
  util.warn("Unknown schema content: %r" % line)


In [5]:
test_df

Unnamed: 0,file,BitsAllocated,BitsStored,Columns,HighBit,LossyImageCompression,NumberOfFrames,PatientAge,PatientSex,PhotometricInterpretation,...,WindowCenter,WindowWidth,RescaleIntercept,RescaleSlope,PatientSize,PatientWeight,LargestImagePixelValue,SmallestImagePixelValue,LossyImageCompressionMethod,LossyImageCompressionRatio
0,0403dda5a9bf46457517b604869d530d,16,14,3072,13,,1.0,Y,O,MONOCHROME2,...,3507.0,4199.0,0.0,1.0,,,,,,
1,034b98d64dc012298afb3d33fe880193,16,14,2540,13,,1.0,030Y,F,MONOCHROME2,...,9761.0,10590.0,,,,,,,,
2,056815f22ac3de9b9eb6fb963d4cfa5a,16,12,3320,11,00,,,,MONOCHROME2,...,2048.0,4096.0,0.0,1.0,,,,,,
3,07229c769cb2c284c1114c6fe7ed3dd6,16,12,2336,11,00,,,O,MONOCHROME1,...,2047.0,4095.0,0.0,1.0,,,,,,
4,03ea976a276b66285825246525357949,16,12,3000,11,00,,,F,MONOCHROME2,...,2047.0,4095.0,0.0,1.0,0.0,0.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,013c169f9dad6f1f6485da961b9f7bf2,16,10,2336,9,00,,,O,MONOCHROME1,...,511.0,1023.0,0.0,1.0,,,,,,
210,0ecb6d6e449d5f71b183a6f7fe214c10,16,14,2468,13,00,,062Y,M,MONOCHROME2,...,8191.0,16383.0,0.0,1.0,0.0,0.0,,,,
211,0e36597f559be16b6304f3771082c82a,16,12,2948,11,00,,,F,MONOCHROME2,...,2047.0,4095.0,0.0,1.0,0.0,0.0,,,,
212,0eddd00dffcdacb1c8ac0d0547c545a7,16,12,2552,11,00,,,,MONOCHROME2,...,1286.0,5328.0,0.0,1.0,,,,,,


# Inference on all 5 folds

In [6]:
%%time
fold_exp = ['exp26']
test_dir = f'/home/ubuntu/vinbigdata/DemoConversionToJPG/'
os.chdir('/home/ubuntu/vinbigdata/yolov5')


for fold, exp in enumerate(fold_exp):
    weights_dir = f'/home/ubuntu/vinbigdata/yolov5/runs/train/{exp}/weights/best.pt'
    os.chdir('/home/ubuntu/vinbigdata/yolov5/')
    
    !python detect.py --weights $weights_dir\
    --img 1024\
    --conf 0.1\
    --iou 0.4\
    --source $test_dir\
    --save-txt --save-conf --exist-ok
    
    image_ids = []
    PredictionStrings = []

    for file_path in tqdm(glob('runs/detect/exp/labels/*txt')):
        image_id = file_path.split('/')[-1].split('.')[0]
        # print(image_id)
        # print(test_df.loc[test_df.file==image_id,['Columns']].values[0])
        # print(test_df.loc[test_df.file==image_id,['Columns', 'Rows']].values[0])
        w, h = test_df.loc[test_df.file==image_id,['Columns', 'Rows']].values[0]
        f = open(file_path, 'r')
        data = np.array(f.read().replace('\n', ' ').strip().split(' ')).astype(np.float32).reshape(-1, 6)
        data = data[:, [0, 5, 1, 2, 3, 4]]
        bboxes = list(np.round(np.concatenate((data[:, :2], np.round(yolo2voc(h, w, data[:, 2:]))), axis =1).reshape(-1), 1).astype(str))
        for idx in range(len(bboxes)):
            bboxes[idx] = str(int(float(bboxes[idx]))) if idx%6!=1 else bboxes[idx]
        image_ids.append(image_id)
        PredictionStrings.append(' '.join(bboxes))

    # credit / source: https://www.kaggle.com/awsaf49/vinbigdata-cxr-ad-yolov5-14-class-infer
    pred_df = pd.DataFrame({'file':image_ids,
                            'PredictionString':PredictionStrings})
    # Needs to be the dataframe with 
    df0 = pd.merge(test_df, pred_df, on = 'file', how = 'left').fillna("14 1 0 0 1 1")
    df0 = df0[['file', 'PredictionString']]

    os.chdir('/home/ubuntu/vinbigdata/')

    # remove files and folders from yolov5/runs/detect/exp/
    dir = '/home/ubuntu/vinbigdata/yolov5/runs/detect/exp/'
    for files in os.listdir(dir):
        path = os.path.join(dir, files)
        try:
            shutil.rmtree(path)
        except OSError:
            os.remove(path)

fold_exp = ['exp27']
test_dir = f'/home/ubuntu/vinbigdata/DemoConversionToJPG/'
os.chdir('/home/ubuntu/vinbigdata/yolov5')


for fold, exp in enumerate(fold_exp):
    weights_dir = f'/home/ubuntu/vinbigdata/yolov5/runs/train/{exp}/weights/best.pt'
    os.chdir('/home/ubuntu/vinbigdata/yolov5/')
    
    !python detect.py --weights $weights_dir\
    --img 1024\
    --conf 0.1\
    --iou 0.4\
    --source $test_dir\
    --save-txt --save-conf --exist-ok
    
    image_ids = []
    PredictionStrings = []

    for file_path in tqdm(glob('runs/detect/exp/labels/*txt')):
        image_id = file_path.split('/')[-1].split('.')[0]
        # print(image_id)
        # print(test_df.loc[test_df.file==image_id,['Columns']].values[0])
        # print(test_df.loc[test_df.file==image_id,['Columns', 'Rows']].values[0])
        w, h = test_df.loc[test_df.file==image_id,['Columns', 'Rows']].values[0]
        f = open(file_path, 'r')
        data = np.array(f.read().replace('\n', ' ').strip().split(' ')).astype(np.float32).reshape(-1, 6)
        data = data[:, [0, 5, 1, 2, 3, 4]]
        bboxes = list(np.round(np.concatenate((data[:, :2], np.round(yolo2voc(h, w, data[:, 2:]))), axis =1).reshape(-1), 1).astype(str))
        for idx in range(len(bboxes)):
            bboxes[idx] = str(int(float(bboxes[idx]))) if idx%6!=1 else bboxes[idx]
        image_ids.append(image_id)
        PredictionStrings.append(' '.join(bboxes))

    # credit / source: https://www.kaggle.com/awsaf49/vinbigdata-cxr-ad-yolov5-14-class-infer
    pred_df = pd.DataFrame({'file':image_ids,
                            'PredictionString':PredictionStrings})
    # Needs to be the dataframe with 
    df1 = pd.merge(test_df, pred_df, on = 'file', how = 'left').fillna("14 1 0 0 1 1")
    df1 = df1[['file', 'PredictionString']]

    os.chdir('/home/ubuntu/vinbigdata/')

    # remove files and folders from yolov5/runs/detect/exp/
    dir = '/home/ubuntu/vinbigdata/yolov5/runs/detect/exp/'
    for files in os.listdir(dir):
        path = os.path.join(dir, files)
        try:
            shutil.rmtree(path)
        except OSError:
            os.remove(path)
            
fold_exp = ['exp28']
test_dir = f'/home/ubuntu/vinbigdata/DemoConversionToJPG/'
os.chdir('/home/ubuntu/vinbigdata/yolov5')


for fold, exp in enumerate(fold_exp):
    weights_dir = f'/home/ubuntu/vinbigdata/yolov5/runs/train/{exp}/weights/best.pt'
    os.chdir('/home/ubuntu/vinbigdata/yolov5/')
    
    !python detect.py --weights $weights_dir\
    --img 1024\
    --conf 0.1\
    --iou 0.4\
    --source $test_dir\
    --save-txt --save-conf --exist-ok
    
    image_ids = []
    PredictionStrings = []

    for file_path in tqdm(glob('runs/detect/exp/labels/*txt')):
        image_id = file_path.split('/')[-1].split('.')[0]
        # print(image_id)
        # print(test_df.loc[test_df.file==image_id,['Columns']].values[0])
        # print(test_df.loc[test_df.file==image_id,['Columns', 'Rows']].values[0])
        w, h = test_df.loc[test_df.file==image_id,['Columns', 'Rows']].values[0]
        f = open(file_path, 'r')
        data = np.array(f.read().replace('\n', ' ').strip().split(' ')).astype(np.float32).reshape(-1, 6)
        data = data[:, [0, 5, 1, 2, 3, 4]]
        bboxes = list(np.round(np.concatenate((data[:, :2], np.round(yolo2voc(h, w, data[:, 2:]))), axis =1).reshape(-1), 1).astype(str))
        for idx in range(len(bboxes)):
            bboxes[idx] = str(int(float(bboxes[idx]))) if idx%6!=1 else bboxes[idx]
        image_ids.append(image_id)
        PredictionStrings.append(' '.join(bboxes))

    # credit / source: https://www.kaggle.com/awsaf49/vinbigdata-cxr-ad-yolov5-14-class-infer
    pred_df = pd.DataFrame({'file':image_ids,
                            'PredictionString':PredictionStrings})
    # Needs to be the dataframe with 
    df2 = pd.merge(test_df, pred_df, on = 'file', how = 'left').fillna("14 1 0 0 1 1")
    df2 = df2[['file', 'PredictionString']]

    os.chdir('/home/ubuntu/vinbigdata/')

    # remove files and folders from yolov5/runs/detect/exp/
    dir = '/home/ubuntu/vinbigdata/yolov5/runs/detect/exp/'
    for files in os.listdir(dir):
        path = os.path.join(dir, files)
        try:
            shutil.rmtree(path)
        except OSError:
            os.remove(path)            

fold_exp = ['exp29']
test_dir = f'/home/ubuntu/vinbigdata/DemoConversionToJPG/'
os.chdir('/home/ubuntu/vinbigdata/yolov5')


for fold, exp in enumerate(fold_exp):
    weights_dir = f'/home/ubuntu/vinbigdata/yolov5/runs/train/{exp}/weights/best.pt'
    os.chdir('/home/ubuntu/vinbigdata/yolov5/')
    
    !python detect.py --weights $weights_dir\
    --img 1024\
    --conf 0.1\
    --iou 0.4\
    --source $test_dir\
    --save-txt --save-conf --exist-ok
    
    image_ids = []
    PredictionStrings = []

    for file_path in tqdm(glob('runs/detect/exp/labels/*txt')):
        image_id = file_path.split('/')[-1].split('.')[0]
        # print(image_id)
        # print(test_df.loc[test_df.file==image_id,['Columns']].values[0])
        # print(test_df.loc[test_df.file==image_id,['Columns', 'Rows']].values[0])
        w, h = test_df.loc[test_df.file==image_id,['Columns', 'Rows']].values[0]
        f = open(file_path, 'r')
        data = np.array(f.read().replace('\n', ' ').strip().split(' ')).astype(np.float32).reshape(-1, 6)
        data = data[:, [0, 5, 1, 2, 3, 4]]
        bboxes = list(np.round(np.concatenate((data[:, :2], np.round(yolo2voc(h, w, data[:, 2:]))), axis =1).reshape(-1), 1).astype(str))
        for idx in range(len(bboxes)):
            bboxes[idx] = str(int(float(bboxes[idx]))) if idx%6!=1 else bboxes[idx]
        image_ids.append(image_id)
        PredictionStrings.append(' '.join(bboxes))

    # credit / source: https://www.kaggle.com/awsaf49/vinbigdata-cxr-ad-yolov5-14-class-infer
    pred_df = pd.DataFrame({'file':image_ids,
                            'PredictionString':PredictionStrings})
    # Needs to be the dataframe with 
    df3 = pd.merge(test_df, pred_df, on = 'file', how = 'left').fillna("14 1 0 0 1 1")
    df3 = df3[['file', 'PredictionString']]

    os.chdir('/home/ubuntu/vinbigdata/')

    # remove files and folders from yolov5/runs/detect/exp/
    dir = '/home/ubuntu/vinbigdata/yolov5/runs/detect/exp/'
    for files in os.listdir(dir):
        path = os.path.join(dir, files)
        try:
            shutil.rmtree(path)
        except OSError:
            os.remove(path)
            
fold_exp = ['exp30']
test_dir = f'/home/ubuntu/vinbigdata/DemoConversionToJPG/'
os.chdir('/home/ubuntu/vinbigdata/yolov5')


for fold, exp in enumerate(fold_exp):
    weights_dir = f'/home/ubuntu/vinbigdata/yolov5/runs/train/{exp}/weights/best.pt'
    os.chdir('/home/ubuntu/vinbigdata/yolov5/')
    
    !python detect.py --weights $weights_dir\
    --img 1024\
    --conf 0.1\
    --iou 0.4\
    --source $test_dir\
    --save-txt --save-conf --exist-ok
    
    image_ids = []
    PredictionStrings = []

    for file_path in tqdm(glob('runs/detect/exp/labels/*txt')):
        image_id = file_path.split('/')[-1].split('.')[0]
        # print(image_id)
        # print(test_df.loc[test_df.file==image_id,['Columns']].values[0])
        # print(test_df.loc[test_df.file==image_id,['Columns', 'Rows']].values[0])
        w, h = test_df.loc[test_df.file==image_id,['Columns', 'Rows']].values[0]
        f = open(file_path, 'r')
        data = np.array(f.read().replace('\n', ' ').strip().split(' ')).astype(np.float32).reshape(-1, 6)
        data = data[:, [0, 5, 1, 2, 3, 4]]
        bboxes = list(np.round(np.concatenate((data[:, :2], np.round(yolo2voc(h, w, data[:, 2:]))), axis =1).reshape(-1), 1).astype(str))
        for idx in range(len(bboxes)):
            bboxes[idx] = str(int(float(bboxes[idx]))) if idx%6!=1 else bboxes[idx]
        image_ids.append(image_id)
        PredictionStrings.append(' '.join(bboxes))

    # credit / source: https://www.kaggle.com/awsaf49/vinbigdata-cxr-ad-yolov5-14-class-infer
    pred_df = pd.DataFrame({'file':image_ids,
                            'PredictionString':PredictionStrings})
    # Needs to be the dataframe with 
    df4 = pd.merge(test_df, pred_df, on = 'file', how = 'left').fillna("14 1 0 0 1 1")
    df4 = df4[['file', 'PredictionString']]

    os.chdir('/home/ubuntu/vinbigdata/')

    # remove files and folders from yolov5/runs/detect/exp/
    dir = '/home/ubuntu/vinbigdata/yolov5/runs/detect/exp/'
    for files in os.listdir(dir):
        path = os.path.join(dir, files)
        try:
            shutil.rmtree(path)
        except OSError:
            os.remove(path)
            

# Per-fold prediction frames, in fold order
fold_dfs = [df0, df1, df2, df3, df4]

# Convert every prediction string into the box layout the Ensemble function expects.
# input_file_data[k][i] holds the boxes of fold k for image i.
input_file_data = [
    [transform_object(fold_df, prediction.split(), 1) for prediction in fold_df['PredictionString']]
    for fold_df in fold_dfs
]
# kept under their original names as well
input_file_data0, input_file_data1, input_file_data2, input_file_data3, input_file_data4 = input_file_data

#Apply the Ensemble function
df = df0.copy()

# Canonical "no finding" prediction: class 14, confidence 1, box 0 0 1 1
NO_FINDING = [14, 1, 0, 0, 1, 1]

ensembled = []
for i in tqdm(range(len(df0))):
    final_list = [fold_boxes[i] for fold_boxes in input_file_data]
    ens = GeneralEnsemble(final_list,iou_thresh = 0.4)
    if len(ens) == 1 and ens[0][4] == 14:
        # Only the "no finding" box survived: write it in its canonical form,
        # whatever confidence the ensemble gave it for this number of folds.
        lst = list(NO_FINDING)
    else:
        lst = []
        for j in ens:
            # centre/size back to: class conf xmin ymin xmax ymax
            lst.extend([j[4], j[5], j[0] - j[2]/2, j[1] - j[3]/2, j[0] + j[2]/2, j[1] + j[3]/2])
    ensembled.append(' '.join(str(e) for e in lst))

# Assign the whole column at once instead of writing through iterrows() rows,
# which pandas does not guarantee to propagate back to the frame.
df['PredictionString'] = ensembled

preds = df['PredictionString'].tolist()
grouped_preds = [list(divide(pred.split(), 6)) for pred in preds]

new_preds = []

for pred in grouped_preds:
    temp = ''
    # each box is a tuple of 6 i.e. (class, confidence, xmin, ymin, xmax, ymax)
    for box in pred:
        # if we found some bounding-box i.e. `len(pred) > 1` & class is "No finding".
        if len(pred) > 1 and box[0] == '14':
            # Make the probability 0.
            box[1] = '0'
        temp += ' '.join(box) + ' '
    new_preds.append(temp.strip())

df['PredictionString'] = new_preds
df.head()

/bin/bash: switchml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `switchml'
/bin/bash: _moduleraw: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `_moduleraw'
[34m[1mdetect: [0mweights=['/home/ubuntu/vinbigdata/yolov5/runs/train/exp26/weights/best.pt'], source=/home/ubuntu/vinbigdata/DemoConversionToJPG/, imgsz=[1024, 1024], conf_thres=0.1, iou_thres=0.4, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=True, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False
YOLOv5 🚀 v6.0-115-gbc48457 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)

Fusing layers... 
Model Summary: 444 layers, 86260891 parameters, 0 gradients, 204.2 GFLOPs
image 1/214 /home/ubuntu/vinbigdata/DemoConversionToJPG/

HBox(children=(FloatProgress(value=0.0, max=149.0), HTML(value='')))


/bin/bash: switchml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `switchml'
/bin/bash: _moduleraw: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `_moduleraw'
[34m[1mdetect: [0mweights=['/home/ubuntu/vinbigdata/yolov5/runs/train/exp27/weights/best.pt'], source=/home/ubuntu/vinbigdata/DemoConversionToJPG/, imgsz=[1024, 1024], conf_thres=0.1, iou_thres=0.4, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=True, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False
YOLOv5 🚀 v6.0-115-gbc48457 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)

Fusing layers... 
Model Summary: 444 layers, 86260891 parameters, 0 gradients, 204.2 GFLOPs
image 1/214 /home/ubuntu/vinbigdata/DemoConversionToJPG

HBox(children=(FloatProgress(value=0.0, max=188.0), HTML(value='')))


/bin/bash: switchml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `switchml'
/bin/bash: _moduleraw: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `_moduleraw'
[34m[1mdetect: [0mweights=['/home/ubuntu/vinbigdata/yolov5/runs/train/exp28/weights/best.pt'], source=/home/ubuntu/vinbigdata/DemoConversionToJPG/, imgsz=[1024, 1024], conf_thres=0.1, iou_thres=0.4, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=True, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False
YOLOv5 🚀 v6.0-115-gbc48457 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)

Fusing layers... 
Model Summary: 444 layers, 86260891 parameters, 0 gradients, 204.2 GFLOPs
image 1/214 /home/ubuntu/vinbigdata/DemoConversionToJPG

HBox(children=(FloatProgress(value=0.0, max=166.0), HTML(value='')))


/bin/bash: switchml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `switchml'
/bin/bash: _moduleraw: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `_moduleraw'
[34m[1mdetect: [0mweights=['/home/ubuntu/vinbigdata/yolov5/runs/train/exp29/weights/best.pt'], source=/home/ubuntu/vinbigdata/DemoConversionToJPG/, imgsz=[1024, 1024], conf_thres=0.1, iou_thres=0.4, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=True, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False
YOLOv5 🚀 v6.0-115-gbc48457 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)

Fusing layers... 
Model Summary: 444 layers, 86260891 parameters, 0 gradients, 204.2 GFLOPs
image 1/214 /home/ubuntu/vinbigdata/DemoConversionToJPG

HBox(children=(FloatProgress(value=0.0, max=181.0), HTML(value='')))


/bin/bash: switchml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `switchml'
/bin/bash: _moduleraw: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `_moduleraw'
[34m[1mdetect: [0mweights=['/home/ubuntu/vinbigdata/yolov5/runs/train/exp30/weights/best.pt'], source=/home/ubuntu/vinbigdata/DemoConversionToJPG/, imgsz=[1024, 1024], conf_thres=0.1, iou_thres=0.4, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=True, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False
YOLOv5 🚀 v6.0-115-gbc48457 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)

Fusing layers... 
Model Summary: 444 layers, 86260891 parameters, 0 gradients, 204.2 GFLOPs
image 1/214 /home/ubuntu/vinbigdata/DemoConversionToJPG

HBox(children=(FloatProgress(value=0.0, max=152.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=214.0), HTML(value='')))


CPU times: user 4.04 s, sys: 750 ms, total: 4.79 s
Wall time: 1min 55s


Unnamed: 0,file,PredictionString
0,0403dda5a9bf46457517b604869d530d,14 0 0.0 0.0 1.0 1.0 11 0.06 1266.0 312.0 1509...
1,034b98d64dc012298afb3d33fe880193,14 0 0.0 0.0 1.0 1.0 0 0.02 1249.0 492.0 1507....
2,056815f22ac3de9b9eb6fb963d4cfa5a,14 0 0.0 0.0 1.0 1.0 8 0.02 879.0 1960.0 969.0...
3,07229c769cb2c284c1114c6fe7ed3dd6,0 0.5800000000000001 1228.25 805.7499999999999...
4,03ea976a276b66285825246525357949,11 0.18 835.0 537.4999999999999 985.5 663.4999...


In [13]:
# Display test_df, the frame the per-model predictions were left-merged onto by 'file'.
test_df

Unnamed: 0,file,BitsAllocated,BitsStored,Columns,HighBit,LossyImageCompression,NumberOfFrames,PatientAge,PatientSex,PhotometricInterpretation,...,WindowCenter,WindowWidth,RescaleIntercept,RescaleSlope,PatientSize,PatientWeight,LargestImagePixelValue,SmallestImagePixelValue,LossyImageCompressionMethod,LossyImageCompressionRatio
0,03f9ce3f34a38cf5c6411acd8fd4c5ff,16,12,1994,11,00,,,O,MONOCHROME2,...,2047.0,4096.0,0.0,1.0,,,,,,
1,1033a2e669d3f19f68e6c8a2b5085661,16,12,1994,11,00,,,O,MONOCHROME2,...,2047.0,4096.0,0.0,1.0,,,,,,
2,0cc5600e54fe599fefce87de375268e3,16,12,1994,11,00,,,O,MONOCHROME2,...,2047.0,4096.0,0.0,1.0,,,,,,
3,0c803c4810a8c5ec362f5d4504489431,16,14,2540,13,,1.0,062Y,F,MONOCHROME2,...,9611.0,11854.0,,,,,,,,
4,10cba931b86360874e0905f97737d1f4,16,16,2494,15,00,1.0,000Y,M,MONOCHROME2,...,32767.0,65535.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,0b1f7b86b0ddb459fb779c6d473d96ad,16,16,2145,15,00,1.0,000Y,F,MONOCHROME2,...,32767.0,65535.0,,,,,,,,
210,0956d88d67ce080413b1b3ac20ab8622,16,12,2048,11,00,,,O,MONOCHROME2,...,2048.0,4096.0,0.0,1.0,,,4095.0,0.0,,
211,05a548fd37b6e44ab97a48dabee57c21,16,12,2642,11,00,,,,MONOCHROME2,...,2048.0,4096.0,0.0,1.0,,,,,,
212,0ecaae458f6ca4c8f4dd9975d62a6fa4,16,16,2157,15,00,1.0,000Y,M,MONOCHROME2,...,32767.0,65535.0,,,,,,,,


In [14]:
# Display df, the ensembled result: one row per file with its final PredictionString.
df

Unnamed: 0,file,PredictionString
0,0403dda5a9bf46457517b604869d530d,14 0 0.0 0.0 1.0 1.0 11 0.06 1266.0 312.0 1509...
1,034b98d64dc012298afb3d33fe880193,14 0 0.0 0.0 1.0 1.0 0 0.02 1249.0 492.0 1507....
2,056815f22ac3de9b9eb6fb963d4cfa5a,14 0 0.0 0.0 1.0 1.0 8 0.02 879.0 1960.0 969.0...
3,07229c769cb2c284c1114c6fe7ed3dd6,0 0.5800000000000001 1228.25 805.7499999999999...
4,03ea976a276b66285825246525357949,11 0.18 835.0 537.4999999999999 985.5 663.4999...
...,...,...
209,013c169f9dad6f1f6485da961b9f7bf2,11 0.04 745.0 413.0 945.0 485.0 7 0.1399999999...
210,0ecb6d6e449d5f71b183a6f7fe214c10,11 0.26 1484.5000000000002 534.9999999999999 1...
211,0e36597f559be16b6304f3771082c82a,11 0.16 385.99999999999994 2336.9999999999995 ...
212,0eddd00dffcdacb1c8ac0d0547c545a7,13 0.16000000000000003 1967.3333333333328 1196...


# Send final predictions back to SingleStore

In [12]:
%%time
# Open a SQLAlchemy engine on the PatientRecords database.
# NOTE(review): the password is embedded in this URL in plain text; it should
# be loaded from an environment variable or a secrets store instead.
s2conn = create_engine('mysql+pymysql://root:Sglstrpw34@172.31.62.112:3306/PatientRecords')

# create_engine() expects a URL, not an Engine, so the engine built above is
# reused rather than being passed through create_engine() a second time.
# (The dropped encoding='utf8' argument matched the SQLAlchemy 1.x default.)
db = s2conn
connection = db.raw_connection()

# define parameters to be passed in and out
parameterIn = 1
parameterOut = "@parameterOut"
try:
    cursor = connection.cursor()
    try:
        cursor.callproc("storedProcedure", [parameterIn, parameterOut])
        # fetch result parameters
        results = list(cursor.fetchall())
    finally:
        # Release the cursor even when the procedure call or fetch fails.
        cursor.close()
    connection.commit()
finally:
    connection.close()

('7aa301c90a61603f9b5e2e05a88b4204', 13, Decimal('0.240000000'), Decimal('492.333333333'), Decimal('1287.000000000'), Decimal('744.333333333'), Decimal('1461.333333333'), None, None, None, None, None, None, None, None, 'c5261ea38786c17f68423d23b95d030c7545ef197ddfbb7efa99bf62decef303ba1796eddeae62374b8da450f89bdf568d78e32aad26da3cc84bf0bfbd6d3740')


# Switch back to the conda_pytorch_p37 environment and display a final test image

In [31]:
%%time
# Open a SQLAlchemy engine on the PatientRecords database.
# NOTE(review): the password is embedded in this URL in plain text — consider loading it from the environment.
s2conn = create_engine('mysql+pymysql://root:Sglstrpw34@172.31.62.112:3306/PatientRecords')
# df.to_sql('ImageHeaderdf', s2conn, if_exists='replace', index = False)
# Read the whole 'ImageHeaderdf' table back into a DataFrame.
# NOTE(review): this rebinds df2, a name the ensemble cell also uses for one of the per-model frames — confirm the reuse is intended.
df2 = pd.read_sql_table('ImageHeaderdf', s2conn)

CPU times: user 18.1 ms, sys: 4.47 ms, total: 22.6 ms
Wall time: 139 ms


  "detect unicode returns: %r" % de
  util.warn("Unknown schema content: %r" % line)


In [32]:
# Display df2, the frame just read back from the 'ImageHeaderdf' table.
df2

Unnamed: 0,file,PredictionString
0,121e650bba0d2537b821fb20bcc0db86,7 0.06000000000000001 249.5 472.5 745.0 1455.0...
1,04b68e83e611caf345b0a1dc9c65ec88,11 0.4000000000000001 1703.0000000000002 673.9...
2,0c187ebe652499a7e28fd93da2e42ebb,14 0 0.0 0.0 1.0 1.0 3 0.06 878.0 1132.0 1640....
3,0e8b4d8bb1a8719e9e8f4755f13e34c4,0 0.7000000000000001 1233.3999999999999 771.19...
4,0b8ec8f17db23936e86de9ca3f36d206,11 0.04 1191.0 444.0 1467.0 492.0 14 0 0.0 0.0...
...,...,...
209,0271d381c3e88527721efcfaf518be71,3 0.8600000000000001 829.4000000000001 1758.6 ...
210,05e951c63e80999f13e6e09e7ec8439a,11 0.34 1255.3333333333333 93.33333333333331 1...
211,0ef99f92d68d16b8912cc073731ffad1,11 0.16 1415.0 447.0 1667.0 506.0 11 0.3200000...
212,0d60b79ab9aee1e4687e17fdc56ead06,0 0.64 1192.4 1180.8000000000002 1662.6 1581.0...


In [26]:
# Read a Dicom Image
def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Load a DICOM file and return its pixels as an 8-bit grayscale array.

    Args:
        path: Location of the DICOM file to read.
        voi_lut: When True, pass the raw pixels through ``apply_voi_lut``
            (the VOI LUT, if the DICOM device provides one, maps raw data to
            a "human-friendly" view). When False, use the raw pixel array.
        fix_monochrome: When True, invert images whose
            ``PhotometricInterpretation`` is ``"MONOCHROME1"`` so they do not
            look inverted.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` holding the pixels min-max scaled
        to 0..255. An image whose pixels are all equal comes back as all zeros.
    """
    # dcmread is the current name of the reader; read_file is its deprecated alias.
    dicom = pydicom.dcmread(path)

    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # Tolerate files that lack the attribute instead of raising AttributeError.
    photometric = getattr(dicom, 'PhotometricInterpretation', None)
    if fix_monochrome and photometric == "MONOCHROME1":
        data = np.amax(data) - data

    # Min-max scale to 0..1. A constant image has a zero range after the shift;
    # skip the division there so it yields zeros rather than NaN from 0/0.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

In [27]:
%%time
# Decode one training DICOM and render it in grayscale on a 12x12-inch figure.
sample_path = 'train/0108949daa13dc94634a7d650a05c0bb.dicom'
img = read_xray(sample_path)
plt.figure(figsize=(12, 12))
plt.imshow(img, cmap='gray')

NameError: name 'pydicom' is not defined

# Send to S2 (Ian)

In [None]:
# Read the labelled bounding boxes from the ImageTraining table into `train`.
# NOTE(review): the password is hardcoded below in plain text; it should be
# loaded from an environment variable or a secrets store instead.
s2conn = pymysql.connect(
    user='root',
    password='Sglstrpw34',
    host='172.31.62.112',
    port=3306,
    database='PatientRecords')

sql = "select ImageID, ClassName, ClassID, RadiologestID, XMin, YMin, XMax, YMax from ImageTraining"

# try/finally guarantees the cursor and the connection are released even when
# the query or the fetch raises.
try:
    mycursor = s2conn.cursor()
    try:
        mycursor.execute(sql)
        rows = mycursor.fetchall()
    finally:
        mycursor.close()
finally:
    s2conn.close()

# Column names follow the order of the SELECT list above.
train = pd.DataFrame(rows, columns = ['image_id','class_name','class_id','rad_id','x_min','y_min','x_max','y_max'])
train

