# Model Inference on Validation images (env = Conda_Anaconda3)

<a name="contents2"></a>

# Contents

- [Imports and Functions](#imports2)
- [Model Inference & Send to SingleStore](#inference2)

<a name="imports2"></a>

- [Back to Contents](#contents2)

# Example with 11 DICOM Images (Part 2)

## Imports and functions

In [1]:
%%time
# Imports
# !pip install PyMySQL
# !pip install torchvision
import numpy as np, pandas as pd
from glob import glob
import shutil, os
import matplotlib.pyplot as plt
from sklearn.model_selection import GroupKFold
from tqdm.notebook import tqdm
import seaborn as sns
import os
import torch
from IPython.display import Image, clear_output
import time
from tqdm.notebook import tqdm
import pymysql
from sqlalchemy import create_engine
import warnings
warnings.filterwarnings('ignore')


# Model format conversion
def yolo2voc(image_height, image_width, bboxes):
    """
    Convert normalized YOLO boxes to absolute Pascal-VOC corner boxes.

    yolo => [xmid, ymid, w, h] (normalized)
    voc  => [x1, y1, x2, y2]

    The input array is left untouched; a new float array is returned.
    """
    # Work on a float copy so integer inputs are not truncated.
    voc = bboxes.copy().astype(float)

    x_cols = [0, 2]        # x centre and width
    y_cols = [1, 3]        # y centre and height
    centre_cols = [0, 1]
    size_cols = [2, 3]

    # Scale the normalized values to pixels.
    voc[..., x_cols] = voc[..., x_cols] * image_width
    voc[..., y_cols] = voc[..., y_cols] * image_height

    # Centre minus half the size gives the top-left corner ...
    voc[..., centre_cols] = voc[..., centre_cols] - voc[..., size_cols] / 2
    # ... and top-left plus the full size gives the bottom-right corner.
    voc[..., size_cols] = voc[..., centre_cols] + voc[..., size_cols]

    return voc

# Functions
def GeneralEnsemble(dets, iou_thresh = 0.5, weights=None):
    """
    Merge the detections of several detectors into one list of boxes.

    Parameters
    ----------
    dets : sequence of detectors, each a sequence of boxes laid out as
        [x_center, y_center, width, height, class, confidence].
    iou_thresh : minimum IoU (exclusive) for two boxes of the same class
        from different detectors to be treated as the same object.
    weights : optional per-detector weights (one per entry of ``dets``).
        They are normalized to sum to 1; the caller's list is not modified.
        Defaults to equal weights.

    Returns
    -------
    list of [x_center, y_center, width, height, class, confidence] lists.
    A box matched across detectors gets the weighted average geometry and the
    weighted *sum* of confidences (not re-normalized, so detectors that missed
    the object lower the score). An unmatched box keeps its geometry and has
    its confidence divided by the number of detectors.
    """
    # Accept any real number (e.g. 1 or a numpy float), not only the exact float type.
    iou_thresh = float(iou_thresh)

    ndets = len(dets)

    if weights is not None:
        assert len(weights) == ndets, "weights must contain one entry per detector"

    # Nothing to merge; also avoids dividing by zero for the default weights.
    if ndets == 0:
        return []

    if weights is None:
        weights = [1 / float(ndets)] * ndets
    else:
        # Normalize into a new list so the caller's list is left untouched.
        total = sum(weights)
        weights = [w / total for w in weights]

    out = []
    # Boxes already consumed, tracked by (detector index, box index) so that
    # boxes with identical values in different detectors are still distinct.
    used = set()

    for idet, det in enumerate(dets):
        for ibox, box in enumerate(det):
            if (idet, ibox) in used:
                continue
            used.add((idet, ibox))

            # Search the other detectors for the best overlapping box of the same class
            found = []
            for iodet, odet in enumerate(dets):
                if iodet == idet:
                    continue

                best_index = None
                best_iou = iou_thresh
                for iobox, obox in enumerate(odet):
                    if (iodet, iobox) in used:
                        continue
                    if box[4] != obox[4]:
                        continue
                    iou = computeIOU(box, obox)
                    if iou > best_iou:
                        best_iou = iou
                        best_index = iobox

                if best_index is not None:
                    found.append((odet[best_index], weights[iodet]))
                    used.add((iodet, best_index))

            # Now we've gone through all other detectors
            if not found:
                # Seen by a single detector only: keep it, but penalize the confidence.
                new_box = list(box)
                new_box[5] /= ndets
                out.append(new_box)
                continue

            allboxes = [(box, weights[idet])]
            allboxes.extend(found)

            xc = 0.0
            yc = 0.0
            bw = 0.0
            bh = 0.0
            conf = 0.0
            wsum = 0.0
            for b, w in allboxes:
                wsum += w
                xc += w * b[0]
                yc += w * b[1]
                bw += w * b[2]
                bh += w * b[3]
                conf += w * b[5]

            # Geometry is a weighted average; confidence intentionally stays a weighted sum.
            xc /= wsum
            yc /= wsum
            bw /= wsum
            bh /= wsum

            out.append([xc, yc, bw, bh, box[4], conf])
    return out
    
def getCoords(box):
    """
    Return the edges (x1, x2, y1, y2) of a centre-format box.

    ``box`` is indexed as [x_center, y_center, width, height, ...]; each of
    the first four entries is converted with ``float`` before use.
    """
    center_x = float(box[0])
    half_width = float(box[2]) / 2
    center_y = float(box[1])
    half_height = float(box[3]) / 2
    return (
        center_x - half_width,
        center_x + half_width,
        center_y - half_height,
        center_y + half_height,
    )
    
def computeIOU(box1, box2):
    """
    Return the intersection-over-union of two centre-format boxes.

    Both boxes are indexed as [x_center, y_center, width, height, ...] and are
    converted to edges with ``getCoords``. Boxes that do not overlap, or that
    only touch along an edge or corner, yield 0.0.
    """
    x11, x12, y11, y12 = getCoords(box1)
    x21, x22, y21, y22 = getCoords(box2)

    x_left   = max(x11, x21)
    y_top    = max(y11, y21)
    x_right  = min(x12, x22)
    y_bottom = min(y12, y22)

    # "<=" rather than "<": a zero-area intersection is always IoU 0, and
    # returning early keeps two coincident zero-area boxes (union == 0) from
    # raising ZeroDivisionError below.
    if x_right <= x_left or y_bottom <= y_top:
        return 0.0

    intersect_area = (x_right - x_left) * (y_bottom - y_top)
    box1_area = (x12 - x11) * (y12 - y11)
    box2_area = (x22 - x21) * (y22 - y21)

    return intersect_area / (box1_area + box2_area - intersect_area)

# Function to transform the YOLOV5 output to the format the Ensemble function expects. 

def transform_object(df,tmp,flag):
    """
    Regroup a flat prediction token list into centre-format boxes.

    ``tmp`` is read in groups of six values: class, confidence, xmin, ymin,
    xmax, ymax. Every token is converted with ``float`` up front; trailing
    tokens that do not fill a complete group are ignored afterwards.

    Returns the list of group start offsets when ``flag == 0``; otherwise a
    list of [x_center, y_center, width, height, class, confidence] entries.
    ``df`` is accepted but not used.
    """
    values = [float(token) for token in tmp]
    group_count = int(len(values) / 6)

    offsets = []
    boxes = []
    for group in range(group_count):
        start = group * 6
        offsets.append(start)

        xmin = values[start + 2]
        ymin = values[start + 3]
        xmax = values[start + 4]
        ymax = values[start + 5]

        box_w = xmax - xmin
        box_h = ymax - ymin
        boxes.append([
            xmin + (box_w / 2),
            ymin + box_h / 2,
            box_w,
            box_h,
            int(values[start]),
            values[start + 1],
        ])

    return offsets if flag == 0 else boxes
    
#https://www.kaggle.com/prashantkikani/vinbigdata-ensemble-post-processing?scriptVersionId=56245340

def divide(l, n):
    '''
    Lazily yield consecutive slices of ``l`` that are ``n`` items long
    (e.g. a submission string split into groups of 6); the final slice
    may be shorter.
    '''
    yield from (l[begin:begin + n] for begin in range(0, len(l), n))

def convertToBinaryData(ImageFile):
    """Return the complete contents of the file at ``ImageFile`` as bytes."""
    with open(ImageFile, mode='rb') as handle:
        return handle.read()


def updateBLOB(ImageID, ImagePath, ImageFile):
    """
    Store the bytes of ``ImageFile`` in the ``PredictionImage`` column of the
    ``JPGImages`` row whose ``file`` equals ``ImageID``, then commit.

    Uses the module-level connection ``s2conn``, which must be open when this
    is called. ``ImagePath`` is accepted but not used.
    """
    sql_update_blob_query = """ Update JPGImages 
                        set PredictionImage = %s 
                        where file = %s """

    # Read the image before opening a cursor so that a missing or unreadable
    # file cannot leave a cursor open.
    jpgImage = convertToBinaryData(ImageFile)

    mycursor = s2conn.cursor()
    try:
        # Values are passed separately from the SQL text (parameterized query).
        mycursor.execute(sql_update_blob_query, (jpgImage, ImageID))
        s2conn.commit()
    finally:
        # Release the cursor even when execute/commit raises.
        mycursor.close()

CPU times: user 1.4 s, sys: 139 ms, total: 1.54 s
Wall time: 1.56 s


<a name="inference2"></a>

- [Back to Contents](#contents2)

# Ensemble Model Inference and Send Results to SingleStore

In [2]:
%%time


# Bring Dicom metadata back from SingleStore


os.chdir('/home/ubuntu/vinbigdata/')
s2conn = create_engine('mysql+pymysql://root:Sglstrpw34@172.31.62.112:3306/PatientRecords')

test_df = pd.read_sql_table('ImageHeaderdf', s2conn)


# Run Yolov5 Ensemble model on new JPG folder


fold_exp = ['exp26']
test_dir = f'/home/ubuntu/vinbigdata/11DemoConversionToJPG/'
os.chdir('/home/ubuntu/vinbigdata/yolov5')

for fold, exp in enumerate(fold_exp):
    weights_dir = '''/home/ubuntu/vinbigdata/yolov5/runs/train/exp26/weights/best.pt \
/home/ubuntu/vinbigdata/yolov5/runs/train/exp27/weights/best.pt \
/home/ubuntu/vinbigdata/yolov5/runs/train/exp28/weights/best.pt \
/home/ubuntu/vinbigdata/yolov5/runs/train/exp29/weights/best.pt \
/home/ubuntu/vinbigdata/yolov5/runs/train/exp30/weights/best.pt'''
    
    os.chdir('/home/ubuntu/vinbigdata/yolov5/')
    
    !python detect.py --weights $weights_dir\
    --img 1024\
    --conf 0.2\
    --iou 0.4\
    --source $test_dir\
    --save-txt --save-conf --exist-ok
    

# Convert Yolo predictions to PascalVoc format, back to the original Dicom Size
    
    
    image_ids = []
    PredictionStrings = []

    for file_path in tqdm(glob('runs/detect/exp/labels/*txt')):
        image_id = file_path.split('/')[-1].split('.')[0]
        w, h = test_df.loc[test_df.file==image_id,['Columns', 'Rows']].values[0]
        f = open(file_path, 'r')
        data = np.array(f.read().replace('\n', ' ').strip().split(' ')).astype(np.float32).reshape(-1, 6)
        data = data[:, [0, 5, 1, 2, 3, 4]]
        bboxes = list(np.round(np.concatenate((data[:, :2], np.round(yolo2voc(h, w, data[:, 2:]))), axis =1).reshape(-1), 1).astype(str))
        for idx in range(len(bboxes)):
            bboxes[idx] = str(int(float(bboxes[idx]))) if idx%6!=1 else bboxes[idx]
        image_ids.append(image_id)
        PredictionStrings.append(' '.join(bboxes))

    # credit / source: https://www.kaggle.com/awsaf49/vinbigdata-cxr-ad-yolov5-14-class-infer
    pred_df = pd.DataFrame({'file':image_ids,
                            'PredictionString':PredictionStrings})
    df = pd.merge(test_df, pred_df, on = 'file', how = 'left').fillna("14 1 0 0 1 1")
    df = df[['file', 'PredictionString']]

    os.chdir('/home/ubuntu/vinbigdata/')
    

# Send images with predictions to SingleStore. Starting timer
start = time.time()

directory = '/home/ubuntu/vinbigdata/yolov5/runs/detect/exp'
jpgCount = 0

# NOTE(review): database credentials are hard-coded here; consider loading
# them from the environment or a secrets store.
# updateBLOB() reads this module-level connection.
s2conn = pymysql.connect(
    user='root',
    password='Sglstrpw34',
    host='172.31.62.112',
    port=3306,
    database='Images')

try:
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)

        # Only regular .jpg files are uploaded; sub-directories are skipped.
        if os.path.isfile(f) and f.endswith(".jpg"):
            # The row key is the file name without its extension.
            file = os.path.splitext(filename)[0]
            updateBLOB(file, directory, f)
            jpgCount += 1
            if jpgCount % 100 == 0:
                print (jpgCount)
    print(jpgCount)
finally:
    # Close the connection even if an upload fails part-way through.
    s2conn.close()


# Clear image prediction folders for reusability


# Named pred_output_dir rather than `dir` so the builtin dir() is not shadowed.
pred_output_dir = '/home/ubuntu/vinbigdata/yolov5/runs/detect/exp/'
for entry in os.listdir(pred_output_dir):
    path = os.path.join(pred_output_dir, entry)
    try:
        # Remove sub-directories together with their contents.
        shutil.rmtree(path)
    except OSError:
        # rmtree failed (e.g. the entry is a plain file): remove it as a file.
        os.remove(path)
        

# Send prediction strings to SingleStore


# NOTE(review): database credentials are hard-coded here; consider loading
# them from the environment or a secrets store.
s2conn = pymysql.connect(
    user='root',
    password='Sglstrpw34',
    host='172.31.62.112',
    port=3306,
    database='PatientRecords')

try:
    # One cursor for the whole batch, always closed (also when df is empty).
    mycursor = s2conn.cursor()
    try:
        for InFile, InPredictionString in zip(df['file'], df['PredictionString']):
            # Parameterized call: the driver quotes/escapes the values, so
            # quotes in a file name cannot break or alter the statement.
            mycursor.execute("call InsertImagePredictions(%s, %s)",
                             (InFile, InPredictionString))
            s2conn.commit()
    finally:
        mycursor.close()
finally:
    s2conn.close()
end = time.time()
print(f'Time to send prediction string and prediction images to SingleStore: {round((end - start),2)} seconds')

/bin/bash: switchml: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `switchml'
/bin/bash: _moduleraw: line 1: syntax error: unexpected end of file
/bin/bash: error importing function definition for `_moduleraw'
[34m[1mdetect: [0mweights=['/home/ubuntu/vinbigdata/yolov5/runs/train/exp26/weights/best.pt', '/home/ubuntu/vinbigdata/yolov5/runs/train/exp27/weights/best.pt', '/home/ubuntu/vinbigdata/yolov5/runs/train/exp28/weights/best.pt', '/home/ubuntu/vinbigdata/yolov5/runs/train/exp29/weights/best.pt', '/home/ubuntu/vinbigdata/yolov5/runs/train/exp30/weights/best.pt'], source=/home/ubuntu/vinbigdata/11DemoConversionToJPG/, imgsz=[1024, 1024], conf_thres=0.2, iou_thres=0.4, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=exp, exist_ok=True, line_thickness=3, hide_labels=False, hide

HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))


11
Time to send prediction string and prediction images to SingleStore: 0.34 seconds
CPU times: user 454 ms, sys: 59.9 ms, total: 514 ms
Wall time: 14.2 s


- [Back to Contents](#contents2)