In [1]:
from tensorflow.keras.applications.resnet50 import preprocess_input 
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import img_to_array
from keras.applications import imagenet_utils
import numpy as np
import cv2

%run ../1_image_pyramid/1.ipynb
%run ../2_sliding_window/2.ipynb
%run ../3_non_max_supression/3.ipynb

## 1-Initial Parameters

In [2]:
WIDTH = 600
HEIGHT = 600
PYR_SCALE = 1.5 # image pyramid scale
WIN_STEP = 16 # slicing window step
ROI_SIZE = (200, 150)
INPUT_SIZE = (224, 224) # input size of resnet

## 2-Loading Resnet50

### Nedir bu ImageNet?
- ImageNet projesi, görsel nesne tanıma yazılım araştırmalarında kullanılmak üzere tasarlanmış büyük bir görsel veritabanıdır. 
- 14 milyondan fazla görüntü, hangi nesnelerin resmedildiğini göstermek için proje insanlar tarafından elle açıklanmıştır ve görüntülerin en az bir milyonunda sınırlayıcı kutular da bulunmaktadır. 
- ImageNet, "balon" veya "çilek" gibi tipik bir kategoride, birkaç yüz resimden oluşan 20.000'den fazla kategori içerir.

In [3]:
print("Resnet50 Loading...\n")
model = ResNet50(weights = "imagenet", include_top = True)

Resnet50 Loading...



## 3-Read the Resized Original Image

In [4]:
orig = cv2.imread("images/husky.jpg")
orig = cv2.resize(orig, dsize = (WIDTH, HEIGHT))
cv2.imshow("Husky", orig)

if cv2.waitKey(0) == ord('q'): # exit with "q"
    cv2.destroyAllWindows()

In [5]:
print(orig.shape[:2])

(H,W) = orig.shape[:2]
print((H,W))

(600, 600)
(600, 600)


## 4-Apply Image Pyramid

In [6]:
pyramid = image_pyramid(orig, PYR_SCALE, ROI_SIZE)
pyramid # return generator object

<generator object image_pyramid at 0x000002F14AEABC10>

## 5-Apply Sliding Window

In [7]:
rois = [] # region of interest
locs = [] # locations

In [8]:
for image in pyramid:
    
    scale = W/float(image.shape[1])
    
    for (x, y, roiOrig) in sliding_window(image, WIN_STEP, ROI_SIZE):
        
        x = int(x * scale)
        y = int(y * scale)
        w = int(ROI_SIZE[0]*scale) # 200
        h = int(ROI_SIZE[1]*scale) # 150
        
        roi = cv2.resize(roiOrig, INPUT_SIZE) # select certain area in image
        roi = img_to_array(roi)
        roi = preprocess_input(roi)
    
        rois.append(roi)
        locs.append((x,y,x+w,y+h))
        
rois = np.array(rois, dtype = "float32")

## 6-Classification Operation

In [9]:
preds = model.predict(rois)

In [10]:
preds = imagenet_utils.decode_predictions(preds, top = 1)
preds[0:10]

[[('n02110185', 'Siberian_husky', 0.16746876)],
 [('n02110185', 'Siberian_husky', 0.3103942)],
 [('n01582220', 'magpie', 0.28680125)],
 [('n02110185', 'Siberian_husky', 0.4473787)],
 [('n02110185', 'Siberian_husky', 0.30284733)],
 [('n02110185', 'Siberian_husky', 0.36064222)],
 [('n02110185', 'Siberian_husky', 0.27733198)],
 [('n02110185', 'Siberian_husky', 0.5808004)],
 [('n02110185', 'Siberian_husky', 0.42883143)],
 [('n01560419', 'bulbul', 0.24929538)]]

In [19]:
labels = {}
min_conf = 0.9

for (i,p) in enumerate(preds):
    
    (_, label, prob) = p[0] # p[0]=> return 3 variable. imageNetId will not be used.
    
    if prob >= min_conf: # prov >= 0.9
        
        box = locs[i]
        
        L = labels.get(label, [])
        L.append((box, prob))
        labels[label] = L

In [20]:
labels.keys()

dict_keys(['Eskimo_dog'])

## 7-Appliying Non Max Suppression

In [18]:
for label in labels.keys():
    
    clone = orig.copy()
    
    for (box, prob) in labels[label]: # extraction box location.
        (startX, startY, endX, endY) = box
        cv2.rectangle(clone, (startX, startY),(endX, endY), (0,255,0),2)
    
    cv2.imshow("first image ", clone)
    
    clone = orig.copy()
    
    # non-maxima
    boxes = np.array([p[0] for p in labels[label]])
    proba = np.array([p[1] for p in labels[label]])
    
    boxes = non_max_suppression(boxes, proba)
    
    for (startX, startY, endX, endY) in boxes:
        cv2.rectangle(clone, (startX, startY),(endX, endY), (0,255,0),2)
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.putText(clone, label, (startX , y), cv2.FONT_HERSHEY_SIMPLEX, 0.45,(0,255,0),2)
        
    cv2.imshow("Maxima", clone)
    
    if cv2.waitKey(0) == ord("q"): 
        cv2.destroyAllWindows()