# Mix

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np
import matplotlib.pyplot as plt

from scipy import ndimage 

import os
import cv2

import pandas as pd
%matplotlib inline

Unnamed: 0,id,x,y
0,IMG2744_1,-9.380678,3.58272
1,IMG2744_2,-9.380678,3.58272
2,IMG2744_3,-9.380678,3.58272
3,IMG2744_4,-9.380678,3.58272
4,IMG2744_5,-9.380678,3.58272
...,...,...,...
7495,IMG4243_1,-4.680678,35.18272
7496,IMG4243_2,-4.680678,35.18272
7497,IMG4243_3,-4.680678,35.18272
7498,IMG4243_4,-4.680678,35.18272


In [3]:
# Load images and resize 
root= './train'

def load_data(root_path):
    names = []
    train_crude = []

    for file in os.listdir(root):
        img = cv2.imread(os.path.join(root, file))#,cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (img.shape[1]//3,img.shape[0]//3))
        train_crude.append(img)
        names.append(file.split('.')[0])
    
    data = np.stack( train_crude, axis=0 )
    return names, data

names, data=load_data(root)

In [65]:
data.shape

7500

### Extract feature CNN

In [5]:
tf.__version__

'2.6.0'

In [5]:
from tensorflow.keras.applications.resnet_v2 import preprocess_input, decode_predictions
from tensorflow.keras.applications.resnet_v2 import ResNet50V2

base_model = ResNet50V2(weights='imagenet',input_shape=[163, 226,3],include_top=False)

In [66]:
#get features in batches and flatten

def generate_features(data,base_model, test=False):
    feat = []
    batch_size=500
    num_samples=data.shape[0]
    if test:
        batch_size=10
    for i in range(0,num_samples,batch_size):
        temp = base_model.predict(preprocess_input(data[i:i+batch_size]))
        feat.append(temp)


    feat0 = np.concatenate(feat, axis=0) 
    feat2 = [i.ravel() for i in feat0]
    feat2 = np.stack(feat2, axis=0) #these are the features
    
    return feat2

feat=generate_features(data,base_model)

In [8]:
print(feat.shape)

(7500, 98304)


### Nearest neighbor

In [9]:

from sklearn.neighbors import NearestNeighbors

def get_nearest_neighbours(features, *args):
    
    neighTest = NearestNeighbors(n_neighbors=5)
    predictions=None
    
    if not args:
        train = features[:7000]
        validation = features[7000:]
        neighTest.fit(train)
        predictions = neighTest.kneighbors(validation)
    else:
        test_features=args[0] 
        neighTest.fit(features)
        predictions=neighTest.kneighbors(test_features)
    
    return predictions
        
predictions=get_nearest_neighbours(feat)

In [10]:
predictions[0][0] # Distances

array([437.69397, 452.00183, 455.92117, 459.7137 , 459.8171 ],
      dtype=float32)

In [11]:
predictions[1][0] # Indices

array([2964,  684,  322, 3702, 6431], dtype=int64)

In [None]:
plt.subplots(figsize=(15,15))
for ix,im in enumerate(predictions):
    toShow = data[im]
    plt.subplot(1,6,ix+1)
    plt.imshow(toShow, cmap='gray')  
    plt.title(im)
    plt.axis('off')
plt.show()

### SIFT

In [54]:
#Generates de key points and descriptors of the neighbours of the images to be predicted

def generate_descriptors_neighbours(predictions):
    sift = cv2.SIFT_create()

    key_points=[]
    descriptors=[]
    for  i, i_n in enumerate(predictions[1]): #Iterate over indices of the predicted nearest neighbours
        # print(i_n)
        kp_neighbours=[]
        desc_neighbours=[]
        for neigh in i_n: # Iterates over the indices of the k nearest neighbours of the instance being predicted
            img=data[neigh]
            gray_neighbour= cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)# A neighbour of the instances being predicted
            kp, des = sift.detectAndCompute(gray_neighbour,None) # key points and descriptors of the neighbour of instance being predicted
            kp_neighbours.append(kp)
            desc_neighbours.append(des)

        key_points.append(kp_neighbours)
        descriptors.append(desc_neighbours)
    
    return key_points, descriptors

kpts,descriptors2=generate_descriptors_neighbours(predictions)


In [55]:
len(descriptors2[0])

5

In [56]:
#Generates de key points and descriptors of the images to be predicted
validation_img=data[7000:]

def generate_descriptors(data_img):
    sift = cv2.SIFT_create()
    
    key_points=[]
    descript=[]

    for  img in data_img: #Iterate over indices of predictions
        # print(i_n)
        gray= cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
        kp, des = sift.detectAndCompute(gray,None) # key points and descriptors of the instance being predicted
        key_points.append(kp)
        descript.append(des)
    
    return key_points, descript

kp, descriptors1 = generate_descriptors(validation_img)
#descript_dev=np.array(descript_dev)

In [57]:
len(descriptors1[0][0])

128

In [74]:
def find_matches(matcher,des1, des2):
    # Matching descriptor using KNN algorithm
    try:
        matches = matcher.knnMatch(des1,des2,k=2)
    except:
        return None

    # Store all good matches as per Lowe's Ratio test.
    ratio = 0.6
    good = []
    for m,n in matches:
        if m.distance < ratio*n.distance:
            good.append(m)
    return len(good)

def match(desc_list1, desc_list2, predictions):
    
    # FLANN parameters and initialize
    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
    search_params = dict(checks=50)   # or pass empty dictionary
    flann = cv2.FlannBasedMatcher(index_params,search_params)
    
    best_neighbours_matched=[]
    for i, desc1 in enumerate(desc_list1):
        # print('\n\n')
        # print('image {}'.format(i))
        matches_list=[]
        ind_neighbour=[]
        for j, desc2 in enumerate(desc_list2[i]):
            #print(j)
            if desc2 is not None:
                #print('neighb {}'.format(j))
                n_matches = find_matches(flann, desc1, desc2)
                if n_matches is not None:
                    ind_neighbour.append(predictions[1][i][j]) #Appends index in the data of the neighbour from the image being predicted
                    matches_list.append(n_matches) #Appends matches between the current neighbour and the instance being predicted
                else:
                    continue
        
        #print('len ind neigh',ind_neighbour)
        #print('len matches list',matches_list)
        neighb_matches=np.stack([ind_neighbour,matches_list],axis=1) #Creates numpy array. Rows are descriptors of different neighbours. 
                                                                     # First column is index in the data, Second column is how many matches that neighbour has with the current instance
        neighb_matches=neighb_matches[neighb_matches[:, 1].argsort()[::-1][:len(matches_list)]]  #Sorts array in descending order, more matches on top and less matches at the bottom :5 is the number of neighbours
        #print(neighb_matches)
        best_neighbours_matched.append(neighb_matches)
    
    return best_neighbours_matched
                                 
k_best_matches=match(descriptors1,descriptors2)

In [60]:
k_best_matches[]
#len(k_best_matches)
#for k in range(len(k_best_matches)):
#    print(k)
#    k_best_matches[k][0][1]

array([], shape=(0, 2), dtype=float64)

## TEST predictions

In [None]:
root= './test'
names_test, test_img = load_data(root)


In [67]:
test_feat=generate_features(test_img,base_model, test=True)

In [68]:
test_feat.shape

(1200, 98304)

In [69]:
predicted_neigh = get_nearest_neighbours(feat, test_feat)

In [70]:
kps, descript_neigh = generate_descriptors_neighbours(predicted_neigh)

In [72]:
kps2, descript_test=generate_descriptors(test_img)

In [75]:
k_best_matches_test=match(descript_test, descript_neigh,predicted_neigh)

In [115]:


def give_preds(k_best_matches_test,predicted_neigh, df_coordinates,names_test):
    """Peforms the prediction for the test_set using the k best matches or the best neighbours in case there are no matches produced by SIFT

    Keyword arguments:
    k_best_matches_test -- k best matches list generated by feature matching using SIFT
    predicted_neigh -- predicted k nearest neighbours by the pre-trained model
    df_coordinates -- dataframe where the coordinates of the training points are
    names_test =
    """
    pred_coordinates=[]
    record_no_matches=[]
    for i, best in enumerate(k_best_matches_test):
        if best.size != 0:
            index_best=best[0][0] # This is the index of the neighbour with more matches
            pred_coordinates.append([names_test[i],df_coordinates.loc[index_best,'x'],df_coordinates.loc[index_best,'y']])
        else:
            # if there were no matches, use the average of the locations of the neighbours found by the pre-trained model
            x_coord=np.array([df_coordinates.loc[j,'x'] for j in predicted_neigh[1][i]]).mean()
            y_coord=np.array([df_coordinates.loc[j,'y'] for j in predicted_neigh[1][i]]).mean()
            pred_coordinates.append([names_test[i], x_coord, y_coord])
    
    array_preds=np.stack(pred_coordinates, axis=0)
   
    df_predictions=pd.DataFrame(array_preds, columns=['id','x','y'])
    return df_predictions

#Load the coordinates data set
coordinates = pd.read_csv('train.csv')
            
df_preds = give_preds(k_best_matches_test, predicted_neigh, coordinates,names_test)
        
    
    

In [116]:
df_preds.to_csv('resnet_knn_sift.csv',index=False)

In [102]:
coubnnt=0
for k in range(len(k_best_matches_test)):
    if k_best_matches_test[k].size ==0:
        print(k)
        coubnnt+=1
    else:
        continue
        #print(k_best_matches_test[k][0][1])
print(coubnnt)

10
14
17
30
34
46
57
81
86
99
100
102
131
160
168
177
188
209
216
223
228
286
300
313
331
341
343
353
360
370
375
377
385
399
404
411
412
417
426
430
450
463
525
548
557
586
587
594
605
613
625
647
657
663
670
707
737
755
773
791
793
795
816
826
828
874
877
888
889
901
924
929
958
965
976
977
981
983
991
996
1001
1020
1021
1023
1024
1040
1044
1057
1061
1069
1072
1079
1086
1093
1128
1130
1131
1145
1147
1152
1161
1197
1198
1199
104
