In [None]:
from PIL import Image
import numpy as np
import pandas as pd
import os
import math
import cv2
import tensorflow as tf
import itertools
from sklearn import preprocessing
import pickle
import matplotlib.cm as cm
import random
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix 

In [None]:
# Inference configuration and artefacts shared by the cells below.
# test_folder is prepended verbatim to each image name in make_predictions,
# so it must keep its trailing slash.
test_folder = 'semi_test/'
# CSV listing the image pairs to score (read into df_test further down).
test_file = 'test.csv'
model = tf.keras.models.load_model('writer_model.h5')
# NOTE(review): model_weights is not referenced anywhere else in the visible
# notebook; the call matters for its side effect on `model` - confirm whether
# the return value is needed at all.
model_weights = model.load_weights('writer_model_weights.h5')

# Print every layer with its position as a quick check of the loaded model.
for index, layer in enumerate(model.layers):
    print(index, layer)

# The notebook only calls model.predict, so the weights are frozen.
model.trainable = False
# Passed as `maxlen` to pad_sequences in make_predictions: the number of
# descriptor rows each image is padded/truncated to.
maxlen = 78

# SECURITY: pickle.load executes arbitrary code embedded in the file; only
# load scaler files that come from a trusted source.
# mm_scaler_m is applied (via .transform) to the first image's descriptors.
with open('mm_scaler_m.pkl','rb') as f:
    mm_scaler_m = pickle.load(f)
    
# mm_scaler_n is applied (via .transform) to the second image's descriptors.
with open('mm_scaler_n.pkl','rb') as f:
    mm_scaler_n = pickle.load(f)
    

In [None]:
# Load the list of image pairs to score; make_predictions reads the
# 'img1_name' and 'img2_name' columns of each row.
df_test = pd.read_csv(test_file)
# Quick sanity check of the first rows and the overall (rows, columns) size.
print(df_test.head(4))
print(df_test.shape)


In [None]:
# (height, width) of the arrays produced from resize(): every branch there
# ends with a 690-pixel-wide, 170-pixel-tall image.
# NOTE(review): this constant is not referenced in the visible code - confirm
# whether it is still needed.
reshape_shape = (170, 690)

def add_margin(pil_img, top, right, bottom, left, color):
    """Return a copy of ``pil_img`` surrounded by a solid-colour border.

    Args:
        pil_img: Source PIL image; its mode is reused for the new canvas.
        top, right, bottom, left: Border thickness in pixels on each side.
        color: Fill colour for the border, in a form accepted by ``Image.new``
            for the image's mode.

    Returns:
        A new image of size (width + right + left, height + top + bottom)
        with the original pasted at offset (left, top).
    """
    src_width, src_height = pil_img.size
    canvas_size = (src_width + right + left, src_height + top + bottom)
    canvas = Image.new(pil_img.mode, canvas_size, color)
    canvas.paste(pil_img, (left, top))
    return canvas

def resize(open_image):
    """Normalise an image to 690x170 pixels based on its aspect ratio.

    The image is stretched to a fixed 728-pixel-wide size whose height
    depends on the width/height ratio, cropped to 690 pixels wide, and (for
    the two wider ratio bands) padded top and bottom with colour 255 so the
    result is always 170 pixels tall.

    Args:
        open_image: PIL image to normalise.

    Returns:
        A new PIL image of size (690, 170).
    """
    src_width, src_height = open_image.size
    aspect = src_width / src_height

    # Pick (resize target, crop box, vertical padding) for the ratio band.
    if aspect < 6.0:
        target, box, pad = (728, 182), (10, 5, 700, 175), 0
    elif aspect < 12.0:
        target, box, pad = (728, 104), (10, 5, 700, 95), 40
    else:
        target, box, pad = (728, 56), (10, 10, 700, 50), 65

    cropped = open_image.resize(target, Image.LANCZOS).crop(box)
    if pad == 0:
        # The tallest band already crops to 170 rows; no margin is added.
        return cropped
    return add_margin(cropped, pad, 0, pad, 0, (255))

    

In [None]:
def get_feature(img1, img2, orb, bf):
    """Compute ORB descriptors for a pair of images.

    Args:
        img1: First image, passed unchanged to ``orb.detectAndCompute``.
        img2: Second image, handled the same way.
        orb: Feature extractor exposing ``detectAndCompute(image, mask)`` that
            returns ``(keypoints, descriptors)``.
        bf: Unused; kept so existing callers keep working.

    Returns:
        ``(des1, des2)`` when descriptors were produced for both images,
        otherwise ``([], [])`` so callers can detect the failure with
        ``len()``.
    """
    _, des1 = orb.detectAndCompute(img1, None)
    _, des2 = orb.detectAndCompute(img2, None)

    # The extractor yields None instead of an array when it finds nothing.
    # Test for that explicitly: the previous `des.all() != None` comparison
    # was always true for arrays and only worked through a caught
    # AttributeError.
    if des1 is None or des2 is None:
        return [], []
    return des1, des2
 

In [None]:
def _scale_descriptors(descriptors, scaler):
    """Pad one image's descriptors to ``maxlen`` rows and apply ``scaler``.

    The scaler is applied to the flattened (1, rows * cols) view, then the
    result is reshaped back to (1, rows, cols) for the model.
    """
    padded = tf.keras.utils.pad_sequences([descriptors], maxlen=maxlen)
    batch, steps, width = padded.shape
    scaled = scaler.transform(np.reshape(padded, (batch, steps * width)))
    return np.reshape(scaled, (batch, steps, width))


def make_predictions(df, model):
    """Score every image pair listed in ``df`` with ``model``.

    For each row, both images are loaded from ``test_folder``, normalised
    with ``resize``, and described with ORB. The descriptors are padded,
    scaled with ``mm_scaler_m`` / ``mm_scaler_n`` and fed to ``model``.

    Args:
        df: DataFrame with ``img1_name`` and ``img2_name`` columns.
        model: Object whose ``predict([m, n])`` returns an array indexable
            as ``[0][0]`` for a single pair.

    Returns:
        Tuple ``(prediction_probs, predictions, img1_name, img2_name)`` of
        four lists with one entry per row of ``df``. ``predictions`` holds 1
        where the probability is >= 0.5 and 0 otherwise.
    """
    orb = cv2.ORB_create(90)
    bf = cv2.BFMatcher()
    prediction_probs = []
    predictions = []
    img1_name = []
    img2_name = []

    for index, row in df.iterrows():
        print(index)
        img1_name.append(row['img1_name'])
        img2_name.append(row['img2_name'])

        # Open inside `with` so the file handles are released once the
        # resized copies exist; resize() returns new images that do not
        # depend on the source file.
        with Image.open(test_folder + row['img1_name']) as raw1:
            img1 = np.expand_dims(np.asarray(resize(raw1)), axis=-1)
        with Image.open(test_folder + row['img2_name']) as raw2:
            img2 = np.expand_dims(np.asarray(resize(raw2)), axis=-1)
        print(img1.shape, img2.shape)

        m_list, n_list = get_feature(img1, img2, orb, bf)
        # NOTE(review): the check is asymmetric (== 0 for the first image,
        # <= 1 for the second); kept as-is - confirm whether that is intended.
        if len(m_list) == 0 or len(n_list) <= 1:
            # No usable descriptors: fall back to a random score. This is not
            # reproducible unless NumPy's global seed is set beforehand.
            prob = np.random.rand()
        else:
            m_val = _scale_descriptors(m_list, mm_scaler_m)
            n_val = _scale_descriptors(n_list, mm_scaler_n)
            # Take the scalar explicitly instead of thresholding the array.
            prob = float(model.predict([m_val, n_val])[0][0])

        prediction_probs.append(prob)
        predictions.append(1 if prob >= 0.5 else 0)

    return prediction_probs, predictions, img1_name, img2_name
        
# Score every pair in df_test; the four returned lists have one entry per row
# and are aligned with each other.
prediction_probs, predictions, img1_name, img2_name = make_predictions(df_test, model)


In [None]:
# Collect the per-pair results into one table, one row per scored pair.
output = pd.DataFrame({
    'img1_name': img1_name,
    'img2_name': img2_name,
    'label': predictions,
    'proba': prediction_probs,
})
output.shape

In [None]:
# Build the submission key "<img1_name>_<img2_name>" for every pair.
cols = ['img1_name', 'img2_name']
# Vectorised concatenation instead of a row-wise apply with a Python lambda:
# same strings, no per-row function call, and the result is still a Series
# when the frame has no rows.
output['id'] = output[cols[0]].astype(str) + '_' + output[cols[1]].astype(str)

In [None]:
# Preview the first rows, now including the combined 'id' column.
output.head(3)


In [None]:
# Keep only what the submission needs; the hard label and the separate image
# names are no longer required once 'id' exists.
output = output.drop(columns=['label', 'img1_name', 'img2_name'])


In [None]:
# Preview after dropping the label and image-name columns.
output.head(3)

In [None]:
# Put the columns in submission order: key first, probability second.
output = output[['id', 'proba']]

In [None]:
# Final preview: only 'id' and 'proba' remain, in that order.
output.head(3)

In [None]:
# Write the submission file; index=False keeps the row index out of the CSV.
output.to_csv('submission.csv', index = False)