# Anadolu Sigorta Datathon Challenge

## Libraries

In [1]:
import os

from IPython.display import Image, display
from tensorflow.keras.preprocessing.image import load_img
import PIL
from PIL import ImageOps

from tensorflow import keras
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array, array_to_img

from tensorflow.keras import layers
from tensorflow.keras.metrics import MeanIoU
from tensorflow.keras.models import load_model
from keras import backend as K

import random

## Helpers

In [2]:
def rle(img):
    """Run-length encode the elements of ``img`` that are equal to 1.

    The array is transposed and flattened before scanning, so a 2-D mask is
    visited column by column. Reported positions are 1-based.

    Args:
        img: NumPy array; only elements exactly equal to 1 count as
            foreground.

    Returns:
        A space-separated string of ``start length`` pairs, one pair per run
        of consecutive foreground positions, or ``''`` when there are none.
    """
    # Flat indices of the foreground elements, in increasing order.
    pixels = np.flatnonzero(img.T.flatten() == 1)
    if pixels.size == 0:
        return ''

    # A run ends wherever the next foreground index is not consecutive.
    gaps = np.flatnonzero(np.diff(pixels) > 1)
    first = pixels[np.concatenate(([0], gaps + 1))]
    last = pixels[np.concatenate((gaps, [pixels.size - 1]))]

    # Interleave 1-based starts with run lengths: start, length, start, ...
    runs = np.column_stack((first + 1, last - first + 1)).ravel()
    return ' '.join(map(str, runs.tolist()))

In [3]:
class AnadoluSigortaTest(keras.utils.Sequence):
    """Keras ``Sequence`` that serves resized test images batch by batch.

    Each item is a tuple ``(x, width, height, mask_name)``. ``width``,
    ``height`` and ``mask_name`` describe only the LAST image of the batch,
    so they identify every image only when ``batch_size`` is 1.
    """

    def __init__(self, batch_size, img_size, test_img_paths, type_name=None):
        """Store the batching configuration.

        Args:
            batch_size: Number of images per batch.
            img_size: Tuple passed to ``load_img`` as ``target_size``.
            test_img_paths: Sequence of image file paths.
            type_name: Optional suffix appended to each mask name. When
                omitted, the module-level ``type_name`` variable is read at
                item-access time, as the original implementation did.
        """
        super().__init__()
        self.batch_size = batch_size
        self.img_size = img_size
        self.test_img_paths = test_img_paths
        self.type_name = type_name

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Ceiling division: flooring would silently drop trailing images.
        return (len(self.test_img_paths) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Return the test images that correspond to batch #idx.

        Returns:
            ``(x, width, height, mask_name)`` where ``x`` is a float32 array
            of shape ``(n_images,) + img_size + (3,)`` and the other three
            values describe the last image of the batch.

        Raises:
            IndexError: If ``idx`` is outside ``range(len(self))``.
        """
        if not 0 <= idx < len(self):
            # Also lets plain index-based iteration terminate cleanly.
            raise IndexError(f"batch index {idx} out of range")

        start = idx * self.batch_size
        batch_paths = self.test_img_paths[start : start + self.batch_size]

        # Sized to the actual batch so a short final batch has no padding rows.
        x = np.zeros((len(batch_paths),) + self.img_size + (3,), dtype="float32")
        for j, path in enumerate(batch_paths):
            x[j] = load_img(path, target_size=self.img_size)

        # Only the last image's original size and name are reported, so the
        # full-size image is loaded once instead of once per batch element.
        last_path = batch_paths[-1]
        org_img = load_img(last_path)

        suffix = self.type_name if self.type_name is not None else type_name
        mask_name = os.path.basename(last_path).split('.')[0] + '_' + suffix

        return x, org_img.width, org_img.height, mask_name

In [4]:
def dice_coef(y_true, y_pred, smooth=1):
    """Return the smoothed Dice coefficient of two tensors.

    Both inputs are flattened and the result is
    ``(2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth)``.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor; multiplied element-wise with ``y_true``
            after flattening.
        smooth: Constant added to numerator and denominator.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return numerator / denominator

## Settings & Parameters

In [5]:
# Class suffixes: each one selects a saved model file and is appended to the
# image name to build the ``filename_class`` key.
type_names = ['id1', 'id2', 'id3', 'id4']

# Kaggle input directories holding the saved models and the test images.
model_dir = "/kaggle/input/anadoludatathonmodelsv7/"
test_dir = "/kaggle/input/datathon-challenge/Testing_Imgs/Testing_Imgs/"

# Size each test image is resized to before prediction.
img_size = (1024, 1024)
# Images per batch. The prediction loop reshapes each prediction to
# ``img_size + (1,)``, which only works with one image per batch.
batch_size = 1

# Predictions >= threshold are labelled 1, everything else 0.
# NOTE(review): if the models emit probabilities, ``>= 1`` selects only
# values of exactly 1.0 -- confirm this cut-off is intended.
threshold = 1

## Test Data

In [6]:
# Sorted full paths of every ``.jpeg`` file found directly inside ``test_dir``.
test_img_paths = sorted(
    os.path.join(test_dir, entry)
    for entry in os.listdir(test_dir)
    if entry.endswith(".jpeg")
)

## Load models and predict in a loop

In [7]:
# Empty accumulator with the two submission columns; the per-class
# predictions are concatenated onto it.
all_predictions = pd.DataFrame(
    columns=["filename_class", "encoded_mask"],
)

In [8]:
# NOTE: the loop variable must stay named ``type_name``; AnadoluSigortaTest
# reads that module-level name when it builds each mask name.
for type_name in type_names:

    # Each class id has its own saved model file. ``dice_coef`` is supplied
    # so Keras can resolve that custom object when deserialising the model.
    model_name = f'anadolu_segmentation_{type_name}.h5'
    model = load_model(
        os.path.join(model_dir, model_name),
        custom_objects={"dice_coef": dice_coef},
    )

    test_gen = AnadoluSigortaTest(batch_size, img_size, test_img_paths)

    # Maps "<image name>_<type_name>" to its RLE string.
    predictions = dict()

    for x, width, height, mask_name in test_gen:

        # Get predictions
        test_pred = model.predict(x)

        # Get rid of the batch axis (valid only with one image per batch)
        test_pred = test_pred.reshape(img_size + (1,))

        # Convert proba to class 1,0 using threshold
        mask = np.where(test_pred >= threshold, 1, 0)

        # Resize the mask back to the original image size.
        # scale=False keeps the 0/1 labels (the default rescales them to
        # 0/255) and nearest-neighbour resampling avoids interpolated values,
        # so the resized mask still contains exactly the 1s that rle() counts.
        mask_img = array_to_img(mask, scale=False)
        mask_img = mask_img.resize((width, height), resample=PIL.Image.NEAREST)
        original_mask = img_to_array(mask_img)

        # RLE encoding
        rle_encoded_pred = rle(original_mask)

        # Add to dictionary
        predictions[mask_name] = rle_encoded_pred

    # One row per predicted mask; built directly from the items so an empty
    # dict yields an empty frame instead of a column-length error.
    df_pred = pd.DataFrame(
        list(predictions.items()),
        columns=['filename_class', 'encoded_mask'],
    )

    all_predictions = pd.concat([all_predictions, df_pred], ignore_index=True)

## Load sample submission to filter predictions

In [9]:
# Sample submission file; its ``filename_class`` column lists the rows that
# are kept for the final submission.
sample_sub = pd.read_csv('/kaggle/input/datathon-challenge/sample_submission.csv')

In [10]:
# Keep only the predictions whose key appears in the sample submission.
expected_keys = sample_sub['filename_class'].to_list()
is_expected = all_predictions['filename_class'].isin(expected_keys)
final_predictions = all_predictions[is_expected]

In [11]:
# Write the selected predictions to CSV without the DataFrame index column.
final_predictions.to_csv("third_submission.csv", index=False)