# Infer with Fastai, 512 ensembles

* Use the models trained on 512px random tiles, with TMA images downscaled by 2x.
* Ensemble three 512px models (lr = .01, .025, .036) and take the mode of their three predictions.

* NOTE: if the training images were scaled, inference must apply the same scaling. Need to keep track of this.



# Installs

In [None]:
# Show the contents of the attached pyvips package dataset.
!ls /kaggle/input/pyvips-python-and-deb-package

# Install the bundled .deb archives; --force-depends lets dpkg proceed despite dependency problems.
!dpkg -i --force-depends /kaggle/input/pyvips-python-and-deb-package/linux_packages/archives/*.deb

# Install pyvips from the local package directory only (--no-index skips the package index),
# then confirm it shows up in the installed package list.
!pip install pyvips -f /kaggle/input/pyvips-python-and-deb-package/python_packages/ --no-index
!pip list | grep pyvips

# Imports

In [None]:
import os
import shutil
import glob, pathlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
sns.set_style('darkgrid')
from statistics import mode

import pyvips
pyvips.__version__
#from PIL import Image

# Load Data

In [None]:
# Competition data locations. The two image directories keep their trailing
# slash because file names are appended to them by plain string formatting.
top_dir = '/kaggle/input/UBC-OCEAN'
test_dir = f'{top_dir}/test_images/'
test_thumb_dir = f'{top_dir}/test_thumbnails/'

In [None]:
# ss: sample submission, later filled with the predicted labels and written out.
ss = pd.read_csv('/kaggle/input/UBC-OCEAN/sample_submission.csv')
# x_test: one row per test image; its image_id column is used to build file paths.
x_test = pd.read_csv('/kaggle/input/UBC-OCEAN/test.csv')
x_test.head()

In [None]:
# write_dir = '/kaggle/working/resized_test'
# if not os.path.exists(write_dir):
#     os.mkdir(write_dir)

In [None]:
# Attach the full-resolution image path to every row of the test frame.
from pathlib import Path


def set_test_path(n):
    """Return the path of the full-size test PNG for image id ``n``.

    Thumbnails are deliberately not used: the path always points into
    ``test_dir``, never into ``test_thumb_dir``.
    """
    png_name = f'{n}.png'
    return Path(f'{test_dir}{png_name}')


x_test['test_path'] = x_test.image_id.apply(set_test_path)

In [None]:
# Display the frame to check the new test_path column.
x_test

# Fastai Model

In [None]:
import fastai
from fastai.vision.all import *
import timm

fastai.__version__

## Swap out model here

In [None]:
# Load the three exported learners that form the ensemble. The file names
# differ only by their learning-rate suffix.
learner_paths = (
    '/kaggle/input/ubc-ocean-single-tile-models/random_512px_tma_reduced_2x_train.pkl',
    '/kaggle/input/ubc-ocean-single-tile-models/random_512px_tma_reduced_2x_train_lr_0025.pkl',
    '/kaggle/input/ubc-ocean-single-tile-models/random_512px_tma_reduced_2x_train_lr_0036.pkl',
)
learn_inf1, learn_inf2, learn_inf3 = map(load_learner, learner_paths)
print('learner loaded')

# Predictions
* First attempt used thumbnails
* Now neither writes intermediate files nor uses thumbnails... hopefully it will submit

In [None]:
# THIS HAS TO MATCH TRAINING PIPELINE
# should be generic so I can use the same function for train and test!

def get_random_tile(image, tile_size=512, thresh=0.98, max_tries=50):
    """Return a random, mostly non-black square tile cropped from ``image``.

    Random ``tile_size`` x ``tile_size`` crops are drawn until one has at
    least ``thresh`` of its channel values non-zero, or ``max_tries`` crops
    have been attempted.

    Args:
        image: pyvips image to crop from. Pixel data is read as uint8 --
            assumes an 8-bit image; TODO confirm for all inputs.
        tile_size: Edge length of the square tile, in pixels.
        thresh: Minimum fraction of non-zero channel values for a tile to be
            accepted.
        max_tries: Maximum number of random crops to attempt (counting both
            failed reads and rejected tiles), so the search always ends.

    Returns:
        A pyvips image of size ``tile_size`` x ``tile_size``: the first tile
        that reaches ``thresh``; otherwise the best tile seen; otherwise (no
        crop could be read, or the image is smaller than the tile) an
        all-black 3-band 8-bit image.
    """
    # Halve TMA files, recognised by their width (not sure if this is correct).
    if image.width in (2964, 3388):
        image = image.affine((.5, 0, 0, .5))

    h, w, bands = image.height, image.width, image.bands

    best_tile = None
    best_quality = -1.0

    # A tile can only be cut when the image is at least tile-sized both ways.
    if h >= tile_size and w >= tile_size:
        region = pyvips.Region.new(image)

        for _ in range(max_tries):
            # +1 because randint's upper bound is exclusive; this also makes
            # an image of exactly tile_size valid (offset 0).
            top = np.random.randint(0, h - tile_size + 1)
            left = np.random.randint(0, w - tile_size + 1)

            try:
                # Region.fetch takes (x, y, width, height): left comes first.
                im_data = region.fetch(left, top, tile_size, tile_size)
            except pyvips.Error:
                # Unreadable crop: spend one attempt and draw new offsets.
                continue

            np_data = np.ndarray(buffer=im_data, dtype=np.uint8,
                                 shape=[tile_size, tile_size, bands])

            # Fraction of channel values that are not black.
            quality = np.count_nonzero(np_data) / np_data.size
            if quality >= thresh:
                return pyvips.Image.new_from_array(np_data)

            if quality > best_quality:
                best_quality = quality
                best_tile = np_data

    if best_tile is not None:
        # Nothing met the threshold; a real tile is still more useful than
        # a blank one.
        print(f'No tile reached quality {thresh}, using best found ({best_quality:.3f})')
        return pyvips.Image.new_from_array(best_tile)

    print('Timed out, failed image')
    return pyvips.Image.black(tile_size, tile_size, bands=3)

## Loop through test data, process, and predict

In [None]:
%%time
# Loop through the test images, cut one random tile from each, and let the
# three learners vote on its label.
# NOTE(review): the notebook header describes models trained on 512px tiles
# but tiles are cut at 1024 here -- confirm this matches the training pipeline.
tile_size = 1024
learners = (learn_inf1, learn_inf2, learn_inf3)
preds = []
for img_path in x_test.test_path:

    # load file; the path is passed as str in case the installed pyvips only
    # accepts str/bytes filenames.
    # NOTE(review): access='sequential' is meant for a single top-to-bottom
    # read; repeated random crops may fail to fetch -- confirm this is intended.
    im = pyvips.Image.new_from_file(str(img_path), access='sequential')

    # call function to get random tile
    im_out = get_random_tile(im, tile_size)

    # convert once and share the same array between the three learners
    tile = np.asarray(im_out)
    votes = [learner.predict(tile)[0] for learner in learners]

    # majority vote across the ensemble
    preds.append(mode(votes))

preds

# Submit preds

In [None]:
# Fill the label column with the ensemble predictions and write the
# submission file. Bracket assignment is used because attribute assignment
# would silently set a plain attribute instead if the column were missing.
# Assumes the rows of ss are in the same order as x_test -- TODO confirm.
ss['label'] = preds
ss.to_csv('submission.csv', index=False)
ss.head()