# Script for running Machine-Learning models for Detecting Hot Spots
## Part 2: Inference

#### Requirements
* matplotlib
* numpy
* pandas
* pycaret 1.0.1
* rasterio

In [None]:
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

import pycaret
import pycaret.classification as cl

import rasterio

#### Settings 

In [None]:
# Path of the saved model; it is passed to run_inference_v2, which loads it with cl.load_model.
MODEL = '../models/2020-10-30_catBoost_tuned_model'

### Automation

In [None]:
def run_inference_v2(model_path, infile_path, outfile_label_path, outfile_score_path):
    """Classify every pixel of a raster and write a label and a score raster.

    The object returned by ``cl.load_model(model_path)`` is indexed: element 0
    is used as the preprocessing transformer (``transform``) and element 1 as
    the fitted classifier (``predict_proba`` / ``classes_``).

    Args:
        model_path: Path handed to ``cl.load_model``.
        infile_path: Input raster. Every band becomes one feature column,
            named after the band description.
        outfile_label_path: Output raster with a single uint8 band holding,
            per pixel, the class with the highest predicted probability.
        outfile_score_path: Output raster with one uint8 band per class
            holding the class probability in percent, rounded to an integer.

    Returns:
        None. The results are written to the two output files, which reuse
        the driver, size, transform and CRS of the input raster.
    """
    print("Processing dataset input:", infile_path)
    print("Processing dataset output:", outfile_label_path)
    print("Processing dataset output:", outfile_score_path)

    model = cl.load_model(model_path)
    preprocessor, classifier = model[0], model[1]
    try:
        # Best effort only: request all cores if the estimator accepts the attribute.
        classifier.n_jobs = -1
    except Exception:
        # Deliberately ignored; inference works without the parallelism hint.
        pass

    # Class labels in the column order used by predict_proba.
    classes = np.asarray(classifier.classes_)
    n_classes = len(classes)

    with rasterio.open(infile_path) as src:
        ds_shape = src.shape
        # (bands, rows, cols) -> (pixels, bands); the band count comes from the
        # dataset itself instead of a hard-coded number.
        pixels = src.read().reshape(src.count, -1).T
        data_reshaped = pd.DataFrame(pixels, columns=src.descriptions)
        # Georeferencing and format shared by both outputs, captured while the
        # input is open so it does not have to be opened a second time.
        out_profile = dict(
            driver=src.driver,
            width=src.width,
            height=src.height,
            dtype='uint8',
            transform=src.transform,
            crs=src.crs,
        )

    # calculate class probabilities: (pixels, classes) -> (classes, rows, cols)
    proba = classifier.predict_proba(preprocessor.transform(data_reshaped))
    proba_reshaped = proba.T.reshape(n_classes, *ds_shape)
    # get majority class: translate the argmax column index back to the class label
    class_reshaped = classes[np.argmax(proba, axis=1)].reshape(ds_shape)

    # write output
    # NOTE(review): the uint8 cast assumes class labels are integers in 0..255 - confirm.
    with rasterio.open(outfile_label_path, mode='w', count=1, **out_profile) as dst:
        dst.write(class_reshaped.astype(rasterio.uint8), 1)

    with rasterio.open(outfile_score_path, mode='w', count=n_classes, **out_profile) as dst:
        for band_index, class_label in enumerate(classes, start=1):
            dst.set_band_description(band_index, 'Class id: {}'.format(class_label))
        # Probabilities are stored as whole percentages.
        dst.write(np.around(proba_reshaped * 100, decimals=0).astype(rasterio.uint8))

## Run Processing 

In [None]:
# Collect the GeoTIFFs to run inference on.
# NOTE: this is an absolute, machine-specific Windows path - adjust it per environment.
# glob gives no ordering guarantee, so sort for a reproducible processing order.
flist = sorted(glob.glob(r'K:\127_HotSpotOptimizer\hot_spot_classifier\data_inference\*.tif'))

In [None]:
# Notebook cell: the bare expression displays the collected file paths for a quick visual check.
flist[:]

In [None]:
# Run inference on every collected raster; the outputs are written next to the input.
for f in flist:
    # The outputs end in '.tif' as well and live in the same folder, so a re-run
    # would pick them up as inputs. Skip anything this loop produced earlier.
    if f.endswith(('_classified.tif', '_score.tif')):
        continue
    stem = os.path.splitext(f)[0]
    outfile_prediction = stem + '_classified.tif'
    outfile_score = stem + '_score.tif'
    # run_inference_v2 writes files and returns None; `pred` is kept for compatibility.
    pred = run_inference_v2(MODEL, f, outfile_prediction, outfile_score)