This class breaks the problem up into three main parts:
- `match_data`: matches the linescans to polygons to create training data
- `error_fraction`: calculates the quality of a solution
- `mask`: determines whether a coordinate is on fire

### Example:

In [3]:
%load_ext autoreload
%autoreload 2

from solution import Threshold

  shapely_geos_version, geos_capi_version_string


In [None]:
# Fixed-threshold masking example
s = Threshold()

# Any value works here; 220 is just for demonstration
s.set_threshold(220)

# Configure the open and close kernel sizes
s.set_open_kernel(1)
s.set_close_kernel(64)

# Match linescans to polygons to build the training data
s.match_data()

# Report the quality of this configuration
example_error = s.error_fraction()
print('Error:', example_error)

### Simple optimisation routine
Try a range of thresholds and find the best one

In [None]:
# Sweep a coarse grid of thresholds and report every new best error
s = Threshold()
s.match_data()

# Both kernels are fixed at 1 for this sweep
s.set_open_kernel(1)
s.set_close_kernel(1)

print()

# Start from 1.0 so only candidates with a lower error are reported
error = 1.0

# range() excludes its stop value, so 180, 185, ..., 215 are tried
for threshold in range(180, 220, 5):
    s.set_threshold(threshold)
    candidate = s.error_fraction()

    # Skip anything that is not a strict improvement on the best so far
    if not candidate < error:
        continue

    error = candidate

    print('Best solution so far:')
    print(' threshold    = ', threshold)
    print(' error        = ', error)
    print()

### SciPy Optimize
Optimise for multiple variables

In [14]:
# Fresh Threshold instance, bound to the global name `s`
s = Threshold()
# Match linescans to polygons to build the training data
s.match_data()

134 linescan datasets available
129 training linescan datasets available
37 linescans with matching polygons
28.7% of linescans used
45.0% of polygons used


In [None]:
from scipy import optimize
import numpy as np


def f(params):
    """Objective for the optimiser: the negated F1 score of the global ``s``.

    Args:
        params: Pair ``(t, kc)`` holding the threshold and the close-kernel
            size to apply to ``s``.

    Returns:
        ``-s.f1_score()`` after applying the parameters, so that minimising
        this value maximises the F1 score.
    """
    t, kc = params

    s.set_threshold(t)
    # NOTE(review): the optimiser passes kc as a float; confirm that
    # set_close_kernel accepts non-integer sizes.
    s.set_close_kernel(kc)

    # Negated because optimize.minimize looks for a minimum
    objective = -s.f1_score()

    # Trace each evaluation: threshold, close kernel, objective value
    print('{0: 3.2f}   {1: 3.2f}   {2: 3.4e}'.format(t, kc, objective))

    return objective


# Starting simplex: one (threshold, close kernel) vertex per row
initial_simplex = np.array(
    [
        [220, 800],
        [220, 900],
        [230, 900],
    ],
    dtype=float,
)

# Nelder-Mead ignores the values in x0 when `initial_simplex` is supplied,
# so take the guess from the simplex instead of an unrelated point.
initial_guess = initial_simplex[0].tolist()

result = optimize.minimize(
    f,
    initial_guess,
    method='Nelder-Mead',
    options={'disp': True, 'initial_simplex': initial_simplex},
)

fitted_params = result.x
print(fitted_params)
    

### Save the submission file

In [13]:
# Apply the chosen parameters, then write the submission file
s.set_threshold(227.94)
s.set_close_kernel(922)
# The return value is discarded.
# NOTE(review): presumably called for its side effects; confirm before removing.
s.error_fraction()
# No interpolation is needed, so the file name is a plain string literal.
# It records the threshold as 227 although 227.94 is the value applied.
s.generate_submission('submission_threshold_227_kc_922.csv')

### Auto Threshold method
Use a number of standard deviations from the mean of a given linescan as the threshold

In [None]:
from solution import AutoThreshold

# Auto-threshold masking example
s = AutoThreshold()

# Here the threshold is given in standard deviations
s.set_threshold(1)

# Configure the close kernel size
s.set_close_kernel(1)

# Match linescans to polygons to build the training data
s.match_data()

# Report the quality of this configuration
example_error = s.error_fraction()
print('Error:', example_error)

### SciPy Optimize
Optimise AutoThreshold for multiple variables

In [1]:
from solution import AutoThreshold
# Fresh AutoThreshold instance, bound to the global name `s`
s = AutoThreshold()
# Match linescans to polygons to build the training data
s.match_data()

  shapely_geos_version, geos_capi_version_string


134 linescan datasets available
129 training linescan datasets available
37 linescans with matching polygons
28.7% of linescans used
45.0% of polygons used


In [None]:
from scipy import optimize
import numpy as np


def f(params):
    """Objective for the optimiser: the error fraction of the global ``s``.

    ``params`` is the pair ``(t, kc)``: the threshold and the close-kernel
    size applied to ``s`` before the error is measured.
    """
    t, kc = params

    s.set_threshold(t)
    s.set_close_kernel(kc)
    loss = s.error_fraction()

    # Trace each evaluation: threshold, close kernel, error
    print('{0: 3.2f}   {1: 3.2f}   {2: 3.4e}'.format(t, kc, loss))

    return loss


initial_guess = [1, 1]

# Starting simplex: one (threshold, close kernel) vertex per row
initial_simplex = np.array(
    [
        [1, 1],
        [1, 100],
        [10, 100],
    ],
    dtype=float,
)

solver_options = {'disp': True, 'initial_simplex': initial_simplex}
result = optimize.minimize(f, initial_guess, method='Nelder-Mead', options=solver_options)

fitted_params = result.x
print(fitted_params)
    

In [3]:
# Apply the chosen parameters, then write the submission file
s.set_threshold(5.92)
s.set_close_kernel(81)
# The return value is discarded.
# NOTE(review): presumably called for its side effects; confirm before removing.
s.error_fraction()
# No interpolation is needed, so the file name is a plain string literal
s.generate_submission('submission_autothreshold_5.92_kc_81.csv')