In [None]:
import numpy as np
import pandas as pd
from math import isclose

from lightgbm import LGBMRegressor
from dddex.levelSetKDEx import *
from dddex.loadData import *

from fastcore.test import *

## Loading Yaz Data

In [None]:
# Number of days handed to the loader as the length of the test period.
testDays = 182
data, XTrain, yTrain, XTest, yTest = loadDataYaz(testDays = testDays)

# Scaling values of all rows labelled 'test', kept as a plain list for the scalingList tests.
isTestRow = data['label'] == 'test'
scalingList = data.loc[isTestRow, 'scalingValue'].tolist()

> [0;32m/home/kagu/dddex/dddex/loadData.py[0m(33)[0;36mloadDataYaz[0;34m()[0m
[0;32m     32 [0;31m    [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 33 [0;31m    [0mbaseDir[0m [0;34m=[0m [0mjoin[0m[0;34m([0m[0mmodulePath[0m[0;34m,[0m [0;34m'datasets'[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     34 [0;31m    [0mdataFilename[0m [0;34m=[0m [0mjoin[0m[0;34m([0m[0mbaseDir[0m[0;34m,[0m [0;34m'dataYaz.csv'[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  currentFile


'/home/kagu/dddex/dddex/loadData.py'


ipdb>  realPath


'/home/kagu/dddex/dddex/loadData.py'


ipdb>  dirPath


'/home/kagu/dddex/dddex'


ipdb>  q


BdbQuit: 

In [None]:
# Point estimator handed to the level-set models in the cells below:
# a LightGBM regressor with max_depth = 4 and n_jobs = 1, fitted on the training split.
LGBM = LGBMRegressor(max_depth = 4, n_jobs = 1)
LGBM.fit(X = XTrain, y = yTrain)

In [None]:
# The test split must hold (number of distinct ids) * testDays rows in total,
# for the features as well as for the targets.
expectedTestRows = len(data['id'].unique()) * testDays

assert XTest.shape[0] == expectedTestRows
assert yTest.shape[0] == expectedTestRows

In [None]:
# NOTE(review): this cell repeats, statement for statement, the model construction and fit
# of the cell two cells above; one of the two is presumably redundant.
LGBM = LGBMRegressor(max_depth = 4, n_jobs = 1)
LGBM.fit(X = XTrain, y = yTrain)

## LS_KDEx

In [None]:
LS_KDEx = LevelSetKDEx(estimator = LGBM, binSize = 100)
LS_KDEx.fit(XTrain, yTrain)

# Fitting must keep the targets untouched and store the estimator's in-sample predictions.
yPredTrain = LGBM.predict(XTrain)

test_eq(LS_KDEx.y, yTrain)
test_eq(LS_KDEx.yPred, yPredTrain)

### Standard Attributes

In [None]:
# All train indices have to be part of indicesPerBin
# and no index may show up more than once.
indicesList = [index
               for binIndices in LS_KDEx.indicesPerBin.values()
               for index in binIndices]
expectedIndices = np.arange(XTrain.shape[0])

test_eq(set(indicesList), set(expectedIndices))
test_eq(len(indicesList), len(expectedIndices))

### Lower Bounds

In [None]:
# Lower-bound structure has to be correct:
# the predictions of a bin must not fall below the bin's own lower bound
# and must stay strictly below the lower bound of the following bin.
yPred = LS_KDEx.yPred
indicesPerBin = LS_KDEx.indicesPerBin
lowerBoundPerBin = LS_KDEx.lowerBoundPerBin

lastBinIndex = max(indicesPerBin.keys())

for binIndex, indices in indicesPerBin.items():
    predsOfBin = yPred[indices]
    minValue = predsOfBin.min()
    maxValue = predsOfBin.max()

    assert minValue >= lowerBoundPerBin.loc[binIndex]

    # The last bin has no successor, so there is no upper limit to compare against.
    if binIndex < lastBinIndex:
        assert maxValue < lowerBoundPerBin.loc[binIndex + 1]

### getWeights

#### Standard Settings

In [None]:
# Weights-Output Test
# Rebuild by hand which training indices every test prediction is expected to receive:
# a prediction belongs to the last bin whose lower bound does not exceed it.

yPredTest = LGBM.predict(XTest)
indicesPerBin = LS_KDEx.indicesPerBin
lowerBoundPerBin = LS_KDEx.lowerBoundPerBin

insertionPoints = np.searchsorted(a = lowerBoundPerBin, v = yPredTest, side = 'right')
binPerPred = insertionPoints - 1
indicesPerPred = [indicesPerBin[binIndex] for binIndex in binPerPred]

#---

weightsAll = LS_KDEx.getWeights(X = XTest, outputType = 'all')

# Check if every bin contains at least 100 observations.
# With outputType = 'all' the positions of a weights vector are compared against training
# indices further down, so its length is not the bin size. The bin size is the number of
# strictly positive weights.
binSizesReal = [int((weightsAll[i] > 0).sum()) for i in range(XTest.shape[0])]
assert (np.array(binSizesReal) >= 100).all()

test_eq(len(weightsAll), XTest.shape[0])

for i in range(len(weightsAll)):
    weights = weightsAll[i]

    # The weights have to be non-negative and sum to one.
    assert all(weights >= 0)
    assert isclose(weights.sum(), 1)

    # Exactly the members of the prediction's bin carry positive weight.
    test_eq(np.where(weights > 0)[0], np.sort(indicesPerPred[i]))

#---

weightsOnlyPos = LS_KDEx.getWeights(X = XTest, outputType = 'onlyPositiveWeights')

# Every test observation has to be matched with at least 100 training indices.
binSizesReal = [len(weightsOnlyPos[row][1]) for row in range(XTest.shape[0])]
assert np.all(np.asarray(binSizesReal) >= 100)

test_eq(len(weightsOnlyPos), XTest.shape[0])

for i, weightsData in enumerate(weightsOnlyPos):
    weights, indices = weightsData[0], weightsData[1]

    assert (weights > 0).all()
    assert isclose(weights.sum(), 1)

    # The returned indices have to equal the members of the bin rebuilt by hand.
    test_eq(indices, indicesPerPred[i])
    
#---

weightsSummarized = LS_KDEx.getWeights(X = XTest, outputType = 'summarized')

test_eq(len(weightsSummarized), XTest.shape[0])

for i, weightsData in enumerate(weightsSummarized):
    weights, values = weightsData[0], weightsData[1]

    assert (weights > 0).all()
    assert isclose(weights.sum(), 1)

    # Each distinct target value of the bin has to be listed exactly once.
    test_eq(len(values), len(np.unique(values)))
    test_eq(set(yTrain[indicesPerPred[i]]), set(values))
    
#---

weightsCumDistr = LS_KDEx.getWeights(X = XTest, outputType = 'cumulativeDistribution')

# Every test observation has to be matched with at least 100 training values.
binSizesReal = [len(weightsCumDistr[row][1]) for row in range(XTest.shape[0])]
assert np.all(np.asarray(binSizesReal) >= 100)

test_eq(len(weightsCumDistr), XTest.shape[0])

for i, distrData in enumerate(weightsCumDistr):
    cumProb, values = distrData[0], distrData[1]

    # Cumulative probabilities are positive, sorted and end at one ...
    assert (cumProb > 0).all()
    assert isclose(cumProb.max(), 1)
    test_eq(cumProb, np.sort(cumProb))

    # ... and grow in equally sized steps.
    probSteps = np.diff(cumProb)
    assert np.isclose(probSteps, probSteps[0]).all()

    sortedTargetsByHand = np.sort(yTrain[indicesPerPred[i]])
    test_eq(values, sortedTargetsByHand)

#---

weightsDistrSummarized = LS_KDEx.getWeights(X = XTest, outputType = 'cumulativeDistributionSummarized')

test_eq(len(weightsDistrSummarized), XTest.shape[0])

for i, distrData in enumerate(weightsDistrSummarized):
    cumProb, values = distrData[0], distrData[1]

    assert (cumProb > 0).all()
    assert isclose(cumProb.max(), 1)
    test_eq(cumProb, np.sort(cumProb))

    # Each distinct target value of the bin has to be listed exactly once.
    test_eq(len(values), len(np.unique(values)))
    test_eq(set(values), set(yTrain[indicesPerPred[i]]))
    

#### ScalingList

In [None]:
# Testing scalingList
# The expected training indices per test prediction are rebuilt by hand from the fitted bins.

yPredTest = LGBM.predict(XTest)
indicesPerBin = LS_KDEx.indicesPerBin
lowerBoundPerBin = LS_KDEx.lowerBoundPerBin

insertionPoints = np.searchsorted(a = lowerBoundPerBin, v = yPredTest, side = 'right')
binPerPred = insertionPoints - 1
indicesPerPred = [indicesPerBin[binIndex] for binIndex in binPerPred]

#---

weightsSummarized = LS_KDEx.getWeights(X = XTest, outputType = 'summarized', scalingList = scalingList)

test_eq(len(weightsSummarized), XTest.shape[0])

for i, weightsData in enumerate(weightsSummarized):
    weights, values = weightsData[0], weightsData[1]

    # The values have to be the bin's targets multiplied by the scaling value of observation i.
    test_eq(len(values), len(np.unique(values)))
    test_eq(set(yTrain[indicesPerPred[i]] * scalingList[i]), set(values))

#---

weightsCumDistr = LS_KDEx.getWeights(X = XTest, outputType = 'cumulativeDistribution', scalingList = scalingList)

test_eq(len(weightsCumDistr), XTest.shape[0])

for i, distrData in enumerate(weightsCumDistr):
    cumProb, values = distrData[0], distrData[1]

    sortedTargetsByHand = np.sort(yTrain[indicesPerPred[i]])
    test_eq(values, sortedTargetsByHand * scalingList[i])

#---

weightsDistrSummarized = LS_KDEx.getWeights(X = XTest, outputType = 'cumulativeDistributionSummarized', scalingList = scalingList)

test_eq(len(weightsDistrSummarized), XTest.shape[0])

for i, distrData in enumerate(weightsDistrSummarized):
    cumProb, values = distrData[0], distrData[1]

    test_eq(len(values), len(np.unique(values)))

    sortedTargetsByHand = np.sort(yTrain[indicesPerPred[i]])
    test_eq(set(values), set(sortedTargetsByHand * scalingList[i]))
    

### predictQ

In [None]:
# Testing predictQ-method
# The training targets of each test prediction's bin are collected by hand as reference.

yPredTest = LGBM.predict(XTest)
indicesPerBin = LS_KDEx.indicesPerBin
lowerBoundPerBin = LS_KDEx.lowerBoundPerBin

insertionPoints = np.searchsorted(a = lowerBoundPerBin, v = yPredTest, side = 'right')
binPerPred = insertionPoints - 1
indicesPerPred = [indicesPerBin[binIndex] for binIndex in binPerPred]
yTrainPerPred = [yTrain[indices] for indices in indicesPerPred]

#---

probs = [0.001, 0.5, 0.999]
quantileDict = LS_KDEx.predictQ(X = XTest, probs = probs, outputAsDf = False, scalingList = None)
quantileDf = LS_KDEx.predictQ(X = XTest, probs = probs, outputAsDf = True, scalingList = None)

# Both output formats have to carry the same content, keyed by the requested probabilities.
test_eq(pd.DataFrame(quantileDict), quantileDf)
test_eq(list(quantileDict.keys()), probs)

lowProb, midProb, highProb = probs

for i in range(quantileDf.shape[0]):
    # Quantiles must not decrease with growing probability.
    quantilesOfRow = quantileDf.iloc[i, :]
    assert (np.diff(quantilesOfRow) >= 0).all()

    # The extreme probabilities are expected to return the smallest and largest target of the bin.
    yTrainOfBin = yTrainPerPred[i]
    test_eq(yTrainOfBin.min(), quantileDf.loc[i, lowProb])
    test_eq(yTrainOfBin.max(), quantileDf.loc[i, highProb])

    medianByHand = np.quantile(a = yTrainOfBin, q = midProb, method = 'inverted_cdf')
    test_eq(medianByHand, quantileDf.loc[i, midProb])

## LS_KDEx_kNN

### Standard Attributes

In [None]:
LS_KDEx_kNN = LevelSetKDEx_kNN(estimator = LGBM, binSize = 100)
LS_KDEx_kNN.fit(XTrain, yTrain)

# Fitting must keep the targets untouched and store the estimator's in-sample predictions.
yPredTrain = LGBM.predict(XTrain)

test_eq(LS_KDEx_kNN.y, yTrain)
test_eq(LS_KDEx_kNN.yPred, yPredTrain)

### getWeights

#### Standard Settings

In [None]:
# Weights-Output Test
LS_KDEx_kNN = LevelSetKDEx_kNN(estimator = LGBM, binSize = 100)
LS_KDEx_kNN.fit(XTrain, yTrain)

nn = LS_KDEx_kNN.nearestNeighborsOnPreds
yPredTest = LGBM.predict(XTest)
# Predictions as a single-column matrix. The target shape is passed positionally because
# the `newshape` keyword of np.reshape is deprecated in recent NumPy releases.
yPredTest_reshaped = np.reshape(yPredTest, (len(yPredTest), 1))

weightsList = LS_KDEx_kNN.getWeights(X = XTest, outputType = 'onlyPositiveWeights')
binSizesReal = [len(weightsList[i][1]) for i in range(XTest.shape[0])]
# Query as many neighbors as the largest bin holds, so that every bin can be
# rebuilt by hand from the leading columns of one neighbor matrix.
distancesDf, neighborsMatrix = nn.kneighbors(X = yPredTest_reshaped, n_neighbors = max(binSizesReal))

# Check if all bins contain at least 100 observations
assert (np.array(binSizesReal) >= 100).all()

#---

weightsAll = LS_KDEx_kNN.getWeights(X = XTest, outputType = 'all')

test_eq(len(weightsAll), XTest.shape[0])

for i, weights in enumerate(weightsAll):
    assert (weights >= 0).all()
    assert isclose(weights.sum(), 1)

    # Positive weight has to sit on exactly the nearest neighbors that make up the bin.
    neighborsByHand = neighborsMatrix[i, 0:binSizesReal[i]]
    positiveIndices = np.where(weights > 0)[0]
    test_eq(set(neighborsByHand), set(positiveIndices))

#---

weightsOnlyPos = LS_KDEx_kNN.getWeights(X = XTest, outputType = 'onlyPositiveWeights')

test_eq(len(weightsOnlyPos), XTest.shape[0])

for i in range(len(weightsOnlyPos)):
    weights = weightsOnlyPos[i][0]
    indices = weightsOnlyPos[i][1]

    assert all(weights > 0)
    assert isclose(weights.sum(), 1)

    # The returned indices have to be the nearest neighbors of the prediction.
    test_eq(set(neighborsMatrix[i, 0:binSizesReal[i]]), set(indices))

    # A bin with more than 100 members is only expected when neighbors tie in distance:
    # all neighbors from the 100th up to the last bin member must then be equally far away.
    # The comparison result is asserted here; previously it was computed and discarded.
    if len(indices) > 100:
        assert np.isclose(np.diff(distancesDf[i, 99:binSizesReal[i]]), 0).all()
    
#---

weightsSummarized = LS_KDEx_kNN.getWeights(X = XTest, outputType = 'summarized')

test_eq(len(weightsSummarized), XTest.shape[0])

for i, weightsData in enumerate(weightsSummarized):
    weights, values = weightsData[0], weightsData[1]

    # Targets of the nearest neighbors that make up the bin of observation i.
    neighborsByHand = neighborsMatrix[i, 0:binSizesReal[i]]
    valuesByHand = yTrain[neighborsByHand]

    assert (weights > 0).all()
    assert isclose(weights.sum(), 1)

    # Each distinct target value of the bin has to be listed exactly once.
    test_eq(len(values), len(np.unique(values)))
    test_eq(set(valuesByHand), set(values))
    
#---

weightsCumDistr = LS_KDEx_kNN.getWeights(X = XTest, outputType = 'cumulativeDistribution')

test_eq(len(weightsCumDistr), XTest.shape[0])

for i, distrData in enumerate(weightsCumDistr):
    cumProb, values = distrData[0], distrData[1]

    # Targets of the nearest neighbors that make up the bin of observation i.
    neighborsByHand = neighborsMatrix[i, 0:binSizesReal[i]]
    valuesByHand = yTrain[neighborsByHand]

    # Cumulative probabilities are positive, sorted, end at one and grow in equal steps.
    assert (cumProb > 0).all()
    assert isclose(cumProb.max(), 1)
    test_eq(cumProb, np.sort(cumProb))

    probSteps = np.diff(cumProb)
    assert np.isclose(probSteps, probSteps[0]).all()

    # The values are returned in ascending order and match the sorted neighbor targets.
    test_eq(values, np.sort(values))
    test_eq(np.sort(valuesByHand), values)

#---

weightsDistrSummarized = LS_KDEx_kNN.getWeights(X = XTest, outputType = 'cumulativeDistributionSummarized')

test_eq(len(weightsDistrSummarized), XTest.shape[0])

for i, distrData in enumerate(weightsDistrSummarized):
    cumProb, values = distrData[0], distrData[1]

    # Targets of the nearest neighbors that make up the bin of observation i.
    neighborsByHand = neighborsMatrix[i, 0:binSizesReal[i]]
    valuesByHand = yTrain[neighborsByHand]

    assert (cumProb > 0).all()
    assert isclose(cumProb.max(), 1)
    test_eq(cumProb, np.sort(cumProb))

    # Distinct, ascending values that cover exactly the neighbor targets.
    test_eq(len(values), len(np.unique(values)))
    test_eq(values, np.sort(values))
    test_eq(set(valuesByHand), set(values))
    

#### Artificially Big Bins

In [None]:
# Enforcing bins with size bigger than binSize
binSize = 10

LS_KDEx_kNN = LevelSetKDEx_kNN(estimator = LGBM, binSize = binSize)

# Done to ensure that bins bigger than binSize happen: the training data is stacked
# binSize + 1 times, so the copies of one observation are expected to tie in distance.
XTrainDuplicated = np.concatenate([XTrain] * (binSize + 1), axis = 0)
yTrainDuplicated = np.concatenate([yTrain] * (binSize + 1), axis = 0)

LS_KDEx_kNN.fit(XTrainDuplicated, yTrainDuplicated)

#---

nn = LS_KDEx_kNN.nearestNeighborsOnPreds
yPredTest = LGBM.predict(XTest)
# Predictions as a single-column matrix. The target shape is passed positionally because
# the `newshape` keyword of np.reshape is deprecated in recent NumPy releases.
yPredTest_reshaped = np.reshape(yPredTest, (len(yPredTest), 1))

# One neighbor more than binSize, so the distance just beyond the nominal bin is available.
distancesDf, neighborsMatrix = nn.kneighbors(X = yPredTest_reshaped, n_neighbors = binSize + 1)

#---

weightsOnlyPos = LS_KDEx_kNN.getWeights(X = XTest, outputType = 'onlyPositiveWeights')

test_eq(len(weightsOnlyPos), XTest.shape[0])

for i in range(len(weightsOnlyPos)):
    weights = weightsOnlyPos[i][0]
    indices = weightsOnlyPos[i][1]

    assert all(weights > 0)
    assert isclose(weights.sum(), 1)

    # The binSize nearest neighbors always have to be part of the bin.
    assert set(neighborsMatrix[i, 0:binSize]) <= set(indices)

    # A bin exceeding binSize requires a distance tie between the last regular neighbor
    # and the first one beyond it. The guard compares against binSize; the former
    # hard-coded 100 left this check unreachable for bins of size 11 to 100.
    if len(indices) > binSize:
        test_eq(distancesDf[i, binSize-1], distancesDf[i, binSize])

#### Bins with only 1 Unique Value

In [None]:
# Enforcing bins with only one unique value
binSize = 10

LS_KDEx_kNN = LevelSetKDEx_kNN(estimator = LGBM, binSize = binSize)

# The training data is stacked exactly binSize times; this is meant to produce bins that
# consist of the copies of a single observation and hence hold one unique target value.
XTrainDuplicated = np.concatenate([XTrain] * binSize, axis = 0)
yTrainDuplicated = np.concatenate([yTrain] * binSize, axis = 0)

LS_KDEx_kNN.fit(XTrainDuplicated, yTrainDuplicated)

#---

# Named after the requested output type ('summarized').
weightsSummarized = LS_KDEx_kNN.getWeights(X = XTest, outputType = 'summarized')

test_eq(len(weightsSummarized), XTest.shape[0])

for i in range(len(weightsSummarized)):
    weights = weightsSummarized[i][0]
    values = weightsSummarized[i][1]

    assert all(weights > 0)
    assert isclose(weights.sum(), 1)

    # Only one distinct target value may remain per test observation.
    test_eq(len(values), 1)

#### ScalingList

In [None]:
# Testing scalingList
binSize = 20

LS_KDEx_kNN = LevelSetKDEx_kNN(estimator = LGBM, binSize = binSize)

LS_KDEx_kNN.fit(XTrain, yTrain)
nn = LS_KDEx_kNN.nearestNeighborsOnPreds
yPredTest = LGBM.predict(XTest)
# Predictions as a single-column matrix. The target shape is passed positionally because
# the `newshape` keyword of np.reshape is deprecated in recent NumPy releases.
yPredTest_reshaped = np.reshape(yPredTest, (len(yPredTest), 1))

# The binSize nearest training predictions of every test prediction, used as reference below.
distancesDf, neighborsMatrix = nn.kneighbors(X = yPredTest_reshaped, n_neighbors = binSize)

#---

weightsSummarized = LS_KDEx_kNN.getWeights(X = XTest, outputType = 'summarized', scalingList = scalingList)

test_eq(len(weightsSummarized), XTest.shape[0])

for i, weightsData in enumerate(weightsSummarized):
    weights, values = weightsData[0], weightsData[1]

    test_eq(len(values), len(np.unique(values)))

    # Only a subset check: the scaled targets of the binSize nearest neighbors
    # have to be contained in the returned values.
    scaledNeighborTargets = yTrain[neighborsMatrix[i, :]] * scalingList[i]
    assert set(scaledNeighborTargets) <= set(values)

#---

weightsCumDistr = LS_KDEx_kNN.getWeights(X = XTest, outputType = 'cumulativeDistribution', scalingList = scalingList)

test_eq(len(weightsCumDistr), XTest.shape[0])

for i, distrData in enumerate(weightsCumDistr):
    cumProb, values = distrData[0], distrData[1]

    test_eq(values, np.sort(values))

    scaledNeighborTargets = yTrain[neighborsMatrix[i, :]] * scalingList[i]
    assert set(scaledNeighborTargets) <= set(values)

#---

weightsDistrSummarized = LS_KDEx_kNN.getWeights(X = XTest, outputType = 'cumulativeDistributionSummarized', scalingList = scalingList)

test_eq(len(weightsDistrSummarized), XTest.shape[0])

for i, distrData in enumerate(weightsDistrSummarized):
    cumProb, values = distrData[0], distrData[1]

    test_eq(len(values), len(np.unique(values)))
    test_eq(values, np.sort(values))

    scaledNeighborTargets = yTrain[neighborsMatrix[i, :]] * scalingList[i]
    assert set(scaledNeighborTargets) <= set(values)
    

### predictQ

In [None]:
# Testing predictQ-method
binSize = 15

LS_KDEx_kNN = LevelSetKDEx_kNN(estimator = LGBM, binSize = binSize)

LS_KDEx_kNN.fit(XTrain, yTrain)
nn = LS_KDEx_kNN.nearestNeighborsOnPreds
yPredTest = LGBM.predict(XTest)
# Predictions as a single-column matrix. The target shape is passed positionally because
# the `newshape` keyword of np.reshape is deprecated in recent NumPy releases.
yPredTest_reshaped = np.reshape(yPredTest, (len(yPredTest), 1))

weightsList = LS_KDEx_kNN.getWeights(X = XTest, outputType = 'onlyPositiveWeights')
binSizesReal = [len(weightsList[i][1]) for i in range(XTest.shape[0])]
# Query as many neighbors as the largest bin holds, so that every bin can be
# rebuilt by hand from the leading columns of one neighbor matrix.
distancesDf, neighborsMatrix = nn.kneighbors(X = yPredTest_reshaped, n_neighbors = max(binSizesReal))

#---

probs = [0.001, 0.5, 0.999]
quantileDict = LS_KDEx_kNN.predictQ(X = XTest, probs = probs, outputAsDf = False, scalingList = None)
quantileDf = LS_KDEx_kNN.predictQ(X = XTest, probs = probs, outputAsDf = True, scalingList = None)

# Both output formats have to carry the same content, keyed by the requested probabilities.
test_eq(pd.DataFrame(quantileDict), quantileDf)
test_eq(list(quantileDict.keys()), probs)

lowProb, midProb, highProb = probs

for i in range(quantileDf.shape[0]):
    # Quantiles must not decrease with growing probability.
    quantilesOfRow = quantileDf.iloc[i, :]
    assert (np.diff(quantilesOfRow) >= 0).all()

    # Targets of the nearest neighbors that make up the bin of observation i.
    binSizeReal = binSizesReal[i]
    valuesByHand = yTrain[neighborsMatrix[i, 0:binSizeReal]]

    # The extreme probabilities are expected to return the smallest and largest target of the bin.
    test_eq(valuesByHand.min(), quantileDf.loc[i, lowProb])
    test_eq(valuesByHand.max(), quantileDf.loc[i, highProb])

    medianByHand = np.quantile(a = valuesByHand, q = midProb, method = 'inverted_cdf')
    test_eq(medianByHand, quantileDf.loc[i, midProb])

## Generate Bins

In [None]:
# Testing various artificial inputs of 'generateBins'

# 100 distinct predictions with binSize 10: ten bins of ten consecutive indices are expected.
yPred = np.arange(100)
indicesPerBin, lowerBoundPerBin = generateBins(binSize = 10, yPred = yPred)

test_eq(list(indicesPerBin.keys()), list(range(10)))

# Bin k is expected to hold the indices 10k, 10k + 1, ..., 10k + 9.
indicesPerBinTest = {binIndex: np.arange(10 * binIndex, 10 * binIndex + 10)
                     for binIndex in range(10)}

for i in range(10):
    test_eq(indicesPerBin[i], indicesPerBinTest[i])

# The first bin is unbounded below. -np.inf is used because the alias np.NINF
# was removed in NumPy 2.0.
lowerBoundPerBinTest = [-np.inf, 9.5, 19.5, 29.5, 39.5, 49.5, 59.5, 69.5, 79.5, 89.5]

test_eq(list(lowerBoundPerBin), lowerBoundPerBinTest)
test_eq(list(lowerBoundPerBin.index), list(range(10)))

#---

# Every prediction value occurs twice (index v and index 100 + v), so a bin of
# size 10 is expected to cover five distinct values together with their duplicates.
yPred = np.append(np.arange(100), np.arange(100))
indicesPerBin, lowerBoundPerBin = generateBins(binSize = 10, yPred = yPred)

test_eq(list(indicesPerBin.keys()), list(range(20)))

# Bin k is expected to be [5k, 100 + 5k, 5k + 1, 100 + 5k + 1, ..., 5k + 4, 100 + 5k + 4],
# e.g. bin 0 is [0, 100, 1, 101, 2, 102, 3, 103, 4, 104].
indicesPerBinTest = {binIndex: np.array([5 * binIndex + step + offset
                                         for step in range(5)
                                         for offset in (0, 100)])
                     for binIndex in range(20)}

for i in indicesPerBinTest.keys():
    test_eq(indicesPerBin[i], indicesPerBinTest[i])

# The first bin is unbounded below. -np.inf is used because the alias np.NINF
# was removed in NumPy 2.0.
lowerBoundPerBinTest = [-np.inf] + list(np.arange(4.5, 99.5, 5))

test_eq(list(lowerBoundPerBin), lowerBoundPerBinTest)
test_eq(list(lowerBoundPerBin.index), list(range(20)))

#---

# Check if creation of last bin works correctly
# 20 predictions (each value twice) with binSize 5: the expected result below has
# three bins, of which the last one holds eight indices.
yPred = np.append(np.arange(10), np.arange(10))
indicesPerBin, lowerBoundPerBin = generateBins(binSize = 5, yPred = yPred)

test_eq(list(indicesPerBin.keys()), list(range(3)))

indicesPerBinTest = {0: np.array([ 0, 10,  1, 11,  2, 12]),
                     1: np.array([ 3, 13,  4, 14,  5, 15]),
                     2: np.array([ 6, 16,  7, 17,  8, 18,  9, 19])}

for i in indicesPerBinTest.keys():
    test_eq(indicesPerBin[i], indicesPerBinTest[i])

# The first bin is unbounded below. -np.inf is used because the alias np.NINF
# was removed in NumPy 2.0.
lowerBoundPerBinTest = [-np.inf, 2.5, 5.5]

test_eq(list(lowerBoundPerBin), lowerBoundPerBinTest)
test_eq(list(lowerBoundPerBin.index), list(range(3)))

#---

# yPred.unique() == 1
# All predictions are identical: a single bin holding every index is expected.
yPred = np.repeat(1, 100)
indicesPerBin, lowerBoundPerBin = generateBins(binSize = 5, yPred = yPred)

test_eq(list(indicesPerBin.keys()), [0])

indicesPerBinTest = {0: np.arange(0, 100, 1)}

for i in indicesPerBinTest.keys():
    test_eq(indicesPerBin[i], indicesPerBinTest[i])

# The only bin is unbounded below. -np.inf is used because the alias np.NINF
# was removed in NumPy 2.0.
lowerBoundPerBinTest = [-np.inf]

test_eq(list(lowerBoundPerBin), lowerBoundPerBinTest)
test_eq(list(lowerBoundPerBin.index), list(range(1)))

#---

# binSize > len(yPred)
# Fewer predictions than binSize: a single bin holding every index is expected.
yPred = np.arange(10)
indicesPerBin, lowerBoundPerBin = generateBins(binSize = 100, yPred = yPred)

test_eq(list(indicesPerBin.keys()), [0])

indicesPerBinTest = {0: np.arange(0, 10, 1)}

for i in indicesPerBinTest.keys():
    test_eq(indicesPerBin[i], indicesPerBinTest[i])

# The only bin is unbounded below. -np.inf is used because the alias np.NINF
# was removed in NumPy 2.0.
lowerBoundPerBinTest = [-np.inf]

test_eq(list(lowerBoundPerBin), lowerBoundPerBinTest)
test_eq(list(lowerBoundPerBin.index), list(range(1)))

In [None]:
# # LevelSetKDEx.getWeights() and LevelSetKDEx_kNN.getWeights()
# for i in range(len(neighborsList)):
#     if len(neighborsList[i]) < self.binSize:
#         ipdb.set_trace()

In [None]:
# # generateBins
# indices = np.array([])
# for k in range(len(indicesPerBin.keys())):
#     indices = np.append(indices, indicesPerBin[k])

# if len(indices) != len(yPred):
#     ipdb.set_trace()

# predCheck = np.array([pred in binPerPred.keys() for pred in yPred])
# keyCheck = np.array([key in yPred for key in binPerPred.keys()])

# if (all(predCheck) & all(keyCheck)) is False:
#     ipdb.set_trace()

In [None]:
# # LevelSetKDEx.getWeights()
# check = [i for i in range(len(weightsDataList)) if len(weightsDataList[i][1]) > 100]
# check2 = [i for i in range(len(weightsDataList)) if len(weightsDataList[i][1]) > 100 and binPerPred[i] != self.lowerBoundPerBin.index.max()]