# Computation Notebook 

This notebook compares the computational cost of the grid-based approach (evaluating every grid point) with that of the gridless approach (binary search) for finding decision boundary points.

We show the times that are associated with each of these.

In [1]:
%load_ext cuml.accel
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
#import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
import random
from sklearn.utils import resample

import os
import sys

# Fix the seed of Python's built-in RNG and silence UserWarning messages.
random.seed(0)
warnings.filterwarnings('ignore', category=UserWarning)

# Put the parent of the current working directory on the import path
# (presumably where the `files` package imported further down lives -- verify).
nb_dir = os.path.dirname(os.getcwd())
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

# Datasets with a varying number of features ($2$, $10$, $50$) and a model (Logistic Regression) 

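We construct three pandas DataFrames (`df1`, `df2`, `df3`) with 2, 10, and 50 features respectively, each holding the class label in its last column, together with a logistic regression model that is fitted on them.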

In [2]:
# 2-feature binary problem: 2000 samples, both features informative.
X, y = make_classification(
    n_samples=2000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)
# Turn the labels into a single column and append it to the right of the features.
y = y.reshape(-1, 1)
df1 = pd.DataFrame(np.concatenate((X, y), axis=1))
# Estimator instance passed as `model=` to the experiment cells below.
model = LogisticRegression()

In [3]:
# 10-feature binary problem: 2000 samples, all features informative.
X, y = make_classification(
    n_samples=2000,
    n_features=10,
    n_informative=10,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)
# Labels become the last column of the frame.
y = y.reshape(-1, 1)
df2 = pd.DataFrame(np.concatenate((X, y), axis=1))

In [4]:
# 50-feature binary problem: 2000 samples, all features informative.
X, y = make_classification(
    n_samples=2000,
    n_features=50,
    n_informative=50,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)
# Labels become the last column of the frame.
y = y.reshape(-1, 1)
df3 = pd.DataFrame(np.concatenate((X, y), axis=1))

# Experiments of the Computation Costs for Grid-based Approach (Logistic Regression with Numba)

$\textbf{ Logistic Regression with Grid-based Method }$ -- 50 features

Resolution: $R = 15$ ($15^{50}$ points), Memory Error (Maximum allowed dimension exceeded), 0 boundary points found

Resolution: $R = 10$ ($10^{50}$ points), Memory Error (Maximum allowed dimension exceeded), 0 boundary points found

$\textbf{ Logistic Regression with Grid-based Method }$ -- 10 features

Resolution: $R = 15$ ($15^{10}$ points), 42 Terabyte Memory Error, 0 boundary points found

Resolution: $R = 10$ ($10^{10}$ points), 745 Gigabytes Memory Error, 0 boundary points found


$\textbf{ Logistic Regression with Grid-based Method }$ -- 2 features

Resolution: $R = 150$ ($150^2 = 22,500$ points searched), 28.1 seconds runtime, 311 boundary points found

Resolution: $R = 100$ ($100^2 = 10,000$ points searched), 5.5 seconds runtime, 104 boundary points found

In [6]:
# Grid-based implementation, called below with `resolution=`.
# NOTE: a later cell imports `optimal_point` from files.binary_search_optimal_point
# under the same name, so the grid cells must be run before that import.
from files.grid_optimal_point import optimal_point

In [12]:
# Rebuild the 2-feature dataset and a fresh estimator for the grid experiments.
X, y = make_classification(
    n_samples=2000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)
# Labels become the last column of the frame.
y = y.reshape(-1, 1)
df1 = pd.DataFrame(np.concatenate((X, y), axis=1))
model = LogisticRegression()

In [13]:
# Preview the first ten rows (two feature columns followed by the label column).
df1.head(10)

Unnamed: 0,0,1,2
0,0.800062,-0.957489,1.0
1,1.187099,1.159787,1.0
2,0.154512,1.21752,0.0
3,0.179014,-0.852832,1.0
4,-0.735827,-0.245366,0.0
5,0.039487,1.320957,1.0
6,-1.482199,0.419738,0.0
7,-0.622829,-0.803223,0.0
8,0.965721,-1.068587,1.0
9,0.798459,-1.022348,1.0


In [9]:
# Grid-based search on the 2-feature data: resolution 150, plotting turned off.
optimal_point(
    df1,
    model=model,
    desired_class=0,
    original_class=1,
    resolution=150,
    chosen_row=1,
    point_epsilon=0.1,
    epsilon=0.07,
    plot=False,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 2
1    1000
0    1000
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
Number of boundary points
(311, 2)
Finding the closest point from the contour line to the point...
Finding the closest point from the contour line to the point.
[[1.07092306 0.87624128]]
[[-1.16018181  0.00452233]]


[np.float64(-0.05536678064834133), np.float64(1.163749815503608)]

In [10]:
# Grid-based search on the 2-feature data: resolution 100.
# NOTE(review): `plot` is not passed here, unlike the resolution=150 run -- confirm
# the function's default does not add plotting time to this measurement.
optimal_point(
    df1,
    model=model,
    desired_class=0,
    original_class=1,
    resolution=100,
    chosen_row=1,
    point_epsilon=0.1,
    epsilon=0.07,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 2
1    1000
0    1000
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
Number of boundary points
(104, 2)
Finding the closest point from the contour line to the point...
Finding the closest point from the contour line to the point.
[[1.07092411 1.09801295]]
[[-1.15887253 -0.03919662]]


[np.float64(-0.05396585561956102), np.float64(1.116748769377007)]

In [11]:
# Preview the first ten rows (ten feature columns followed by the label column).
df2.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,1.99062,-2.921959,-5.689956,3.173745,-4.393239,0.538239,-0.96265,-3.024594,-0.837913,-1.944185,1.0
1,1.342356,0.942957,3.057782,0.059667,-0.611317,-0.985521,0.323851,0.853719,1.741036,2.069323,1.0
2,-1.063987,2.185873,-2.230002,1.095676,-0.240747,-1.776678,-4.16509,-0.122159,0.688403,1.401783,1.0
3,-4.852159,1.29738,3.466596,4.114327,-4.004815,-0.591707,2.035825,3.118117,-0.996438,1.46557,1.0
4,0.033975,-0.181,0.438378,-1.50681,-0.503984,2.442108,-0.40801,1.722336,2.563095,-3.228898,1.0
5,2.77068,-0.667671,-2.726522,1.778277,3.03332,1.052492,1.810689,-3.73043,-2.032935,2.37108,0.0
6,-1.375939,0.889744,-0.487178,0.394568,-1.553368,2.302102,-0.134183,1.3401,0.741324,0.554025,1.0
7,0.060123,-2.850018,-1.808885,1.64985,0.827631,-0.681723,3.049334,-1.560887,-0.029848,0.037102,0.0
8,1.735121,4.266722,0.767749,-2.012636,2.538945,-4.960979,2.908603,-6.451668,2.723697,0.785976,0.0
9,0.436313,1.826231,-0.232626,-0.57004,1.101895,0.658344,-0.079847,-2.15896,0.985347,0.426746,0.0


In [12]:
# Grid-based search on the 10-feature data: resolution 15 (a 15^10-point grid).
optimal_point(
    df2,
    model=model,
    desired_class=0,
    original_class=1,
    resolution=15,
    chosen_row=1,
    point_epsilon=0.1,
    epsilon=0.07,
)

Class counts:
 10
1    1005
0    1005
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
Not enough memory to generate a grid with 15^10 number of points


  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


In [13]:
# Grid-based search on the 10-feature data: resolution 10 (a 10^10-point grid).
optimal_point(
    df2,
    model=model,
    desired_class=0,
    original_class=1,
    resolution=10,
    chosen_row=1,
    point_epsilon=0.1,
    epsilon=0.07,
)

Class counts:
 10
1    1005
0    1005
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
Not enough memory to generate a grid with 10^10 number of points


  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


In [14]:
# Grid-based search on the 50-feature data at resolution 15 (a 15^50-point grid).
# This run fails with "ValueError: Maximum allowed dimension exceeded"; that
# failure is the result being demonstrated, so it is caught and printed rather
# than left to abort a Restart & Run All before the gridless experiments.
try:
    grid_result_df3_r15 = optimal_point(
        df3,
        model=model,
        desired_class=0,
        original_class=1,
        resolution=15,
        chosen_row=1,
        point_epsilon=0.1,
        epsilon=0.07,
    )
except ValueError as err:
    grid_result_df3_r15 = None
    print(f"ValueError: {err}")

# Shown only if the call unexpectedly succeeds; None renders nothing.
grid_result_df3_r15

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 50
1    1001
0    1001
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...


ValueError: Maximum allowed dimension exceeded

In [15]:
# Grid-based search on the 50-feature data at resolution 10 (a 10^50-point grid).
# This run fails with "ValueError: Maximum allowed dimension exceeded"; that
# failure is the result being demonstrated, so it is caught and printed rather
# than left to abort a Restart & Run All before the gridless experiments.
try:
    grid_result_df3_r10 = optimal_point(
        df3,
        model=model,
        desired_class=0,
        original_class=1,
        resolution=10,
        chosen_row=1,
        point_epsilon=0.1,
        epsilon=0.07,
    )
except ValueError as err:
    grid_result_df3_r10 = None
    print(f"ValueError: {err}")

# Shown only if the call unexpectedly succeeds; None renders nothing.
grid_result_df3_r10

Class counts:
 50
1    1001
0    1001
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...


  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


ValueError: Maximum allowed dimension exceeded

# Experiments of the Computation Costs for SSBA method Using GPU (Logistic Regression)


$\textbf{ Logistic Regression with SSBA Method }$ -- 50 features

$T = 1,000,000$, 6.2 seconds runtime, $689,574$ boundary points found

$T = 250,000$, 2.2 seconds runtime, $250,000$ boundary points found

$T = 100,000$, 0.9 seconds runtime, $100,000$ boundary points found

$T = 10,000$, 0.1 seconds runtime, $10,000$ boundary points found

$\textbf{ Logistic Regression with SSBA Method }$ -- 10 features

$T = 1,000,000$, ~2.2 seconds runtime, $711,436$ boundary points found

$T = 250,000$, ~0.9 seconds runtime, $250,000$ boundary points found

$T = 100,000$, ~0.4 seconds runtime, $100,000$ boundary points found

$T = 10,000$, ~0.1 seconds runtime, $10,000$ boundary points found


$\textbf{ Logistic Regression with SSBA Method }$ -- 2 features

$T = 1,000,000$, ~2.4 seconds runtime, $793,845$ boundary points found

$T = 250,000$, ~0.7 seconds runtime, $250,000$ boundary points found

$T = 100,000$, ~0.3 seconds runtime, $100,000$ boundary points found

$T = 10,000$, ~0.0 seconds runtime, $10,000$ boundary points found

$T = 1,500$, ~0.0 seconds runtime, $1,500$ boundary points found

$T = 500$, ~0.0 seconds runtime, $500$ boundary points found

In [5]:
# Gridless (binary search) implementation, called below with `threshold=`.
# NOTE: this rebinds `optimal_point`, replacing the grid-based function imported
# earlier from files.grid_optimal_point; re-running a grid cell after this import
# would call this version instead.
from files.binary_search_optimal_point import optimal_point

In [6]:
# Gridless (binary search) run on the 2-feature data, threshold = 500.
optimal_point(
    df1,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=500,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 2
1    1000
0    1000
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(500, 2)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.01086175 1.29040267]]
[[-1.17303308 -0.00460196]]


[np.float64(0.0013244129875529254), np.float64(1.1538487763889977)]

In [7]:
# Gridless (binary search) run on the 2-feature data, threshold = 1,500.
optimal_point(
    df1,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=1500,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

Class counts:
 2
1    1000
0    1000
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(1500, 2)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.01086306 1.05139047]]
[[-1.17125033 -0.02540175]]


  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


[np.float64(0.0031249892087643616), np.float64(1.133079999182291)]

In [8]:
# Gridless (binary search) run on the 2-feature data, threshold = 10,000.
optimal_point(
    df1,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=10000,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

Class counts:
 2
1    1000
0    1000
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(10000, 2)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.01086375 1.03047306]]
[[-1.17031399 -0.05033313]]


  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


[np.float64(0.004070682320600039), np.float64(1.107920231997797)]

In [9]:
# Gridless (binary search) run on the 2-feature data, threshold = 100,000.
optimal_point(
    df1,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=100000,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 2
1    1000
0    1000
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(100000, 2)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.01086502 1.02418796]]
[[-1.16860428 -0.0721871 ]]


[np.float64(0.0057974944660421635), np.float64(1.0858539989768687)]

In [10]:
# Gridless (binary search) run on the 2-feature data, threshold = 250,000.
optimal_point(
    df1,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=250000,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 2
1    1000
0    1000
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(250000, 2)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.01086532 1.02348078]]
[[-1.16819494 -0.07592147]]


[np.float64(0.006210923993172024), np.float64(1.0820830003103634)]

In [11]:
# Gridless (binary search) run on the 2-feature data, threshold = 1,000,000.
# NOTE(review): point_epsilon/epsilon are 1e-4/1e-3 here, tighter than the
# 1e-3/0.01 used by every other gridless run -- confirm this is intentional.
optimal_point(
    df1,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=1000000,
    chosen_row=1,
    point_epsilon=1e-4,
    epsilon=1e-3,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 2
1    1000
0    1000
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(793845, 2)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.00185492 1.02077407]]
[[-1.17187423 -0.05162185]]


[np.float64(0.013050724564066973), np.float64(1.1070929121254551)]

In [12]:
# Gridless (binary search) run on the 10-feature data, threshold = 10,000.
optimal_point(
    df2,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=10000,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

Class counts:
 10
1    1005
0    1005
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(10000, 10)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[0.99470607 1.033535   1.10989421 1.0201636  1.00780469 1.03375915
  0.99961999 1.01589289 1.02518904 1.01279034]]
[[ 0.06503928 -0.04391481 -0.0111107  -0.10037421  0.45907168 -0.04350994
   0.09630241 -0.17239293 -0.06749531 -0.36296287]]


  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


[np.float64(1.4070513982093271),
 np.float64(0.8975690188872971),
 np.float64(3.0454506275665807),
 np.float64(-0.04273139885169344),
 np.float64(-0.1486622212380574),
 np.float64(-1.030499538511193),
 np.float64(0.42011650458039723),
 np.float64(0.6785865048013077),
 np.float64(1.6718403815887657),
 np.float64(1.7017173802025423)]

In [13]:
# Gridless (binary search) run on the 10-feature data, threshold = 100,000.
optimal_point(
    df2,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=100000,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 10
1    1005
0    1005
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(100000, 10)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.02519385 1.01673163 1.01586783 1.01735927 1.00743408 1.01568235
  0.99916253 1.02841503 1.01996066 1.01451506]]
[[-0.06747428 -0.15103802 -0.17312499 -0.13824186  0.39262118 -0.17874338
   0.09219515 -0.0558465  -0.1023989  -0.22469561]]


[np.float64(1.2731822227734253),
 np.float64(0.7893913749124312),
 np.float64(2.8819102135422106),
 np.float64(-0.08097492046032218),
 np.float64(-0.21577685078871262),
 np.float64(-1.1670672434352611),
 np.float64(0.41596863326078215),
 np.float64(0.7962858755078444),
 np.float64(1.6365929921777833),
 np.float64(1.8413655893612024)]

In [14]:
# Gridless (binary search) run on the 10-feature data, threshold = 250,000.
optimal_point(
    df2,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=250000,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 10
1    1005
0    1005
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(250000, 10)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.07355226 1.01650651 1.02523848 1.01436357 1.00647331 1.01670969
  1.00077519 1.01357508 1.00104457 1.01736361]]
[[-0.01689243 -0.15622906 -0.06727959 -0.2324618   0.28538742 -0.15152854
   0.1084874  -0.28351106  0.11178079 -0.13816092]]


[np.float64(1.324221527350697),
 np.float64(0.78414865084697),
 np.float64(2.9888047006260083),
 np.float64(-0.176134061267755),
 np.float64(-0.3240819876296826),
 np.float64(-1.1395812828187764),
 np.float64(0.4324221925646943),
 np.float64(0.5663595093001842),
 np.float64(1.852933387365994),
 np.float64(1.928762777949529)]

In [15]:
# Gridless (binary search) run on the 10-feature data, threshold = 1,000,000.
optimal_point(
    df2,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=1000000,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 10
1    1005
0    1005
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(711436, 10)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.07355226 1.01650651 1.02523848 1.01436357 1.00647331 1.01670969
  1.00077519 1.01357508 1.00104457 1.01736361]]
[[-0.01689243 -0.15622906 -0.06727959 -0.2324618   0.28538742 -0.15152854
   0.1084874  -0.28351106  0.11178079 -0.13816092]]


[np.float64(1.324221527350697),
 np.float64(0.78414865084697),
 np.float64(2.9888047006260083),
 np.float64(-0.176134061267755),
 np.float64(-0.3240819876296826),
 np.float64(-1.1395812828187764),
 np.float64(0.4324221925646943),
 np.float64(0.5663595093001842),
 np.float64(1.852933387365994),
 np.float64(1.928762777949529)]

In [16]:
# Gridless (binary search) run on the 50-feature data, threshold = 10,000.
optimal_point(
    df3,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=10000,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 50
1    1001
0    1001
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(10000, 50)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.0101367  1.00775898 1.00978458 1.0101575  1.01021673 1.00967862
  1.0167958  1.01071452 1.00990974 1.01057373 1.01275108 1.00804708
  1.00950289 1.01056287 1.01032898 1.00939993 1.0109198  1.00786105
  1.00946217 1.00954339 1.00969167 1.01106528 1.00937778 1.0094003
  1.01034962 1.0105574  1.01044271 1.00927561 1.00905248 1.00747706
  1.01028832 1.01030606 1.0086744  1.00923112 1.00915979 1.01113488
  1.01594436 1.00930712 1.01041383 1.00687538 1.0102529  1.01080211
  1.00949876 1.00818014 1.00967135 1.01054451 1.00940115 1.00979428
  1.00984521 1.00977534]]
[[-7.38954958  0.44968685  4.687599   -6.41357931 -4.66116211  3.14170746
  -0.14962124 -1.41454567 11.18927904 -1.76140616 -

[np.float64(3.5794564082697153),
 np.float64(1.7588731222704843),
 np.float64(0.5713983486276648),
 np.float64(0.5834262941503638),
 np.float64(5.479397152866933),
 np.float64(-2.04484849680525),
 np.float64(0.71770974949442),
 np.float64(4.965578810839221),
 np.float64(-3.1988934040984365),
 np.float64(-4.406296531460539),
 np.float64(1.012107388342394),
 np.float64(2.33765036522611),
 np.float64(-0.42733912374515404),
 np.float64(-2.5138636894310435),
 np.float64(-2.607314421826483),
 np.float64(1.8005126918045185),
 np.float64(0.07193650699249132),
 np.float64(2.8913113698386503),
 np.float64(-0.5907576552113132),
 np.float64(0.8742505760167751),
 np.float64(-0.853296432849215),
 np.float64(8.07237812237249),
 np.float64(-0.22456527324489262),
 np.float64(2.3425316153710245),
 np.float64(-4.768350012746907),
 np.float64(-0.1080486542770347),
 np.float64(1.1257998698722327),
 np.float64(3.1874577279091056),
 np.float64(8.488932487126348),
 np.float64(-6.1253889572868445),
 np.float64

In [17]:
# Gridless (binary search) run on the 50-feature data, threshold = 100,000.
optimal_point(
    df3,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=100000,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 50
1    1001
0    1001
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(100000, 50)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.01035315 1.00948861 1.00208776 1.01136132 1.01089844 1.0095726
  1.01026601 1.01021662 1.0098793  1.01042601 1.01053017 1.01017598
  1.00953023 1.01024283 1.00900338 1.00960077 1.00944895 1.01140166
  1.00975067 1.01071486 1.0092634  1.01031133 1.01177203 1.00249625
  1.00963151 0.99793986 1.01037688 1.00865913 1.01023376 1.00985302
  1.01090308 1.01044658 1.00950653 1.01094236 1.00731693 1.01020031
  1.01033588 1.00968806 1.01591153 1.00877449 1.01024561 1.01067439
  1.00966679 1.00506393 1.00965636 1.01393617 1.00974945 1.00966515
  1.00973248 1.01322373]]
[[-2.86094596  1.97400263  0.12665029 -0.74292869 -1.12516647  2.36212321
  -3.79781785 -4.66348392  8.36684976 -2.37181731 

[np.float64(8.15334584982074),
 np.float64(3.2984303267098576),
 np.float64(-4.0351521493402815),
 np.float64(6.31078221418076),
 np.float64(9.050752071888581),
 np.float64(-2.832228477757305),
 np.float64(-2.966962299179409),
 np.float64(1.6841516765591384),
 np.float64(-6.049546951108416),
 np.float64(-5.022811650336448),
 np.float64(-0.5411772223589679),
 np.float64(-3.9814504123506875),
 np.float64(-0.30790756915761186),
 np.float64(-4.902493956494906),
 np.float64(1.5170715213694024),
 np.float64(2.655693016713875),
 np.float64(3.0321802494011596),
 np.float64(1.686612102778613),
 np.float64(1.6039598572429266),
 np.float64(-2.786794731598521),
 np.float64(-2.7768561184472835),
 np.float64(5.753386340543472),
 np.float64(-2.4396986402663523),
 np.float64(0.7774659415016133),
 np.float64(0.9177288139441941),
 np.float64(1.8066563807028817),
 np.float64(0.7232948059470377),
 np.float64(2.540023365888375),
 np.float64(3.0485222252595827),
 np.float64(0.4107970791391642),
 np.float64(

In [27]:
# Gridless (binary search) run on the 50-feature data, threshold = 250,000.
optimal_point(
    df3,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=250000,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 50
1    1001
0    1001
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(250000, 50)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.01016285 1.00978172 1.00959691 1.01018086 1.01023564 1.00970167
  1.00906782 1.01079172 1.0098747  1.00959531 1.01026077 1.01026997
  1.00938092 1.00640707 1.00970065 1.00934682 1.00924954 1.01022833
  1.00923392 1.00870558 1.00926939 1.01027767 1.15156955 1.01034185
  1.00956214 1.00951857 1.0105032  1.00839421 1.00109924 1.00927276
  1.01043169 1.02007724 1.01443231 1.00955004 1.01370924 1.0106066
  1.01030338 1.00960733 1.01055797 1.01057371 1.01030576 1.00951209
  1.00939173 1.01136188 1.00956361 1.00975567 1.00894374 1.00980913
  1.00971697 1.01461338]]
[[-6.20292604  4.62606574  2.50464325 -5.58541919 -4.28726181  3.38449357
   1.08248051 -1.27669899  8.05934393  2.49472903 

[np.float64(4.777946158422404),
 np.float64(5.977013774993333),
 np.float64(-1.633386771326594),
 np.float64(1.4198679926680526),
 np.float64(5.857036441354809),
 np.float64(-1.799634539635242),
 np.float64(1.9621402508788526),
 np.float64(5.104803874827192),
 np.float64(-6.3601278325124575),
 np.float64(-0.10759901397252092),
 np.float64(-2.529005757912265),
 np.float64(-1.9632770808852495),
 np.float64(-0.8316267908049884),
 np.float64(-0.417627743908126),
 np.float64(3.901221032034706),
 np.float64(1.6622996740134295),
 np.float64(2.540280549692748),
 np.float64(-2.053345972880043),
 np.float64(-1.1558581530754275),
 np.float64(-0.5717267472380506),
 np.float64(-2.765493192349362),
 np.float64(5.3561665033104955),
 np.float64(-1.871378076128801),
 np.float64(-2.3425798262012845),
 np.float64(0.4791411442680009),
 np.float64(3.840950295447394),
 np.float64(1.4028051117464444),
 np.float64(2.4145134094968776),
 np.float64(7.526944396048299),
 np.float64(-5.127021997464764),
 np.float6

In [18]:
# Gridless (binary search) run on the 50-feature data, threshold = 1,000,000.
optimal_point(
    df3,
    model=model,
    desired_class=0,
    original_class=1,
    threshold=1000000,
    chosen_row=1,
    point_epsilon=1e-3,
    epsilon=0.01,
)

  balanced_dataset = pd.concat([balanced_dataset, upsampled_class], ignore_index=True)


Class counts:
 50
1    1001
0    1001
Name: count, dtype: int64
Fitting model...
Model training complete.
boundary points started generation...
boundary points finished.
(689574, 50)
Finding the closest point from the contour line to the point...
Found the closest point from the contour line to the point.
[[1.01028144 1.0093257  1.00898715 1.01049448 1.01030976 1.00975198
  1.00953466 1.01023855 1.00988581 1.010713   1.0105713  1.01039549
  1.00847385 1.01064309 1.00947162 1.012031   1.01026245 1.01042152
  1.01615662 1.01080378 1.00968541 1.01019558 1.01194125 1.01056519
  1.00914584 1.00945694 1.01062109 1.00942733 1.01028276 1.00973846
  1.01049841 1.01049315 1.00900146 1.01050555 1.00975036 1.0091618
  1.01082661 1.00866522 1.00862605 1.00936586 1.01027968 1.009335
  1.01066487 1.00980168 1.00927085 1.01501322 1.01126741 1.00975182
  1.0095769  1.0082031 ]]
[[-3.5897048   1.49685165  0.99619028 -2.04353689 -3.2615739   4.07127148
   2.169471   -4.23488437  8.84388891 -1.4175559  -1

[np.float64(7.417299486882551),
 np.float64(2.816508002888985),
 np.float64(-3.1569236570316432),
 np.float64(4.99716880573836),
 np.float64(6.892981152568453),
 np.float64(-1.1059889100135818),
 np.float64(3.0600001700919863),
 np.float64(2.117037191843826),
 np.float64(-5.567737409802822),
 np.float64(-4.059007911928413),
 np.float64(-0.4026725307685768),
 np.float64(-0.7640593736475993),
 np.float64(-1.810982169563153),
 np.float64(-2.287798232907855),
 np.float64(2.4241158864001062),
 np.float64(-0.4017118413774091),
 np.float64(-2.7058285658347376),
 np.float64(-0.005666718631546441),
 np.float64(-2.65313458959901),
 np.float64(-2.628942111898315),
 np.float64(-0.9190809758835359),
 np.float64(3.814197741986005),
 np.float64(-2.3895181021104617),
 np.float64(-1.1633516702011095),
 np.float64(-0.6563389821927528),
 np.float64(3.600485716572961),
 np.float64(1.7875913089733722),
 np.float64(3.56055177408217),
 np.float64(3.8046594427466522),
 np.float64(-2.629357903351338),
 np.floa