In [6]:
from sklearn.datasets import load_breast_cancer
import numpy as np
import collections


In [7]:
# ! pip install knnor==0.0.3

In [8]:
from knnor import data_augment

In [9]:
# Load the breast-cancer dataset that ships with scikit-learn.
dataset = load_breast_cancer()

# Tally how many samples carry each distinct target label.
unique, counts = np.unique(dataset['target'], return_counts=True)

print('Unique values of the target variable', unique)
print('Counts of the target variable :', counts)

Unique values of the target variable [0 1]
Counts of the target variable : [212 357]


In [10]:
# Pull the feature matrix and the label vector out of the loaded dataset.
X, y = dataset["data"], dataset["target"]

print("Original shape=", X.shape, y.shape)

# Count the samples per class label before any augmentation is applied.
elements_count = collections.Counter(y)
print("Original distribution:")
for key, value in elements_count.items():
    print(f"{key}: {value}")

Original shape= (569, 30) (569,)
Original distribution:
0: 212
1: 357


### Augmentation without any parameters

When no parameters are supplied, the algorithm derives them from the data.

The final result contains an equal number of minority and majority data points.

In [11]:
# Build the KNNOR augmenter; no arguments are passed to fit_resample below,
# so every augmentation setting is left to the library.
knnor=data_augment.KNNOR()
# fit_resample returns four values; only the first two (the augmented feature
# matrix and label vector) are used here, the remaining two are discarded.
# NOTE(review): the "357 212 (569,)" lines in this cell's output are not
# printed by this cell, so they presumably come from inside fit_resample.
X_new,y_new,_,_=knnor.fit_resample(X,y)
print("Shape after augmentation",X_new.shape,y_new.shape)
# Count the samples per class label after augmentation.
elements_count = collections.Counter(y_new)
# Print each class label together with its frequency.
print("Final distribution:")
for key, value in elements_count.items():
    print(f"{key}: {value}")


357 212 (569,)
357 212 (569,)
Shape after augmentation (714, 30) (714,)
Final distribution:
0: 357
1: 357


### Augmentation with user-defined parameters


In [12]:
# Resample again with explicitly chosen settings; relies on the `knnor`
# augmenter object created in an earlier cell.
X_new, y_new, _, _ = knnor.fit_resample(
    X,
    y,
    # number of neighbors used to generate each artificial point
    num_neighbors=10,
    # maximum distance at which a new point will be placed
    max_dist_point=0.01,
    # proportion of the minority population used to generate artificial points
    proportion_minority=0.3,
    # target ratio of minority to majority datapoints after augmentation
    # (not an absolute count): e.g. with 15 majority and 5 minority points,
    # final_proportion=1 adds 10 artificial minority points; here a value of 2
    # yields twice as many minority points as majority points
    final_proportion=2,
)
print("Shape after augmentation", X_new.shape, y_new.shape)

# Count the samples per class label after augmentation.
elements_count = collections.Counter(y_new)
print("Final distribution:")
for key, value in elements_count.items():
    print(f"{key}: {value}")


357 212 (569,)
Shape after augmentation (1071, 30) (1071,)
Final distribution:
0: 714
1: 357
