In [1]:
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
import random

In [50]:
class Oversampling(BaseEstimator, TransformerMixin):
    """Random oversampler: duplicates rows of one class until a target ratio is met.

    Rows whose label equals ``sensitive_class`` are copied and appended to the
    data until ``n_sensitive / n_other >= ratio``, where ``n_other`` counts
    every row carrying a different label.

    Parameters
    ----------
    ratio : float, default=1
        Minimum ``n_sensitive / n_other`` to reach. Must not be ``+inf``.
    sensitive_class : scalar, default=1
        Label value of the class to oversample.
    random_state : int or None, default=1
        Seed of the private ``random.Random`` generator created on every
        ``fit_transform`` call. ``None`` gives a non-deterministic result.
    shuffle : bool, default=False
        If truthy, rows are shuffled (x and y jointly) after oversampling.
    """

    def __init__(self, ratio=1, sensitive_class=1, random_state=1, shuffle=False):
        # Only store the parameters. No seeding happens here, so creating an
        # instance never touches the process-wide `random` module state.
        self.ratio = ratio
        self.sensitive_class = sensitive_class
        self.random_state = random_state
        self.shuffle = shuffle

    def fit_transform(self, x, y):
        """Return oversampled (and optionally shuffled) copies of ``x`` and ``y``.

        Parameters
        ----------
        x : array-like
            Samples, indexed along the first axis (any number of dimensions).
        y : array-like
            Labels, one per sample.

        Returns
        -------
        (x, y) : tuple of numpy.ndarray
            Input rows followed by the duplicated rows (in draw order), or a
            joint permutation of them when ``shuffle`` is truthy.

        Raises
        ------
        ValueError
            If ``x`` and ``y`` differ in length, if ``ratio`` is ``+inf``, or
            if duplicates are required but no row has label
            ``sensitive_class``.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        if len(x) != len(y):
            raise ValueError(
                "x and y must have the same length, got %d and %d" % (len(x), len(y))
            )
        if self.ratio == float("inf"):
            raise ValueError("ratio must be finite")

        # One generator per call: the same random_state always yields the same
        # result, independent of any other use of the `random` module.
        rng = random.Random(self.random_state)

        picks = self._draw_duplicates(y, rng)
        if picks:
            picks = np.asarray(picks, dtype=np.intp)
            # Concatenating along axis 0 works for 1-D and N-D x alike, and a
            # single concatenation avoids re-allocating per added sample.
            x = np.concatenate((x, x[picks]), axis=0)
            y = np.concatenate((y, y[picks]), axis=0)

        if self.shuffle:
            x, y = self._shuffle(x, y, rng)

        return x, y

    def _count_samples(self, y):
        """Return how many entries of ``y`` equal ``sensitive_class``."""
        return int(np.count_nonzero(np.asarray(y) == self.sensitive_class))

    def _draw_duplicates(self, y, rng):
        """Return the source indices (into ``y``) of the rows to append.

        Each draw is uniform over the current sensitive-class pool, which
        includes duplicates added by earlier draws.
        """
        pool = np.flatnonzero(y == self.sensitive_class).tolist()
        n_other = len(y) - len(pool)
        if n_other == 0:
            # No other-class rows: the ratio is unbounded, nothing to add.
            return []

        picks = []
        while len(pool) / n_other < self.ratio:
            if not pool:
                raise ValueError(
                    "cannot oversample: no sample with label %r in y"
                    % (self.sensitive_class,)
                )
            source = pool[rng.randint(0, len(pool) - 1)]
            pool.append(source)
            picks.append(source)
        return picks

    def _shuffle(self, x, y, rng=None):
        """Apply one random permutation to both ``x`` and ``y``.

        ``rng`` is a ``random.Random`` instance; when omitted, a new one is
        seeded from ``random_state``.
        """
        if len(x) != len(y):
            raise ValueError("x and y must have the same length")
        if rng is None:
            rng = random.Random(self.random_state)
        order = np.asarray(rng.sample(range(len(x)), len(x)), dtype=np.intp)
        return x[order], y[order]
    

In [51]:
# Toy data: seven samples labelled 1 followed by five labelled 0;
# the features simply mirror the labels.
y = np.array([1] * 7 + [0] * 5)
x = y.copy()

# Duplicate class-1 samples until there are at least 5 of them per
# other-class sample, then shuffle the result.
ov = Oversampling(ratio=5, sensitive_class=1, random_state=5, shuffle=True)
x, y = ov.fit_transform(x, y)
print(x, y)

[1 1 1 0 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1] [1 1 1 0 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1]
