# Working on Group-CFEs

### Using datasets from: Retiring Adult: New Datasets for Fair Machine Learning (https://papers.nips.cc/paper/2021/file/32e54441e6382a7fbacbbbaf3c450059-Paper.pdf)


## Data Prep

In [1]:
import numpy as np 
import pandas as pd
import alibi
import random
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn_extra.cluster import KMedoids
from sklearn.neighbors import NearestNeighbors
from sklearn.ensemble import GradientBoostingClassifier
from scipy.spatial import distance
from collections import Counter
from sklearn.neighbors import KNeighborsClassifier, KDTree
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [2]:
import dice_ml
from dice_ml.utils import helpers # helper functions

In [3]:
from alibi.datasets import fetch_adult

### Loading the Adult Dataset

In [4]:
# Fetch the Adult dataset through alibi and unpack the pieces used below.
adult = fetch_adult()
data, target = adult.data, adult.target
feature_names, target_names = adult.feature_names, adult.target_names
# Kept under a temporary name; it is re-keyed by enumeration order further down.
category_map_tmp = adult.category_map

In [5]:
def set_seed(s=0):
    """Seed the global NumPy and TensorFlow random number generators with ``s``.

    ``tf`` is looked up when the function is called, so TensorFlow has to be
    imported before the first call (the import lives in a later cell).
    """
    np.random.seed(s)
    tf.random.set_seed(s)

In [6]:
import tensorflow as tf

In [7]:
# Shuffle features and labels together (after seeding) so the rows stay
# aligned, then separate them again: the label is the last column.
set_seed()
data_perm = np.random.permutation(np.column_stack((data, target)))
X, y = data_perm[:, :-1], data_perm[:, -1]

In [8]:
# Train/test boundary: the first `idx` shuffled rows are used for training.
idx = 30000
y_train = y[:idx]
# NOTE(review): starting at idx + 1 leaves row `idx` out of both splits; the
# feature split further down uses the same slice, so the two stay aligned.
y_test = y[idx + 1:]

In [9]:
# Reorder the columns: original columns 1-7 and 11 first, then 0 and 8-10.
# Not idempotent: running this cell a second time permutes the columns again.
column_order = [1, 2, 3, 4, 5, 6, 7, 11, 0, 8, 9, 10]
X = X[:, column_order]


In [10]:
# Apply the same reordering to the feature names so they keep matching the
# columns of X. Not idempotent: re-running permutes the names again.
name_order = [1, 2, 3, 4, 5, 6, 7, 11, 0, 8, 9, 10]
feature_names = [feature_names[position] for position in name_order]
print(feature_names)

['Workclass', 'Education', 'Marital Status', 'Occupation', 'Relationship', 'Race', 'Sex', 'Country', 'Age', 'Capital Gain', 'Capital Loss', 'Hours per week']


In [11]:
# Re-key the category map with consecutive integers 0, 1, 2, ... while keeping
# the values in their original iteration order.
category_map = dict(enumerate(category_map_tmp.values()))

In [12]:
#ED check ordering of feature values for categorical. For education. 

In [13]:
# For each of the first `n_categories` columns of X, record how many distinct
# values actually occur in the data.
n_categories = len(category_map)
cat_vars_ord = {column: len(np.unique(X[:, column])) for column in range(n_categories)}
print(cat_vars_ord)

{0: 9, 1: 7, 2: 4, 3: 9, 4: 6, 5: 5, 6: 2, 7: 11}


In [14]:
# Min-max scale the last four columns of X into the interval given by `rng`.
rng = (-1., 1.)
X_num = X[:, -4:].astype(np.float32, copy=False)
xmin = X_num.min(axis=0)
xmax = X_num.max(axis=0)
unit_scaled = (X_num - xmin) / (xmax - xmin)
X_num_scaled = unit_scaled * (rng[1] - rng[0]) + rng[0]
# Per-split slices of the scaled block, left disabled as in the original cell:
#X_num_scaled_train = X_num_scaled[:idx, :]
#X_num_scaled_test = X_num_scaled[idx+1:, :]


In [15]:
# Split the feature matrix at the same boundary as the labels. The line that
# would swap in the scaled numeric block is disabled, so X is used unscaled.
#X = np.c_[X[:, :-4], X_num_scaled].astype(np.float32, copy=False)
X_train = X[:idx, :]
X_test = X[idx + 1:, :]  # same idx + 1 start as the label split
print(X_train.shape, X_test.shape)


(30000, 12) (2560, 12)


In [16]:
# Standardise the features and fit a gradient-boosted classifier on the
# training split; `fit` returns the pipeline itself, so it can be chained.
model = make_pipeline(
    StandardScaler(),
    GradientBoostingClassifier(random_state=0),
).fit(X_train, y_train)
yhat = model.predict(X_test)

In [17]:
from sklearn.metrics import accuracy_score

In [18]:
# Test-set accuracy. Arguments follow the (y_true, y_pred) order of the
# scikit-learn signature, matching the later accuracy cell in this notebook.
accuracy_score(y_test, yhat)

0.87421875

In [19]:
# Number of training rows the model gets wrong.
np.count_nonzero(model.predict(X_train) != y_train) #these are cases that may be troublesome for NNs and NUNs for explanation

3972

## Counterfactuals

### A simple baseline: NUNs - Nearest Unlike Neighbors

In [20]:
# Show the training features as a labelled frame; `feature_names` holds the
# same twelve names, in the same order, as the columns of X_train.
pd.DataFrame(X_train, columns=feature_names)

Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week
0,4,4,2,1,4,4,0,9,27,0,0,44
1,4,1,1,5,1,4,0,9,27,0,0,40
2,4,0,0,6,0,4,1,9,25,0,0,40
3,4,3,0,2,0,0,1,9,46,0,1902,40
4,4,3,2,2,1,4,1,9,45,0,2824,76
...,...,...,...,...,...,...,...,...,...,...,...,...
29995,6,4,2,5,1,4,0,9,41,0,0,15
29996,0,3,3,0,4,2,0,9,53,0,0,30
29997,4,4,1,7,1,4,1,9,20,0,0,40
29998,4,4,0,2,5,2,0,9,32,3908,0,40


#### NUN instances where people make 50k+ in the training data

In [21]:
# Test rows grouped by *predicted* class.
negative_outcome = X_test[yhat == False] # the people in the test set who are predicted to make less than 50k
positive_outcome = X_test[yhat == True] # the people in the test set who are predicted to make more than 50k

# Training rows grouped by *true* label.
positive_train_set = X_train[y_train == True] # the people who make 50k in the train set
# NOTE(review): the extra `positive` alias is kept from the original chained
# assignment; it looks unintended - confirm before removing.
negative_train_set = positive = X_train[y_train == False] # the people who dont make 50k in the train set

In [22]:
# Regularization debugging: only keep cases where model.predict(X_train) == y_train

In [23]:
# Positive training rows that the model also classifies correctly.
correctly_predicted_rows = np.where(model.predict(X_train) == y_train)[0]
positive_label_rows = np.where(y_train == True)[0]
# intersect1d returns the common indices sorted and without repeats.
positive_train_indices = list(np.intersect1d(positive_label_rows, correctly_predicted_rows))
positive_train_set = np.array([X_train[row] for row in positive_train_indices])

In [24]:
# Negative training rows that the model also classifies correctly.
correctly_predicted_rows = np.where(model.predict(X_train) == y_train)[0]
negative_label_rows = np.where(y_train == False)[0]
# intersect1d returns the common indices sorted and without repeats.
negative_train_set_indices = list(np.intersect1d(negative_label_rows, correctly_predicted_rows))
negative_train_set = np.array([X_train[row] for row in negative_train_set_indices])

In [25]:
#removing duplicates 

In [26]:
# Drop duplicate rows: each row becomes a tuple, the tuples go through a set,
# and the survivors are turned back into a 2-D array.
# The resulting row order is whatever the set iteration yields, and the
# neighbour indices computed later refer to this order.
positive_train_set = np.array([list(instance) for instance in set(map(tuple, positive_train_set.tolist()))])
negative_train_set = np.array([list(instance) for instance in set(map(tuple, negative_train_set.tolist()))])

In [27]:
# Shapes of the positive and negative index arrays, i.e. the class sizes.
tuple(np.where(y_train == label)[0].shape for label in (True, False)) #NB some class imbalance

((7229,), (22771,))

In [28]:
# Build a 30-neighbour Hamming index over the positive training rows and look
# up the first test row in it.
index = 0
neighbors_positive = NearestNeighbors(n_neighbors=30, metric='hamming')
neighbors_positive.fit(positive_train_set) #nb could do with a better distance function
distances, indices = neighbors_positive.kneighbors(X_test[index].reshape(1,-1))

# Closest positive row to the query, shown next to the query itself.
closest_positive_row = indices[0][0]
list(X_test[index]), list(positive_train_set[closest_positive_row]) # a NUN

([4, 0, 2, 8, 4, 2, 0, 9, 52, 0, 0, 60],
 [4, 5, 2, 8, 4, 2, 0, 9, 39, 15020, 0, 60])

#### Finding NNs

NB might use a custom distance function

In [29]:
# Same kind of index, fitted on the negative training rows.
neighbors_negative = NearestNeighbors(n_neighbors=30, metric='hamming')
neighbors_negative.fit(negative_train_set) # other instances that dont get 50k

In [30]:
def NUN_finder(query, outcome):
    """Return the nearest unlike neighbour (NUN) of ``query`` as a list.

    Parameters
    ----------
    query : array exposing ``reshape``
        A single instance; it is reshaped to one row before the lookup.
    outcome : {'negative', 'positive'}
        Class of ``query``. The neighbour is searched in the *other* class:
        'negative' queries the positive index, 'positive' the negative one.

    Returns
    -------
    list
        The closest row of the opposite-class training set.

    Raises
    ------
    ValueError
        If ``outcome`` is neither 'negative' nor 'positive'. Previously this
        surfaced as an UnboundLocalError at the return statement.
    """
    if outcome == 'negative':
        unlike_index, unlike_rows = neighbors_positive, positive_train_set
    elif outcome == 'positive':
        unlike_index, unlike_rows = neighbors_negative, negative_train_set
    else:
        raise ValueError(
            "outcome must be 'negative' or 'positive', got {!r}".format(outcome)
        )

    # kneighbors returns (distances, indices); only the indices are needed.
    _, indices = unlike_index.kneighbors(query.reshape(1, -1))
    return list(unlike_rows[indices[0][0]])

In [31]:
def explanation_generator(query, outcome, n_same_class=5):
    """Collect a query's nearest neighbours and nearest unlike neighbours.

    Parameters
    ----------
    query : array exposing ``reshape``
        A single instance; it is reshaped to one row for the lookups.
    outcome : {'negative', 'positive'}
        Class of ``query``. It selects which training set is treated as the
        "same" class and which as the "unlike" class.
    n_same_class : int, default 5
        How many same-class neighbours to return in ``NNs``.

    Returns
    -------
    tuple
        ``(query, NUN, NNs, NUNs, same_class_indices)`` where ``NUN`` is the
        closest opposite-class row, ``NNs`` the closest same-class rows,
        ``NUNs`` one nearest unlike neighbour (as a list) per entry of ``NNs``,
        and ``same_class_indices`` the full index array returned by
        ``kneighbors`` on the same-class index.

    Raises
    ------
    ValueError
        If ``outcome`` is neither 'negative' nor 'positive'. The previous
        version fell through and returned ``None`` in that case.
    """
    if outcome == 'negative':
        same_index, same_rows = neighbors_negative, negative_train_set
        unlike_index, unlike_rows = neighbors_positive, positive_train_set
    elif outcome == 'positive':
        same_index, same_rows = neighbors_positive, positive_train_set
        unlike_index, unlike_rows = neighbors_negative, negative_train_set
    else:
        raise ValueError(
            "outcome must be 'negative' or 'positive', got {!r}".format(outcome)
        )

    query_row = query.reshape(1, -1)

    # Nearest neighbours within the query's own class.
    _, same_class_indices = same_index.kneighbors(query_row)
    NNs = same_rows[same_class_indices[0][0:n_same_class]]

    # Nearest neighbour in the opposite class.
    _, unlike_indices = unlike_index.kneighbors(query_row)
    NUN = unlike_rows[unlike_indices[0][0]]

    # One nearest unlike neighbour for each same-class neighbour.
    NUNs = [NUN_finder(instance, outcome=outcome) for instance in NNs]

    return query, NUN, NNs, NUNs, same_class_indices
    


In [32]:
def boarderline_cases(threshold):
    """Return indices of correctly classified test rows with low model confidence.

    A row qualifies when its highest predicted class probability is at most
    ``threshold`` and its prediction in ``yhat`` equals ``y_test``.

    Parameters
    ----------
    threshold : float
        Upper bound (inclusive) on the maximum predicted class probability.

    Returns
    -------
    numpy.ndarray
        Sorted integer indices into ``X_test``.
    """
    # Score the whole test set in one call instead of one row at a time.
    confidence = model.predict_proba(X_test).max(axis=1)

    is_borderline = confidence <= threshold
    # Misclassified rows are excluded, as in the original set difference.
    is_correct = yhat == y_test

    return np.flatnonzero(is_borderline & is_correct)

## DiCE Counterfactuals

In [33]:
# DiCE imports
import dice_ml
from dice_ml.utils import helpers  # helper functions

In [34]:
# Getting dataset ready using pandas

# Feature-only DataFrames for DiCE. The original cell attached an 'income'
# column to each frame and dropped it again straight away; the frames are now
# built directly. `copy=True` keeps them independent of the NumPy arrays, as
# the add-then-drop round trip did.
x_train = pd.DataFrame(X_train, columns=feature_names, copy=True)
x_test = pd.DataFrame(X_test, columns=feature_names, copy=True)
#x_test = test_dataset.drop('income', axis=1)
#x_test = test_dataset.drop('income', axis=1)

In [35]:
# Display the feature-only training frame.
x_train

Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week
0,4,4,2,1,4,4,0,9,27,0,0,44
1,4,1,1,5,1,4,0,9,27,0,0,40
2,4,0,0,6,0,4,1,9,25,0,0,40
3,4,3,0,2,0,0,1,9,46,0,1902,40
4,4,3,2,2,1,4,1,9,45,0,2824,76
...,...,...,...,...,...,...,...,...,...,...,...,...
29995,6,4,2,5,1,4,0,9,41,0,0,15
29996,0,3,3,0,4,2,0,9,53,0,0,30
29997,4,4,1,7,1,4,1,9,20,0,0,40
29998,4,4,0,2,5,2,0,9,32,3908,0,40


Given the train dataset, we construct a data object for DiCE. Since continuous and discrete features have different ways of perturbation, we need to specify the names of the continuous features. DiCE also requires the name of the output variable that the ML model will predict.

In [36]:
# Cast 'Capital Loss' to int32 in both the training and the test frame.
# (The bare `type(x_train.iloc[0][10])` probe that used to sit here was not the
# last expression of the cell, so it displayed nothing, and it relied on
# positional integer indexing of a labelled Series.)
capital_loss_dtype = {'Capital Loss': 'int32'}
x_train = x_train.astype(capital_loss_dtype)
x_test = x_test.astype(capital_loss_dtype)

In [37]:
# Step 1: dice_ml.Data
# put back in capital loss
# NOTE(review): in file order x_train no longer has an 'income' column when it
# is passed here with outcome_name='income' - confirm this is intended.
continuous_features = ['Age', 'Capital Gain', 'Capital Loss', 'Hours per week'] #nb capitol loss
d = dice_ml.Data(
    dataframe=x_train,
    continuous_features=continuous_features,
    outcome_name='income',
)
# Step 2: wrap the fitted sklearn pipeline, then build the random-sampling explainer.
m = dice_ml.Model(model=model, backend="sklearn")
exp = dice_ml.Dice(d, m, method='random')

In [38]:
# Record the NumPy version this notebook was run with.
print(np.__version__)

1.19.2


In [39]:
# NOTE(review): leftover exploration; the captured output below includes a
# local installation path - consider clearing it before sharing.
help(dice_ml)

Help on package dice_ml:

NAME
    dice_ml

PACKAGE CONTENTS
    counterfactual_explanations
    data
    data_interfaces (package)
    dice
    diverse_counterfactuals
    explainer_interfaces (package)
    model
    model_interfaces (package)
    utils (package)

FILE
    c:\users\eoind\anaconda3\envs\group_cf\lib\site-packages\dice_ml\__init__.py




### Materials --- Close to Decision Boundary

In [40]:
# Predicted class for test row 6; the slice keeps the 2-D (1, n_features) shape.
model.predict(X_test[6:7])

array([1], dtype=int64)

In [41]:
#defining arguments
# Every feature may be changed when generating counterfactuals.
features_to_vary = [
    'Workclass',
    'Education',
    'Marital Status',
    'Occupation',
    'Relationship',
    'Race',
    'Sex',
    'Country',
    'Age',
    'Capital Gain',
    'Capital Loss',
    'Hours per week',
]
# Seed passed to every generate_counterfactuals call.
random_seed = 0

In [42]:
# Labelled frames over the positive / negative training subsets, so a row
# position found in the arrays can be sliced out as a one-row DataFrame.
# `feature_names` holds the same twelve names in the same order.
positive_train_df = pd.DataFrame(np.array(positive_train_set), columns=feature_names)
negative_train_df = pd.DataFrame(np.array(negative_train_set), columns=feature_names)

In [43]:
def cfe_generator(instance):
    """Generate DiCE counterfactuals for a test row and four same-class neighbours.

    Parameters
    ----------
    instance : int
        Row position in ``X_test`` / ``x_test``.

    Returns
    -------
    tuple
        Five DiCE explanation objects ``(e1, e2, e3, e4, e5)``: ``e1`` explains
        the test row itself, ``e2``-``e5`` explain its four nearest training
        neighbours taken from the class the model predicts for the row.

    Raises
    ------
    ValueError
        If the model's prediction for the row is neither 0/False nor 1/True.

    Notes
    -----
    The previous version also generated a sixth explanation in the positive
    branch that was never returned; that wasted DiCE call is gone.
    """
    query_df = x_test[instance:instance+1]

    # Predict once and pick the matching same-class frame.
    prediction = model.predict(X_test[instance].reshape(1,-1))[0]
    if prediction == False:
        outcome, same_class_df = 'negative', negative_train_df
    elif prediction == True:
        outcome, same_class_df = 'positive', positive_train_df
    else:
        raise ValueError("unexpected model prediction: {!r}".format(prediction))

    # Element 4 of the result is the same-class kneighbors index array; row 0
    # belongs to the single query. These positions are used directly as row
    # positions in the same-class frame.
    neighbour_rows = explanation_generator(
        np.array(query_df).reshape(1,-1), outcome=outcome
    )[4][0]

    def _counterfactual(frame):
        # One opposite-class counterfactual for a one-row frame.
        return exp.generate_counterfactuals(
            frame, total_CFs=1, desired_class="opposite",
            features_to_vary=features_to_vary, random_seed=random_seed)

    explanations = [_counterfactual(query_df)]
    for row in neighbour_rows[:4]:
        explanations.append(_counterfactual(same_class_df[row:row+1]))

    return tuple(explanations)
    
    #e1.visualize_as_dataframe(show_only_changes=True)

### Different Models for Robustness

(i) ANN
(ii) Linear - Model or Logistic Regression
(iii) Ensemble Models
(iv)


In [44]:
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

In [45]:
#mlp_model = make_pipeline(StandardScaler(), MLPClassifier(random_state=0))
#mlp_model.fit(X_train, y_train)
#mlp_yhat = mlp_model.predict(X_test)

In [46]:
#logistic_model = make_pipeline(StandardScaler(), LogisticRegression(random_state=0))
#logistic_model.fit(X_train, y_train)
#logistic_yhat = logistic_model.predict(X_test)

In [47]:
from sklearn.metrics import accuracy_score

In [48]:
# Accuracy of the gradient-boosting pipeline on the held-out rows.
accuracy_score(y_true=y_test, y_pred=yhat)

0.87421875

In [49]:
#accuracy_score(y_test, mlp_yhat)

In [50]:
#accuracy_score(y_test, logistic_yhat)

### Coverage Check

In [51]:
from scipy import stats

In [52]:
def _majority_or_mean(values, min_votes):
    """Return the most frequent value if it occurs ``min_votes`` times, else the mean rounded to 1 decimal."""
    uniques, counts = np.unique(values, return_counts=True)
    # np.unique sorts its output, so argmax picks the smallest value on ties.
    top = counts.argmax()
    if counts[top] >= min_votes:
        return uniques[top]
    return round(values.mean(), 1)


def group_cfe_generator(test_index, n_features=12, min_votes=3):
    """Merge the counterfactuals of a test row and its neighbours into one group CFE.

    Parameters
    ----------
    test_index : int
        Row position in the test set, forwarded to ``cfe_generator``.
    n_features : int, default 12
        Number of leading columns of each counterfactual to aggregate.
    min_votes : int, default 3
        Minimum number of counterfactuals that must agree on a value for it to
        be used as is; otherwise the rounded mean is used.

    Returns
    -------
    numpy.ndarray
        Float array of length ``n_features``.

    Notes
    -----
    Uses ``np.unique`` and the builtin ``float`` in place of
    ``scipy.stats.mode(...)[0][0]`` and the ``np.float`` alias, neither of
    which work on current SciPy / NumPy releases.
    """
    explanations = cfe_generator(test_index)

    # First counterfactual row of each explanation.
    cfes = [
        np.array(explanation.cf_examples_list[0].final_cfs_df.values[0])
        for explanation in explanations
    ]

    group_cfe = []
    for feature in range(n_features):
        feature_values = np.array([cfe[feature] for cfe in cfes]).astype(float)
        group_cfe.append(_majority_or_mean(feature_values, min_votes))

    return np.array(group_cfe).astype(float)


In [53]:
# Maximum class probability at or below which a test row counts as borderline.
BORDERLINE_THRESHOLD = 0.7
boarderline_cases_ = boarderline_cases(threshold=BORDERLINE_THRESHOLD)

In [54]:
# Example: the group counterfactual for test row 4.
group_cfe_generator(4)

array([4.0000e+00, 4.0000e+00, 1.0000e+00, 6.0000e+00, 1.0000e+00,
       4.0000e+00, 0.0000e+00, 9.0000e+00, 2.0000e+01, 0.0000e+00,
       2.4114e+03, 2.5000e+01])

In [55]:
# Group counterfactuals for the first nine borderline cases only.
group_cfe_instances = [
    group_cfe_generator(case)
    for case in boarderline_cases_[0:9] #ed edit full
]

In [56]:
# (number of cases, number of features) of the collected group counterfactuals.
np.shape(group_cfe_instances)

(9, 12)

In [57]:
#gcfe_df = pd.DataFrame(group_cfe_instances, columns = ['Workclass', 'Education', 'Marital Status', 'Occupation', 'Relationship', 'Race', 'Sex', 'Country', 'Age', 'Capital Gain', 'Capital Loss', 'Hours per week'])

In [58]:
#test_df = pd.DataFrame(np.array([X_test[instance] for instance in boarderline_cases_]), columns = ['Workclass', 'Education', 'Marital Status', 'Occupation', 'Relationship', 'Race', 'Sex', 'Country', 'Age', 'Capital Gain', 'Capital Loss', 'Hours per week'])

In [59]:
#feature_changes = np.array([len(np.where(np.array(gcfe_df - test_df)[index] != 0)[0]) for index in range(len(boarderline_cases_))])

In [60]:
# List the category labels of one categorical feature.
# A separate name is used for the column position so that `idx`, the
# train/test boundary defined earlier, is not overwritten.
cat = 'Country'
cat_idx = feature_names.index(cat)
np.array(category_map[cat_idx])

array(['?', 'British-Commonwealth', 'China', 'Euro_1', 'Euro_2',
       'Latin-America', 'Other', 'SE-Asia', 'South-America',
       'United-States', 'Yugoslavia'], dtype='<U20')

In [61]:
#boarderline_cases_[80]

In [62]:
#gcfe_df.head(134)

In [63]:
#cfe_generator(boarderline_cases_[40])[4].visualize_as_dataframe(show_only_changes=True)

In [64]:
#np.round(gcfe_df - test_df, 0).iloc[80:130]

### Debugging training instance class

In [65]:
# Show the five explanations for borderline case 9. They are generated once
# and then displayed; the original re-ran cfe_generator for every one of them.
# A plain loop also avoids the list of None values the cell used to output.
debug_explanations = cfe_generator(boarderline_cases_[9])
for debug_explanation in debug_explanations:
    debug_explanation.visualize_as_dataframe(show_only_changes=True)

Query instance (original outcome : 1)


Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week,income
0,4,1,0,8,0,4,1,9,35,0,0,40,1



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week,income
0,-,-,-,1,-,-,-,-,66,-,-,-,0


Query instance (original outcome : 1)


Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week,income
0,4,1,0,8,0,4,1,9,35,0,0,40,1



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week,income
0,-,-,-,1,-,-,-,-,66,-,-,-,0


Query instance (original outcome : 1)


Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week,income
0,4,1,0,8,0,4,1,9,57,0,0,40,1



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week,income
0,-,3,-,-,-,-,-,8,-,-,-,-,0


Query instance (original outcome : 1)


Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week,income
0,4,1,0,8,0,4,1,9,47,0,0,40,1



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week,income
0,-,-,-,-,-,-,-,-,26,-,-,10,0


Query instance (original outcome : 1)


Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week,income
0,4,1,0,8,0,4,1,9,46,0,0,40,1



Diverse Counterfactual set (new outcome: 0.0)


Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week,income
0,-,-,-,-,-,-,-,-,26,-,-,10,0


[None, None, None, None, None]

In [66]:
# Show the query row of borderline case 74 with its counterfactual in full.
cfe_generator(boarderline_cases_[74])[0].visualize_as_dataframe()

Query instance (original outcome : 0)


Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week,income
0,4,1,0,7,0,1,1,4,58,0,0,40,0



Diverse Counterfactual set (new outcome: 1.0)


Unnamed: 0,Workclass,Education,Marital Status,Occupation,Relationship,Race,Sex,Country,Age,Capital Gain,Capital Loss,Hours per week,income
0,4,1,0,7,0,1,1,4,58,96766,0,24,1


In [None]:
# For every borderline case, count how many features each of its five
# counterfactuals changes relative to the instance it explains.
number_of_changes = []

for case in range(len(boarderline_cases_)):

    # Generate the five explanations once per case; the original regenerated
    # all five for every single one it inspected.
    case_explanations = cfe_generator(boarderline_cases_[case])

    for instance in range(5):

        q_cf_pair = case_explanations[instance]

        q = q_cf_pair.cf_examples_list[0].test_instance_df.values[0]
        cf = q_cf_pair.cf_examples_list[0].final_cfs_df.values[0]

        number_of_changes.append(len(list(np.where(q-cf !=0)[0])) -1) #subtract one for the income feature

In [None]:
# Positions of the borderline cases whose explanations changed exactly 10
# features in total. The split into per-case chunks is done once instead of
# once per case.
changes_per_case = np.array_split(number_of_changes, len(boarderline_cases_))
np.where([case_changes.sum() == 10 for case_changes in changes_per_case])[0]

In [None]:
# How many borderline cases changed exactly 10 features in total across their
# explanations. The split into per-case chunks is done once instead of once
# per case.
changes_per_case = np.array_split(number_of_changes, len(boarderline_cases_))
len(np.where([case_changes.sum() == 10 for case_changes in changes_per_case])[0])