# Imports

In [2]:
from nrn.sklogic.classifiers import NRNClassifier

In [3]:
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, roc_auc_score

import warnings
warnings.filterwarnings("ignore", message="Choices for a categorical distribution should be a tuple")
warnings.filterwarnings("ignore", message="To copy construct from a tensor, it is recommended")
warnings.filterwarnings("ignore", message="IProgress not found")
warnings.filterwarnings("ignore", message="A value is trying to be set on a copy of a DataFram")

# Load Data

In [4]:
data = load_breast_cancer()
data.target_names

array(['malignant', 'benign'], dtype='<U9')

# Preprocess Data

In [6]:
mms = MinMaxScaler()
X = mms.fit_transform(data.data)

In [7]:
y = data.target.reshape(-1, 1)

In [8]:
X_train_array, X_test_array, y_train_array, y_test_array = train_test_split(X, y, test_size=0.1, random_state=42)
X_train_array, X_val_array, y_train_array, y_val_array = train_test_split(X_train_array, y_train_array, test_size=0.22, random_state=42)

In [9]:
X_train = pd.DataFrame(X_train_array, columns=data.feature_names)
X_train.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,0.264991,0.293879,0.24905,0.146554,0.282567,0.069873,0.004358,0.014533,0.321717,0.180918,...,0.19815,0.294776,0.175059,0.093123,0.215479,0.037789,0.004456,0.030144,0.185295,0.060803
1,0.287236,0.139669,0.268952,0.164199,0.278866,0.055119,0.010682,0.043882,0.198485,0.109941,...,0.241907,0.14339,0.216893,0.11974,0.200951,0.030251,0.014569,0.121375,0.10684,0.070576
2,0.221449,0.248901,0.206689,0.117709,0.207457,0.051899,0.019461,0.057753,0.308586,0.154802,...,0.190324,0.205757,0.165347,0.087815,0.187611,0.036354,0.024329,0.14646,0.229844,0.086646
3,0.197785,0.395671,0.187686,0.100445,0.443893,0.123919,0.020982,0.053479,0.280303,0.241786,...,0.157595,0.450426,0.143682,0.067047,0.37661,0.081022,0.02861,0.147973,0.278139,0.106192
4,0.279663,0.11532,0.263285,0.157964,0.152749,0.054935,0.052952,0.058201,0.139899,0.12321,...,0.202419,0.093817,0.184222,0.096294,0.165027,0.084806,0.094728,0.184399,0.146659,0.092549


In [11]:
y_train = pd.DataFrame(y_train_array, columns=[data.target_names[1]])
y_train.head()

Unnamed: 0,benign
0,1
1,1
2,1
3,1
4,1


In [13]:
X_val = pd.DataFrame(X_val_array, columns=data.feature_names)
X_test = pd.DataFrame(X_test_array, columns=data.feature_names)
X_val.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,0.344503,0.336152,0.373437,0.20632,0.232373,0.625483,0.703608,0.387575,0.325253,0.584035,...,0.262184,0.3121,0.281837,0.128736,0.12098,0.38033,0.541773,0.517182,0.164203,0.348682
1,0.601022,0.404802,0.595052,0.445599,0.409317,0.351267,0.436504,0.548211,0.516162,0.151432,...,0.527215,0.603412,0.533841,0.332973,0.334346,0.30902,0.378115,0.774914,0.488863,0.158337
2,0.302854,0.710517,0.294036,0.175483,0.359484,0.168333,0.06605,0.162227,0.159091,0.214827,...,0.253291,0.788646,0.23049,0.1279,0.303969,0.111389,0.055935,0.26677,0.124384,0.142398
3,0.15519,0.23233,0.152443,0.075207,0.326262,0.187964,0.102109,0.121173,0.307576,0.361837,...,0.103166,0.267058,0.102943,0.042322,0.494816,0.191431,0.142412,0.286357,0.221959,0.260724
4,0.246533,0.121069,0.243867,0.132471,0.487226,0.232225,0.066893,0.143241,0.414141,0.310657,...,0.192458,0.154318,0.204044,0.088478,0.472363,0.224321,0.079193,0.28134,0.37532,0.245113


In [14]:
y_val = pd.DataFrame(y_val_array, columns=[data.target_names[1]])
y_test = pd.DataFrame(y_test_array, columns=[data.target_names[1]])
y_val.head()

Unnamed: 0,benign
0,1
1,0
2,1
3,1
4,1


# Model training

In [17]:
clf = NRNClassifier(
    target_names=[data.target_names[1]],
    binarization=True)

In [19]:
clf.fit(X_train, y_train)

# Prediction

In [20]:
y_val_pred = clf.predict(X_val, decision_boundary=0.5)

In [21]:
y_val_pred

Unnamed: 0,benign
0,1
1,0
2,1
3,1
4,1
...,...
108,0
109,0
110,0
111,1


# Evaluation

In [24]:
roc_auc_score(y_val, y_val_pred)

0.9275747508305648

# Explanations

In [25]:
print(clf.explain_sample(X_val, sample_index=0, quantile=1., decision_boundary=0.5))

The prediction is benign because: 


AND 
	The area error was less than or equal to 0.123 (12th percentile)
	The mean area was less than or equal to 0.234 (22th percentile)
	The mean perimeter was less than or equal to 0.48 (46th percentile)
	The mean texture was between 0.15 and 0.449 (18th and 55th percentiles)
	The perimeter error was less than or equal to 0.167 (16th percentile)
	The worst area was less than or equal to 0.14 (13th percentile)
	The worst smoothness was less than or equal to 0.189 (18th percentile)
