In [1]:
import fcalc
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import cross_val_score

# Breast Cancer Dataset

In [2]:
data3 = pd.read_csv('Datasets/bcancer.csv')
data3['diagnosis'] = [x == 'M' for x in data3['diagnosis']]

data3['area_mean143_600'] = data3['area_mean'].apply(lambda x: 1 if 143 <= x <= 600.9 else 0)
data3['area_mean601_1200'] = data3['area_mean'].apply(lambda x: 1 if 601 <= x <= 1200.9 else 0)
data3['area_mean1201_1900'] = data3['area_mean'].apply(lambda x: 1 if 1201 <= x <= 1900.9 else 0)
data3['area_mean1901_2502'] = data3['area_mean'].apply(lambda x: 1 if 1901 <= x <= 2501 else 0)


data3['radius_mean6_14'] = data3['radius_mean'].apply(lambda x: 1 if 6.0 <= x <= 14.9 else 0)
data3['radius_mean15_22'] = data3['radius_mean'].apply(lambda x: 1 if 15.0 <= x <= 22.9 else 0)
data3['radius_mean23_29'] = data3['radius_mean'].apply(lambda x: 1 if 23.0 <= x <= 29.0 else 0)


data3['texture_mean9_19'] = data3['texture_mean'].apply(lambda x: 1 if 9.71 <= x <= 19.9 else 0)
data3['texture_mean20_29'] = data3['texture_mean'].apply(lambda x: 1 if 20.0 <= x <= 29.9 else 0)
data3['texture_mean30_39'] = data3['texture_mean'].apply(lambda x: 1 if 30.0 <= x <= 39.28 else 0)

data3['concavity_mean'] = data3['concavity_mean'].apply(lambda x: 1 if 0.0 <= x <= 0.22 else 0)
data3['concavity_mean'] = data3['concavity_mean'].apply(lambda x: 1 if 0.23 <= x <= 0.42 else 0)

bdata3 = data3.drop(['radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst'], axis=1)



In [3]:
x3 = bdata3.drop('diagnosis', axis=1)
y3 = bdata3['diagnosis']

x3_train, x3_test, y3_train, y3_test = train_test_split(x3, y3, test_size=0.3, random_state=42)

In [4]:
# Binarized Binary Classifier

bin_cls = fcalc.classifier.BinarizedBinaryClassifier(x3_train.values, y3_train.to_numpy(), method="standard",  alpha=0)

In [5]:
bin_cls.predict(x3_test.values)
print(bin_cls.predictions)

[0. 1. 1. 0. 0. 1. 1. 1. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0.
 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 1. 1. 0. 0. 0. 1.
 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0.
 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 0.
 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 0. 0.
 0. 0. 1. 1. 1. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
 0. 0. 0.]


In [6]:
print(accuracy_score(y3_test, bin_cls.predictions))
print(f1_score(y3_test, bin_cls.predictions))

0.9122807017543859
0.8695652173913043


### Pattern Binary Classifier

In [7]:
nbdata3 = pd.read_csv('Datasets/bcancer.csv')
nbdata3['diagnosis'] = [x == 'M' for x in nbdata3['diagnosis']]

x3 = nbdata3.drop('diagnosis', axis=1)
y3 = nbdata3['diagnosis']

x3_train, x3_test, y3_train, y3_test = train_test_split(x3, y3, test_size=0.3, random_state=42)

In [8]:
# Pattern Binary Classifier
pat_cls = fcalc.classifier.PatternBinaryClassifier(x3_train.values, y3_train.to_numpy(), 
                                             categorical=np.arange(x3_train.shape[1]), method="ratio-support",  alpha=1)

In [10]:
pat_cls.predict(x3_test.values)
print(pat_cls.predictions)

[1. 0. 1. 1. 1. 1. 1. 0. 1. 0. 0. 1. 0. 1. 1. 1. 1. 1. 0. 0. 1. 0. 1. 1.
 0. 1. 1. 0. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1. 0. 0. 0. 0.
 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1.
 1. 1. 0. 0. 1. 0. 0. 0. 1. 1. 1. 0. 1. 1. 1. 1. 0. 0. 1. 1. 0. 0. 1. 1.
 0. 0. 0. 1. 1. 0. 1. 1. 1. 0. 1. 0. 1. 0. 1. 0. 1. 1. 0. 1. 0. 0. 0. 1.
 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 1. 0. 1.
 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1.
 1. 0. 0.]


In [11]:
print(accuracy_score(y3_test, pat_cls.predictions))
print(f1_score(y3_test, pat_cls.predictions, average = 'macro'))

0.6257309941520468
0.6246913580246914
