In [1]:
import fcalc
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import cross_val_score

## Diabetes Dataset

In [2]:
# Load the diabetes CSV, naming the columns explicitly; the last one, 'y', is the label.
column_names_d = [
    'col2', 'col2.1', 'col3', 'col4',
    'col5', 'col6', 'col7', 'col8',
    'y',
]

diabetes_dataset = pd.read_csv('Datasets/diabetes.csv', names=column_names_d)

# Binarise the label: True exactly where the raw value equals the string '1'.
diabetes_dataset['y'] = diabetes_dataset['y'] == '1'

# Show ten randomly chosen rows (no seed is given, so the rows differ between runs).
diabetes_dataset.sample(10)

Unnamed: 0,col2,col2.1,col3,col4,col5,col6,col7,col8,y
530,0,111,65,0,0,24.6,0.66,31,False
372,0,118,64,23,89,0.0,1.731,21,False
734,2,106,56,27,165,29.0,0.426,22,False
133,3,170,64,37,225,34.5,0.356,30,True
341,1,130,70,13,105,25.9,0.472,22,False
648,0,179,50,36,159,37.8,0.455,22,True
211,2,81,60,22,0,27.7,0.29,25,False
69,1,95,66,13,38,19.6,0.334,25,False
274,1,71,78,50,45,33.2,0.422,21,False
143,2,108,52,26,63,32.5,0.318,22,False


### Pattern Binary Classifier

In [3]:
# Features are every named column except the trailing label; the target is 'y'.
x1 = diabetes_dataset.loc[:, column_names_d[:-1]]
y1 = diabetes_dataset.loc[:, 'y']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
x1_train, x1_test, y1_train, y1_test = train_test_split(
    x1,
    y1,
    test_size=0.3,
    random_state=42,
)

In [4]:
# Build the pattern-based binary classifier from the diabetes training split.
# Every feature column index is declared categorical.
# NOTE(review): the sampled rows shown earlier look numeric - confirm that
# treating all columns as categorical is intended for this dataset.
pat_cls = fcalc.classifier.PatternBinaryClassifier(
    x1_train.to_numpy(),
    y1_train.to_numpy(),
    categorical=np.arange(len(x1_train.columns)),
)

In [5]:
# Classify the held-out diabetes rows, then print the results read back from
# the classifier's `predictions` attribute.
pat_cls.predict(x1_test.to_numpy())
print(pat_cls.predictions)

[0. 0. 1. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1.
 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1. 0. 1. 0.
 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 0. 0. 0. 0. 1. 0.
 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 1. 1. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0.
 0. 1. 1. 0. 0. 1. 0. 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 1. 0. 1. 0. 0. 0.
 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 1.
 1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1.]


In [6]:
# Score the diabetes predictions against the held-out labels:
# first line is accuracy, second line is F1.
print(
    accuracy_score(y1_test, pat_cls.predictions),
    f1_score(y1_test, pat_cls.predictions),
    sep='\n',
)

0.6190476190476191
0.45


## Iris Dataset

In [7]:
# Load the iris data, naming the columns explicitly; 'species' is the label column.
# NOTE(review): this path uses 'data_sets/' while the other loads in this
# notebook use 'Datasets/' - confirm both directories really exist.
column_names_i = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'species',
]

iris_dataset = pd.read_csv('data_sets/iris.data', names=column_names_i)

# One-vs-rest target: True for 'Iris-setosa', False for every other value.
iris_dataset['species'] = iris_dataset['species'] == 'Iris-setosa'

# Show ten randomly chosen rows (no seed is given, so the rows differ between runs).
iris_dataset.sample(10)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
45,4.8,3.0,1.4,0.3,True
65,6.7,3.1,4.4,1.4,False
40,5.0,3.5,1.3,0.3,True
127,6.1,3.0,4.9,1.8,False
3,4.6,3.1,1.5,0.2,True
76,6.8,2.8,4.8,1.4,False
43,5.0,3.5,1.6,0.6,True
111,6.4,2.7,5.3,1.9,False
86,6.7,3.1,4.7,1.5,False
53,5.5,2.3,4.0,1.3,False


### Pattern Binary Classifier

In [8]:
# Features are all columns but the last one ('species'), which is the target.
x2 = iris_dataset.drop(columns='species')
y2 = iris_dataset.loc[:, 'species']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
x2_train, x2_test, y2_train, y2_test = train_test_split(
    x2,
    y2,
    test_size=0.3,
    random_state=42,
)

In [9]:
# Build the pattern-based binary classifier from the iris training split.
# No `categorical` argument is passed here, unlike the other datasets in this
# notebook, so the library's default column handling applies.
pat_cls = fcalc.classifier.PatternBinaryClassifier(
    x2_train.to_numpy(),
    y2_train.to_numpy(),
)

In [10]:
# Classify the held-out iris rows, then print the results read back from
# the classifier's `predictions` attribute.
pat_cls.predict(x2_test.to_numpy())
print(pat_cls.predictions)

[0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 0. 1. 0.
 0. 0. 0. 0. 1. 1. 1. 1. 0. 1. 1. 0. 0. 1. 1. 1. 0. 0. 0. 1. 1.]


In [11]:
# Report accuracy and F1 for the iris predictions, each rounded to 4 decimals.
print(
    "accuracy:",
    round(accuracy_score(y2_test, pat_cls.predictions), 4),
)
print(
    "f1 score:",
    round(f1_score(y2_test, pat_cls.predictions), 4),
)

accuracy: 1.0
f1 score: 1.0


## Breast Cancer Dataset

In [12]:
# Load the breast-cancer CSV, naming the columns explicitly: ten "_mean"
# features, ten "_se" features, ten "_worst" features, then the 'diagnosis' label.
column_names_c = [
    # *_mean
    'radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean',
    'smoothness_mean', 'compactness_mean', 'concavity_mean',
    'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
    # *_se
    'radius_se', 'texture_se', 'perimeter_se', 'area_se',
    'smoothness_se', 'compactness_se', 'concavity_se',
    'concave points_se', 'symmetry_se', 'fractal_dimension_se',
    # *_worst
    'radius_worst', 'texture_worst', 'perimeter_worst', 'area_worst',
    'smoothness_worst', 'compactness_worst', 'concavity_worst',
    'concave points_worst', 'symmetry_worst', 'fractal_dimension_worst',
    # label
    'diagnosis',
]

cancer_dataset = pd.read_csv('Datasets/bcancer.csv', names=column_names_c)

# Binarise the label: True exactly where the raw value equals 'M'.
cancer_dataset['diagnosis'] = cancer_dataset['diagnosis'] == 'M'

# Show ten randomly chosen rows (no seed is given, so the rows differ between runs).
cancer_dataset.sample(10)

Unnamed: 0,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,diagnosis
464,11.6,18.36,73.88,412.7,0.08508,0.05855,0.03367,0.01777,0.1516,0.05859,...,24.02,82.68,495.1,0.1342,0.1808,0.186,0.08288,0.321,0.07863,False
84,19.1,26.29,129.1,1132.0,0.1215,0.1791,0.1937,0.1469,0.1634,0.07224,...,32.72,141.3,1298.0,0.1392,0.2817,0.2432,0.1841,0.2311,0.09203,True
11,16.02,23.24,102.7,797.8,0.08206,0.06669,0.03299,0.03323,0.1528,0.05697,...,33.88,123.8,1150.0,0.1181,0.1551,0.1459,0.09975,0.2948,0.08452,True
406,10.94,18.59,70.39,370.0,0.1004,0.0746,0.04944,0.02932,0.1486,0.06615,...,25.58,82.76,472.4,0.1363,0.1644,0.1412,0.07887,0.2251,0.07732,False
230,12.83,22.33,85.26,503.2,0.1088,0.1799,0.1695,0.06861,0.2123,0.07254,...,30.15,105.3,706.0,0.1777,0.5343,0.6282,0.1977,0.3407,0.1243,True
241,13.64,15.6,87.38,575.3,0.09423,0.0663,0.04705,0.03731,0.1717,0.0566,...,19.05,94.11,683.4,0.1278,0.1291,0.1533,0.09222,0.253,0.0651,False
274,9.742,15.67,61.5,289.9,0.09037,0.04689,0.01103,0.01407,0.2081,0.06312,...,20.88,68.09,355.2,0.1467,0.0937,0.04043,0.05159,0.2841,0.08175,False
70,12.78,16.49,81.37,502.5,0.09831,0.05234,0.03653,0.02864,0.159,0.05653,...,19.76,85.67,554.9,0.1296,0.07061,0.1039,0.05882,0.2383,0.0641,False
472,12.04,28.14,76.85,449.9,0.08752,0.06,0.02367,0.02377,0.1854,0.05698,...,33.33,87.24,567.6,0.1041,0.09726,0.05524,0.05547,0.2404,0.06639,False
435,14.86,16.94,94.89,673.7,0.08924,0.07074,0.03346,0.02877,0.1573,0.05703,...,20.54,102.3,777.5,0.1218,0.155,0.122,0.07971,0.2525,0.06827,False


### Pattern Binary Classifier

In [13]:
# Features are every column except 'diagnosis', which is the target.
x3 = cancer_dataset.drop(columns='diagnosis')
y3 = cancer_dataset.loc[:, 'diagnosis']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
x3_train, x3_test, y3_train, y3_test = train_test_split(
    x3,
    y3,
    test_size=0.3,
    random_state=42,
)

In [14]:
# Build the pattern-based binary classifier from the breast-cancer training split.
# Every feature column index is declared categorical.
# NOTE(review): the sampled rows shown earlier look like continuous
# measurements - confirm that treating all columns as categorical is intended.
pat_cls = fcalc.classifier.PatternBinaryClassifier(
    x3_train.to_numpy(),
    y3_train.to_numpy(),
    categorical=np.arange(len(x3_train.columns)),
)

In [15]:
# Classify the held-out breast-cancer rows, then print the results read back
# from the classifier's `predictions` attribute.
pat_cls.predict(x3_test.to_numpy())
print(pat_cls.predictions)

[ 1.  1.  0.  0.  0.  0.  0.  1.  0.  0.  0.  1.  1.  1.  0.  1.  0.  1.
  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  1.  1.
  1.  0.  0.  1.  1.  1.  1.  0.  1.  0.  1.  0.  1.  0.  0.  0.  1.  1.
  0.  1.  1.  1.  0.  1.  1.  1. -1.  0.  1.  1.  1.  1.  0.  0.  0. -1.
  1.  0.  0.  1.  0.  1.  0.  0.  1.  0.  1.  1.  0.  1.  1.  0.  1.  0.
  1.  0.  1.  1.  0.  0.  0. -1.  0.  1.  0.  0.  1.  0.  0.  1.  1.  0.
  0.  1.  1.  0.  0.  0.  0.  1.  1.  1.  1.  1.  1.  0.  1.  0.  0.  1.
  0.  0.  0.  1. -1.  1.  0.  0.  1.  1.  0.  0.  0.  0.  0.  1. -1.  0.
  0.  0.  1.  1.  1.  0.  0.  1.  0.  1.  0.  1.  1.  1.  0.  0.  0.  1.
  0.  0.  0.  0.  0.  1.  0.  1.  1.]


In [16]:
# Accuracy over all held-out rows. A prediction of -1 can never equal a
# True/False label, so every such row counts as an error.
print(accuracy_score(y3_test, pat_cls.predictions))

# Macro F1 averages the per-label F1 over every label seen in the targets or
# the predictions. The predictions for this dataset contain -1 in addition to
# 0 and 1, so this average spans three labels and is NOT comparable with the
# positive-class F1 reported for the other datasets in this notebook.
print(f1_score(y3_test, pat_cls.predictions, average = 'macro'))

# Surface how many rows received the extra -1 value.
# NOTE(review): -1 presumably marks rows the classifier left undecided -
# confirm against the fcalc documentation.
print("undefined predictions (-1):", int(np.sum(pat_cls.predictions == -1)))

# F1 restricted to the positive label (1), so the score lines up with the
# binary F1 used for the other datasets. Rows predicted as -1 still count as
# misses for true positives. Labels are cast to int so that True/False match
# the numeric labels 1/0 unambiguously.
print(
    "f1 score (positive class only):",
    f1_score(
        y3_test.astype(int),
        pat_cls.predictions,
        labels=[1],
        average='macro',
    ),
)

0.6608187134502924
0.4400781589231437
