## Load data

In [1]:
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder

# Read the Iris CSV and keep only the raw values (one row per example)
data = pd.read_csv('data-A4/iris.csv').values

# The last column holds the label; every column before it is a numeric attribute
X = data[:, :-1].astype(float)
y_raw = data[:, -1]

# Map each distinct label string to an integer class id
encoder = LabelEncoder().fit(y_raw)
y = encoder.transform(y_raw)

# Quick sanity summary of what was loaded
n_examples, n_attributes = X.shape
print("Attributes: {}".format(n_attributes))
print("Examples: {}".format(n_examples))
print("Unique labels: {}".format(np.unique(y)))

Attributes: 4
Examples: 150
Unique labels: [0 1 2]


## Iris classifier
Iris classifier using hand-crafted rules

In [4]:
#
# Manually defined rule-based Iris classifier
#
class IrisClassifier():
    """Rule-based classifier for the three Iris species.

    Predictions come from fixed, hand-crafted thresholds on petal length and
    petal width. ``fit`` does not learn anything that ``predict`` uses; it
    only computes and prints per-species attribute statistics and the rules.

    Integer labels: 0 = Setosa, 1 = Versicolor, 2 = Virginica.
    Attribute columns are read in the order sepal_length, sepal_width,
    petal_length, petal_width.
    """

    # Species names, indexed by integer label
    SPECIES = ("Setosa", "Versicolor", "Virginica")
    # Attribute names, indexed by column
    ATTRIBUTES = ("sepal_length", "sepal_width", "petal_length", "petal_width")

    # Decision thresholds shared by predict() and the rules printed by fit()
    SETOSA_MAX_PETAL_WIDTH = 1.0
    VERSICOLOR_MAX_PETAL_LENGTH = 4.87
    VERSICOLOR_MAX_PETAL_WIDTH = 1.64

    def __init__(self):
        # Per-species arrays of attribute means / standard deviations,
        # filled in by fit(); placeholders until then.
        self.means = [0, 0, 0]
        self.stdevs = [0, 0, 0]

    def fit(self, X, y):
        """Compute and print per-species statistics and the decision rules.

        Rows are assigned to a species by their label in ``y`` rather than by
        their position in ``X``, so the statistics stay correct for shuffled
        data, subsets, or unequal class sizes.

        Args:
            X: 2-D array-like of attribute values, one row per example.
            y: 1-D array-like of integer labels (0, 1 or 2), one per row of X.

        Raises:
            ValueError: if X and y do not have the same number of examples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                "X and y must have the same number of examples "
                "(got {} and {})".format(X.shape[0], y.shape[0])
            )

        for label in range(len(self.SPECIES)):
            rows = X[y == label]
            if len(rows) == 0:
                # No examples of this species: report NaN instead of letting
                # NumPy emit an "empty slice" warning.
                self.means[label] = np.full(X.shape[1], np.nan)
                self.stdevs[label] = np.full(X.shape[1], np.nan)
            else:
                self.means[label] = np.mean(rows, axis=0)
                self.stdevs[label] = np.std(rows, axis=0)

        for label, name in enumerate(self.SPECIES):
            self.print_attr(name, label)

        print("\nRules:")
        print("\t    if: (petal_width < {}) → Setosa".format(
            self.SETOSA_MAX_PETAL_WIDTH))
        print("\telseif: (petal_length < {} and petal_width < {}) → Versicolor".format(
            self.VERSICOLOR_MAX_PETAL_LENGTH, self.VERSICOLOR_MAX_PETAL_WIDTH))
        print("\t  else:  → Virginica")

    def print_attr(self, label, i):
        """Print mean, standard deviation and the mean ± stdev range of each
        attribute for the species stored at index ``i``.

        Args:
            label: Heading to print for the species.
            i: Index into ``self.means`` / ``self.stdevs``.
        """
        print(label + ":")
        for col, name in enumerate(self.ATTRIBUTES):
            mean = self.means[i][col]
            stdev = self.stdevs[i][col]
            # "<13" pads "name:" so the numbers line up in one column
            print("\t{0:<13} {1:.3f} ±{2:.3f} ({3:.3f} to {4:.3f})".format(
                name + ":", mean, stdev, mean - stdev, mean + stdev))

    def _classify(self, xi):
        """Return the integer label for a single example ``xi``."""
        petal_length, petal_width = xi[2], xi[3]

        # Rule 1: Setosa has very low petal width
        if petal_width < self.SETOSA_MAX_PETAL_WIDTH:
            return 0
        # Rule 2: Versicolor has lower petal length and width
        # compared to Virginica
        if (petal_length < self.VERSICOLOR_MAX_PETAL_LENGTH
                and petal_width < self.VERSICOLOR_MAX_PETAL_WIDTH):
            return 1
        # Rule 3: If neither Rule 1 nor Rule 2 applies, predict Virginica
        return 2

    def predict(self, X):
        """Predict a label for every example in ``X``.

        Args:
            X: Sequence of examples; each example is indexable, with
                petal_length at index 2 and petal_width at index 3.

        Returns:
            A list of integer labels (0, 1 or 2), one per example.
        """
        return [self._classify(xi) for xi in X]

## Evaluation

In [5]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Build the rule-based classifier; fit() prints the per-species statistics and the rules
model = IrisClassifier()
model.fit(X, y)

# Classify every example in the data set
preds = model.predict(X)

# Share of correctly classified examples, reported as a percentage
accuracy = accuracy_score(y, preds)
print("\nAccuracy: {0:.2f}%".format(accuracy*100))

# Confusion matrix: rows are the true labels, columns the predicted labels
conf_matrix = confusion_matrix(y, preds)
print("\nConfusion Matrix:")
print(conf_matrix)

Setosa:
	sepal_length: 5.006 ±0.349 (4.657 to 5.355)
	sepal_width:  3.418 ±0.377 (3.041 to 3.795)
	petal_length: 1.464 ±0.172 (1.292 to 1.636)
	petal_width:  0.244 ±0.106 (0.138 to 0.350)
Versicolor:
	sepal_length: 5.936 ±0.511 (5.425 to 6.447)
	sepal_width:  2.770 ±0.311 (2.459 to 3.081)
	petal_length: 4.260 ±0.465 (3.795 to 4.725)
	petal_width:  1.326 ±0.196 (1.130 to 1.522)
Virginica:
	sepal_length: 6.588 ±0.629 (5.959 to 7.217)
	sepal_width:  2.974 ±0.319 (2.655 to 3.293)
	petal_length: 5.552 ±0.546 (5.006 to 6.098)
	petal_width:  2.026 ±0.272 (1.754 to 2.298)

Rules:
	    if: (petal_width < 1.0) → Setosa
	elseif: (petal_length < 4.87 and petal_width < 1.64) → Versicolor
	  else:  → Virginica

Accuracy: 96.67%

Confusion Matrix:
[[50  0  0]
 [ 0 45  5]
 [ 0  0 50]]
