In [2]:
import numpy as np
import pandas as pd

In [4]:
# Load the golf dataset from a CSV file given by a path relative to the
# current working directory.
df = pd.read_csv("golf-dataset.csv")

In [5]:
# Preview the first few rows of the loaded frame.
df.head()

Unnamed: 0,Outlook,Temp,Humidity,Windy,Play Golf
0,Rainy,Hot,High,False,No
1,Rainy,Hot,High,True,No
2,Overcast,Hot,High,False,Yes
3,Sunny,Mild,High,False,Yes
4,Sunny,Cool,Normal,False,Yes


In [6]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(14, 5)

In [11]:
class OneR:
    """One Rule (OneR) classifier for binary targets.

    For every feature column, each distinct feature value is mapped to the
    class that occurs most often with it in the training data. The single
    feature whose mapping classifies the most training samples correctly
    becomes the model's rule.

    Attributes:
        rule: ``None`` before fitting, afterwards a tuple
            ``(feature_index, {feature_value: class_index})``.
        accuracy: Training accuracy (0.0 - 1.0) of the selected rule.
        classes: The two unique class labels in ``np.unique`` order; the
            ``class_index`` values in ``rule`` index into this list.
    """

    def __init__(self):
        self.rule = None
        self.accuracy = 0.0
        self.classes = []

    def fit(self, X_train, y_train):
        """Select the single best feature rule from the training data.

        Args:
            X_train: 2-D array-like of shape (n_samples, n_features).
            y_train: 1-D array-like with exactly two distinct labels.

        Returns:
            The selected rule as ``(feature_index, {value: class_index})``.

        Raises:
            ValueError: If ``X_train`` and ``y_train`` differ in length, or
                if ``y_train`` does not contain exactly two classes.
        """
        if len(X_train) != len(y_train):
            raise ValueError("Invalid training data")

        # Transpose so that iterating yields one feature column at a time.
        X_train = np.array(X_train).T
        y_train = np.array(y_train)
        training_samples = len(y_train)
        classes = list(np.unique(y_train))

        if len(classes) != 2:
            raise ValueError("Algorithm is made for binary classification only")

        # Discard any earlier fit; otherwise a previous, higher accuracy would
        # block the new rule from ever being selected.
        self.classes = classes
        self.rule = None
        self.accuracy = 0.0

        class_index = {label: i for i, label in enumerate(classes)}

        for idx, column in enumerate(X_train):
            # counts[value][i] = samples with this feature value and classes[i].
            counts = {value: [0] * len(classes) for value in np.unique(column)}
            for value, label in zip(column, y_train):
                counts[value][class_index[label]] += 1

            rule = {}
            correct_samples = 0
            for value, per_class in counts.items():
                # list.index returns the first maximum, so ties resolve to the
                # lower class index.
                prediction = per_class.index(max(per_class))
                rule[value] = prediction
                correct_samples += per_class[prediction]

            accuracy = correct_samples / training_samples

            # Strict '>' keeps the earliest column when accuracies tie.
            if accuracy > self.accuracy:
                self.accuracy = accuracy
                self.rule = idx, rule

        return self.rule

    def predict(self, X_test):
        """Predict a class index for every row of ``X_test``.

        Args:
            X_test: 2-D array-like with the same column layout as the
                training features.

        Returns:
            A list of class indices (positions in ``self.classes``), one per
            row.

        Raises:
            RuntimeError: If the model has not been fitted.
            KeyError: If a row holds a value for the selected feature that
                was not present in the training data.
        """
        X_test = np.array(X_test)

        if self.rule is None:
            raise RuntimeError("Model not trained")

        idx, rule = self.rule
        return [rule[row[idx]] for row in X_test]

    def evaluate(self, X_test, y_test):
        """Return the fraction of rows in ``X_test`` classified correctly.

        Args:
            X_test: 2-D array-like of features.
            y_test: Sequence of true class labels, one per row of ``X_test``.

        Returns:
            Accuracy as ``correct / len(y_test)``.

        Raises:
            ValueError: If ``X_test`` and ``y_test`` differ in length.
            ZeroDivisionError: If ``y_test`` is empty.
        """
        if len(X_test) != len(y_test):
            raise ValueError("Invalid data")

        y_pred = self.predict(X_test)
        # Predictions are class indices; map them back to labels to compare.
        correct = sum(
            1 for pred, actual in zip(y_pred, y_test)
            if self.classes[pred] == actual
        )

        return correct / len(y_test)

In [12]:
# Split the frame into feature columns and the target column.
X = df.drop(columns='Play Golf')
y = df['Play Golf']

# Fit OneR; the returned (feature index, value -> class index) rule is the
# cell's displayed output.
model = OneR()
model.fit(X, y)

(0, {'Overcast': 1, 'Rainy': 0, 'Sunny': 1})

In [13]:
# Training accuracy of the rule selected by fit().
model.accuracy

0.7142857142857143