In [18]:
import json
import random
from collections import defaultdict

class LearningSystem(object):
    """Count-based binary classifier over attribute/value pairs.

    Each training instance is a mapping of attribute -> value.  Training
    keeps a signed count per (attribute, value) pair and a per-attribute
    weight; prediction sums ``count * weight`` over the instance's pairs
    and thresholds the result at zero.
    """

    def __init__(self):
        # attribute -> value -> signed count
        # (+1 for every label-1 sighting, -1 for every label-0 sighting).
        self.attribute_value = defaultdict(lambda: defaultdict(int))
        # attribute -> number of label-1 updates after which the updated
        # (attribute, value) count was positive.
        self.attribute_weights = defaultdict(int)

    def fit(self, train_set, train_labels):
        """Accumulate counts and weights from labelled instances.

        Args:
            train_set: sequence of mappings (attribute -> hashable value).
            train_labels: sequence of labels aligned with ``train_set``.
                Only labels equal to 1 or 0 update the model; any other
                label leaves it untouched.

        Raises:
            IndexError: if ``train_labels`` is shorter than ``train_set``.
        """
        for i, instance in enumerate(train_set):
            # The label is constant for the whole instance, so read it once.
            label = train_labels[i]
            if label == 1:
                for attr, value in instance.items():
                    counts = self.attribute_value[attr]
                    counts[value] += 1
                    # Only reward the attribute while this value is still
                    # net-positive after the increment.
                    if counts[value] > 0:
                        self.attribute_weights[attr] += 1
            elif label == 0:
                for attr, value in instance.items():
                    self.attribute_value[attr][value] -= 1

    def predict(self, instance):
        """Classify one instance.

        Args:
            instance: mapping of attribute -> hashable value.

        Returns:
            0 if the weighted sum of the instance's counts is negative,
            otherwise 1 (so ties and entirely unseen instances yield 1).
        """
        weighted_sum = 0
        for attr, value in instance.items():
            # Use non-inserting lookups: indexing the defaultdicts directly
            # would add a key for every unseen attribute/value and thereby
            # mutate (and grow) the trained model during inference.
            counts = self.attribute_value.get(attr)
            if counts is None:
                continue  # unseen attribute contributes 0
            weighted_sum += counts.get(value, 0) * self.attribute_weights.get(attr, 0)
        if weighted_sum < 0:
            return 0
        return 1

def define_model():
    """Build and return a fresh, untrained ``LearningSystem``."""
    untrained_model = LearningSystem()
    return untrained_model

def test(model, test_set_data, test_labels):
    correct = 0
    for i in range(len(test_set_data)):
        pred = model.predict(test_set_data[i])
        if pred == test_labels[i]:
            correct += 1
    accuracy = correct / len(test_set_data)
    return accuracy



In [19]:
# Number of trailing samples held out for evaluation; everything before
# them is used for training.
HOLDOUT_SIZE = 500

# Context managers close the files as soon as they are parsed instead of
# leaving the handles open until garbage collection.  JSON is UTF-8 by
# specification, so the encoding is stated rather than left to the platform.
with open('hw3-sample_set_data.json', encoding='utf-8') as data_file:
    sample_set_data = json.load(data_file)
with open('hw3-sample_set_labels.json', encoding='utf-8') as labels_file:
    sample_set_labels = json.load(labels_file)

# Split: the last HOLDOUT_SIZE samples form the test set.
train_set = sample_set_data[:-HOLDOUT_SIZE]
train_labels = sample_set_labels[:-HOLDOUT_SIZE]
test_set = sample_set_data[-HOLDOUT_SIZE:]
test_labels = sample_set_labels[-HOLDOUT_SIZE:]

model = define_model()
model.fit(train_set, train_labels)
print("Accuracy:", test(model, test_set, test_labels))


Accuracy: 0.89
