In [2]:
import json
import random
from collections import defaultdict

class LearningSystem(object):
	"""
	Vote-counting classifier over (attribute, value) pairs.

	Each (attribute, value) pair seen during training carries an integer
	score: +1 for every label-1 instance it appears in and -1 for every
	label-0 instance. An instance is classified by summing the scores of
	the pairs it contains.

	Two functions are REQUIRED:
	- LearningSystem.fit(train_set, train_labels)
	- LearningSystem.predict(instance)
	"""

	def __init__(self):
		"""
		Create an untrained model. No parameters are needed.
		"""
		# attribute -> value -> net score (label-1 count minus label-0 count).
		self.attribute_value = defaultdict(lambda: defaultdict(int))

	def fit(self, train_set, train_labels):
		"""
		Train the system with this function.
		Do NOT set any input variable other than train_set and train_labels for this function.
		========
		train_set: training data without labels. It should be a list of dictionaries.
		train_labels: A list of labels of training data. Each label has the same
			index as its data in train_set. All labels are binary (0, 1).

		Scores are accumulated, so calling fit() again adds to the existing
		scores rather than resetting them. Instances whose label is neither
		0 nor 1 are ignored.
		"""
		for i, instance in enumerate(train_set):
			label = train_labels[i]
			if label == 1:
				step = 1
			elif label == 0:
				step = -1
			else:
				# Unknown label: contributes nothing to any score.
				continue
			for attr, value in instance.items():
				self.attribute_value[attr][value] += step

	def predict(self, instance):
		"""
		Predict the label of an instance (a single dictionary) given.

		Returns 0 when the summed score of the instance's (attribute, value)
		pairs is negative, and 1 otherwise (a zero total, including an
		instance made only of unseen pairs, yields 1).
		"""
		total = 0
		for attr, value in instance.items():
			# Use .get() instead of indexing: indexing a defaultdict inserts
			# the missing key, which would silently grow the trained model
			# every time an unseen attribute or value is predicted on.
			scores = self.attribute_value.get(attr)
			if scores is not None:
				total += scores.get(value, 0)
		return 0 if total < 0 else 1


# Then you need to make the separate define_model() function,
# so you can have any initialized parameter defined.
# Autograder will define the model object based on your define_model() function.
def define_model():
	"""Return a fresh, untrained LearningSystem built with its default settings."""
	system = LearningSystem()
	return system


"""
The following code is for testing with sample data only.
REMEMBER TO REMOVE IT BEFORE SUBMISSION.
=============================================================================
In this homework, a sample collection of shuffled data is provided.
You can try your model on the provided data, as shown below, before submission:
"""

def test(model, test_set_data, test_labels):
	correct = 0
	for i in range(len(test_set_data)):
		pred = model.predict(test_set_data[i])
		if pred == test_labels[i]:
			correct += 1
	accuracy = correct / len(test_set_data)
	return accuracy


In [4]:
import json
import random
from collections import defaultdict

class LearningSystem(object):
    """
    Weighted vote-counting classifier over (attribute, value) pairs.

    Each (attribute, value) pair accumulates an integer score during
    training, and each attribute accumulates a weight. Prediction sums
    score * weight over the pairs of the instance.

    positive_step: amount added to a pair's score for each label-1 instance
        containing it (default 2).
    negative_step: amount subtracted from a pair's score for each label-0
        instance containing it (default 1).
    """

    def __init__(self, positive_step=2, negative_step=1):
        self.positive_step = positive_step
        self.negative_step = negative_step
        # attribute -> value -> accumulated score.
        self.attribute_value = defaultdict(lambda: defaultdict(int))
        # attribute -> number of label-1 updates that left the pair's score
        # positive. An attribute that never gets such an update has weight 0
        # and therefore contributes nothing at prediction time.
        self.attribute_weights = defaultdict(int)

    def fit(self, train_set, train_labels):
        """
        Accumulate scores and attribute weights from labelled instances.

        train_set: list of dictionaries (attribute -> value).
        train_labels: list of binary labels (0, 1); train_labels[i] belongs
            to train_set[i]. Instances with any other label are ignored.

        Weights depend on the running score at the time of each update, so
        the result depends on the order of the training instances. Calling
        fit() again adds to the existing state rather than resetting it.
        """
        for i, instance in enumerate(train_set):
            label = train_labels[i]
            if label == 1:
                for attr, value in instance.items():
                    scores = self.attribute_value[attr]
                    scores[value] += self.positive_step
                    if scores[value] > 0:
                        self.attribute_weights[attr] += 1
            elif label == 0:
                for attr, value in instance.items():
                    self.attribute_value[attr][value] -= self.negative_step

    def predict(self, instance):
        """
        Predict the label of a single instance (a dictionary).

        Returns 0 when the weighted sum of the instance's pair scores is
        negative, and 1 otherwise (a zero total yields 1).
        """
        weighted_sum = 0
        for attr, value in instance.items():
            # Use .get() instead of indexing: indexing a defaultdict inserts
            # the missing key, so predicting on unseen attributes or values
            # would otherwise mutate the trained model.
            scores = self.attribute_value.get(attr)
            if scores is None:
                continue
            weighted_sum += scores.get(value, 0) * self.attribute_weights.get(attr, 0)
        return 0 if weighted_sum < 0 else 1

def define_model():
    """Return a fresh, untrained LearningSystem built with its default settings."""
    system = LearningSystem()
    return system

def test(model, test_set_data, test_labels):
    correct = 0
    for i in range(len(test_set_data)):
        pred = model.predict(test_set_data[i])
        if pred == test_labels[i]:
            correct += 1
    accuracy = correct / len(test_set_data)
    return accuracy



In [5]:
# Number of trailing samples held out for evaluation; the rest is used for training.
HOLDOUT_SIZE = 500

# Load the sample data with context managers so the file handles are closed
# as soon as the JSON has been parsed.
with open('hw3-sample_set_data.json') as data_file:
    sample_set_data = json.load(data_file)
with open('hw3-sample_set_labels.json') as labels_file:
    sample_set_labels = json.load(labels_file)

# The split below pairs data and labels purely by position.
assert len(sample_set_data) == len(sample_set_labels), "data and labels differ in length"

train_set = sample_set_data[:-HOLDOUT_SIZE]
train_labels = sample_set_labels[:-HOLDOUT_SIZE]
test_set = sample_set_data[-HOLDOUT_SIZE:]
test_labels = sample_set_labels[-HOLDOUT_SIZE:]

model = define_model()
model.fit(train_set, train_labels)
print("Accuracy:", test(model, test_set, test_labels))


Accuracy: 0.89
