From e99fabdb54142973fb2134283be32334cd5d70b4 Mon Sep 17 00:00:00 2001 From: Casey Barnette Date: Sun, 14 Feb 2016 00:46:31 -0500 Subject: [PATCH 01/11] Added the datawrapper and tests for the datawrapper --- classrank/filters/datawrapper.py | 50 ++++++++++++++++++ test/test_datawrapper.py | 87 ++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 classrank/filters/datawrapper.py create mode 100644 test/test_datawrapper.py diff --git a/classrank/filters/datawrapper.py b/classrank/filters/datawrapper.py new file mode 100644 index 0000000..3deac64 --- /dev/null +++ b/classrank/filters/datawrapper.py @@ -0,0 +1,50 @@ +class DataWrapper: + def __init__(self, instances): + self.dataDict = instances + self.instanceLookup = {} + self.featureLookup = {} + createLookups() + self.data = [[None for feature in self.featureLookup] for instance in instanceLookup] + convertData() + + def createLookups(self): + instanceCounter = 0 + featureCounter = 0 + for instance in self.datadict: + if instance not in self.instanceLookup: + instanceLookup[instance] = instanceCounter + instanceCounter += 1 + for feature in self.dataDict[instance]: + if feature not in self.featureLookup: + featureLookup[feature] = featureCounter + featureCounter += 1 + + def convertData(self): + for instance in self.dataDict: + for feature in self.dataDict[instance]: + data[self.instanceLookup[instance]][self.featureLookup[feature]] = self.dataDict[instance][feature] + + def addData(self, instances): + #update the data dictionary + for instance in instances: + if instance in self.dataDict: + self.dataDict[instance].update(instances[intance]) + else: + self.dataDict[instance] = instances[instance] + #probably more taxing than necesarry + createLookups() + convertData() + + def getData(self): + return self.data + + def getInstanceLookup(self): + return self.instanceLookup + + def getFeatureLookup(self): + return self.featureLookup + + def getDataDict(self): + return self.dataDict + + diff --git a/test/test_datawrapper.py b/test/test_datawrapper.py new file mode 100644 index 0000000..574fbc4 --- /dev/null +++ b/test/test_datawrapper.py @@ -0,0 +1,87 @@ +import unittest + +from classrank.filters.datawrapper import DataWrapper +class TestDataWrapper(unittest.TestCase): + + def setup(self): + self.dataset = { + 'Lisa Rose': { + 'Lady in the Water': 2.5, + 'Snakes on a Plane': 3.5, + 'Just My Luck': 3.0, + 'Superman Returns': 3.5, + 'You, Me and Dupree': 2.5, + 'The Night Listener': 3.0 + }, + 'Gene Seymour': { + 'Lady in the Water': 3.0, + 'Snakes on a Plane': 3.5, + 'Just My Luck': 1.5, + 'Superman Returns': 5.0, + 'The Night Listener': 3.0, + 'You, Me and Dupree': 3.5 + }, + 'Michael Phillips': { + 'Lady in the Water': 2.5, + 'Snakes on a Plane': 3.0, + 'Superman Returns': 3.5, + 'The Night Listener': 4.0 + }, + 'Claudia Puig': { + 'Snakes on a Plane': 3.5, + 'Just My Luck': 3.0, + 'The Night Listener': 4.5, + 'Superman Returns': 4.0, + 'You, Me and Dupree': 2.5 + }, + 'Mick LaSalle': { + 'Lady in the Water': 3.0, + 'Snakes on a Plane': 4.0, + 'Just My Luck': 2.0, + 'Superman Returns': 3.0, + 'The Night Listener': 3.0, + 'You, Me and Dupree': 2.0 + }, + 'Jack Matthews': { + 'Lady in the Water': 3.0, + 'Snakes on a Plane': 4.0, + 'The Night Listener': 3.0, + 'Superman Returns': 5.0, + 'You, Me and Dupree': 3.5 + }, + 'Toby': { + 'Snakes on a Plane':4.5, + 'You, Me and Dupree':1.0, + 'Superman Returns':4.0 + } + } + + self.wrapper = DataWrapper(self.dataset) + + def testCreateLookups(self): + self.assertIsInstance(self.wrapper.getInstanceLookup(), {}) + self.assertIsInstance(self.wrapper.getFeatureLookup(), {}) + + def testGetters(self): + self.assertIsInstance(self.wrapper.getInstanceLookup(), {}) + self.assertNotEqual(self.wrapper.getInstanceLookup(), {}) + + self.assertIsInstance(self.wrapper.getFeatureLookup(), {}) + self.assertNotEqual(self.wrapper.getFeatureLookup(), {}) + + self.assertIsEqual(self.wrapper.getDataDict(), self.dataset) + + self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) + + def testAddData(self): + dataDict = self.wrapper.getDataDict() + tempData = self.wrapper.getData() + + instance = {'Casey' : { 'Snakes on a Plane': 5.0, 'Superman Returns' : 3.4}} + + self.wrapper.addData(instance) + self.assertNotEqual(dataDict, self.wrapper.getDataDict()) + self.assertNotEqual(tempData, self.wrapper.getData()) + + def testConvertData(self): + self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) From f00dea12632b4f9804b9dbacd93a590023a11903 Mon Sep 17 00:00:00 2001 From: Casey Barnette Date: Sun, 14 Feb 2016 01:06:03 -0500 Subject: [PATCH 02/11] Intermediary commit --- test/test_datawrapper.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/test/test_datawrapper.py b/test/test_datawrapper.py index 574fbc4..dd76543 100644 --- a/test/test_datawrapper.py +++ b/test/test_datawrapper.py @@ -2,8 +2,8 @@ from classrank.filters.datawrapper import DataWrapper class TestDataWrapper(unittest.TestCase): - - def setup(self): + + def setUp(self): self.dataset = { 'Lisa Rose': { 'Lady in the Water': 2.5, @@ -55,14 +55,13 @@ def setup(self): 'Superman Returns':4.0 } } - self.wrapper = DataWrapper(self.dataset) - def testCreateLookups(self): + def test_create_lookups(self): self.assertIsInstance(self.wrapper.getInstanceLookup(), {}) self.assertIsInstance(self.wrapper.getFeatureLookup(), {}) - def testGetters(self): + def test_getters(self): self.assertIsInstance(self.wrapper.getInstanceLookup(), {}) self.assertNotEqual(self.wrapper.getInstanceLookup(), {}) @@ -73,7 +72,7 @@ def testGetters(self): self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) - def testAddData(self): + def test_add_data(self): dataDict = self.wrapper.getDataDict() tempData = self.wrapper.getData() @@ -83,5 +82,5 @@ def testAddData(self): self.assertNotEqual(dataDict, self.wrapper.getDataDict()) self.assertNotEqual(tempData, self.wrapper.getData()) - def testConvertData(self): + def test_convert_data(self): self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) From ed7fd402c9ef54ee8f6d6c7c0bc1440323e63a31 Mon Sep 17 00:00:00 2001 From: Casey Barnette Date: Sun, 14 Feb 2016 02:56:32 -0500 Subject: [PATCH 03/11] DataWrapper now passes all tests; however, it is not hooked up to the collaborative filter yet. --- classrank/filters/datawrapper.py | 22 +++++++-------- test/test_datawrapper.py | 48 +++++++++++++++++--------------- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/classrank/filters/datawrapper.py b/classrank/filters/datawrapper.py index 3deac64..127e445 100644 --- a/classrank/filters/datawrapper.py +++ b/classrank/filters/datawrapper.py @@ -3,37 +3,37 @@ def __init__(self, instances): self.dataDict = instances self.instanceLookup = {} self.featureLookup = {} - createLookups() - self.data = [[None for feature in self.featureLookup] for instance in instanceLookup] - convertData() - + self.createLookups() + self.data = [[None for feature in self.featureLookup] for instance in self.instanceLookup] + self.convertData() + def createLookups(self): instanceCounter = 0 featureCounter = 0 - for instance in self.datadict: + for instance in self.dataDict: if instance not in self.instanceLookup: - instanceLookup[instance] = instanceCounter + self.instanceLookup[instance] = instanceCounter instanceCounter += 1 for feature in self.dataDict[instance]: if feature not in self.featureLookup: - featureLookup[feature] = featureCounter + self.featureLookup[feature] = featureCounter featureCounter += 1 def convertData(self): for instance in self.dataDict: for feature in self.dataDict[instance]: - data[self.instanceLookup[instance]][self.featureLookup[feature]] = self.dataDict[instance][feature] + self.data[self.instanceLookup[instance]][self.featureLookup[feature]] = self.dataDict[instance][feature] def addData(self, instances): #update the data dictionary for instance in instances: if instance in self.dataDict: - self.dataDict[instance].update(instances[intance]) + self.dataDict[instance].update(instances[instance]) else: self.dataDict[instance] = instances[instance] #probably more taxing than necesarry - createLookups() - convertData() + self.createLookups() + self.convertData() def getData(self): return self.data diff --git a/test/test_datawrapper.py b/test/test_datawrapper.py index dd76543..d1acdb9 100644 --- a/test/test_datawrapper.py +++ b/test/test_datawrapper.py @@ -1,6 +1,8 @@ import unittest +import copy from classrank.filters.datawrapper import DataWrapper + class TestDataWrapper(unittest.TestCase): def setUp(self): @@ -57,30 +59,32 @@ def setUp(self): } self.wrapper = DataWrapper(self.dataset) - def test_create_lookups(self): - self.assertIsInstance(self.wrapper.getInstanceLookup(), {}) - self.assertIsInstance(self.wrapper.getFeatureLookup(), {}) - - def test_getters(self): - self.assertIsInstance(self.wrapper.getInstanceLookup(), {}) - self.assertNotEqual(self.wrapper.getInstanceLookup(), {}) - - self.assertIsInstance(self.wrapper.getFeatureLookup(), {}) - self.assertNotEqual(self.wrapper.getFeatureLookup(), {}) + def test_create_lookups(self): + temp = {} + self.assertIsInstance(self.wrapper.getInstanceLookup(), type(temp)) + self.assertIsInstance(self.wrapper.getFeatureLookup(), type(temp)) - self.assertIsEqual(self.wrapper.getDataDict(), self.dataset) - - self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) + def test_getters(self): + temp = {} + self.assertIsInstance(self.wrapper.getInstanceLookup(), type(temp)) + self.assertNotEqual(self.wrapper.getInstanceLookup(), type(temp)) - def test_add_data(self): - dataDict = self.wrapper.getDataDict() - tempData = self.wrapper.getData() + self.assertIsInstance(self.wrapper.getFeatureLookup(), type(temp)) + self.assertNotEqual(self.wrapper.getFeatureLookup(), type(temp)) - instance = {'Casey' : { 'Snakes on a Plane': 5.0, 'Superman Returns' : 3.4}} + self.assertEqual(self.wrapper.getDataDict(), self.dataset) - self.wrapper.addData(instance) - self.assertNotEqual(dataDict, self.wrapper.getDataDict()) - self.assertNotEqual(tempData, self.wrapper.getData()) + self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) - def test_convert_data(self): - self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) + def test_add_data(self): + tempData = copy.deepcopy(self.wrapper.getData()) + tempDataDict = copy.deepcopy(self.wrapper.getDataDict()) + + instance = {'Casey' : { 'Snakes on a Plane': 5.0, 'Superman Returns' : 3.4}} + instance['Lisa Rose'] = {'The Night Listener' : 5.0} + self.wrapper.addData(instance) + self.assertNotEqual(tempDataDict, self.wrapper.getDataDict()) + self.assertNotEqual(tempData, self.wrapper.getData()) + + def test_convert_data(self): + self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) From b99057526e1aa880e6ff478a24bdfc5855128758 Mon Sep 17 00:00:00 2001 From: Casey Barnette Date: Sun, 14 Feb 2016 00:46:31 -0500 Subject: [PATCH 04/11] Added the datawrapper and tests for the datawrapper --- classrank/filters/datawrapper.py | 50 ++++++++++++++++++ test/test_datawrapper.py | 87 ++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 classrank/filters/datawrapper.py create mode 100644 test/test_datawrapper.py diff --git a/classrank/filters/datawrapper.py b/classrank/filters/datawrapper.py new file mode 100644 index 0000000..3deac64 --- /dev/null +++ b/classrank/filters/datawrapper.py @@ -0,0 +1,50 @@ +class DataWrapper: + def __init__(self, instances): + self.dataDict = instances + self.instanceLookup = {} + self.featureLookup = {} + createLookups() + self.data = [[None for feature in self.featureLookup] for instance in instanceLookup] + convertData() + + def createLookups(self): + instanceCounter = 0 + featureCounter = 0 + for instance in self.datadict: + if instance not in self.instanceLookup: + instanceLookup[instance] = instanceCounter + instanceCounter += 1 + for feature in self.dataDict[instance]: + if feature not in self.featureLookup: + featureLookup[feature] = featureCounter + featureCounter += 1 + + def convertData(self): + for instance in self.dataDict: + for feature in self.dataDict[instance]: + data[self.instanceLookup[instance]][self.featureLookup[feature]] = self.dataDict[instance][feature] + + def addData(self, instances): + #update the data dictionary + for instance in instances: + if instance in self.dataDict: + self.dataDict[instance].update(instances[intance]) + else: + self.dataDict[instance] = instances[instance] + #probably more taxing than necesarry + createLookups() + convertData() + + def getData(self): + return self.data + + def getInstanceLookup(self): + return self.instanceLookup + + def getFeatureLookup(self): + return self.featureLookup + + def getDataDict(self): + return self.dataDict + + diff --git a/test/test_datawrapper.py b/test/test_datawrapper.py new file mode 100644 index 0000000..574fbc4 --- /dev/null +++ b/test/test_datawrapper.py @@ -0,0 +1,87 @@ +import unittest + +from classrank.filters.datawrapper import DataWrapper +class TestDataWrapper(unittest.TestCase): + + def setup(self): + self.dataset = { + 'Lisa Rose': { + 'Lady in the Water': 2.5, + 'Snakes on a Plane': 3.5, + 'Just My Luck': 3.0, + 'Superman Returns': 3.5, + 'You, Me and Dupree': 2.5, + 'The Night Listener': 3.0 + }, + 'Gene Seymour': { + 'Lady in the Water': 3.0, + 'Snakes on a Plane': 3.5, + 'Just My Luck': 1.5, + 'Superman Returns': 5.0, + 'The Night Listener': 3.0, + 'You, Me and Dupree': 3.5 + }, + 'Michael Phillips': { + 'Lady in the Water': 2.5, + 'Snakes on a Plane': 3.0, + 'Superman Returns': 3.5, + 'The Night Listener': 4.0 + }, + 'Claudia Puig': { + 'Snakes on a Plane': 3.5, + 'Just My Luck': 3.0, + 'The Night Listener': 4.5, + 'Superman Returns': 4.0, + 'You, Me and Dupree': 2.5 + }, + 'Mick LaSalle': { + 'Lady in the Water': 3.0, + 'Snakes on a Plane': 4.0, + 'Just My Luck': 2.0, + 'Superman Returns': 3.0, + 'The Night Listener': 3.0, + 'You, Me and Dupree': 2.0 + }, + 'Jack Matthews': { + 'Lady in the Water': 3.0, + 'Snakes on a Plane': 4.0, + 'The Night Listener': 3.0, + 'Superman Returns': 5.0, + 'You, Me and Dupree': 3.5 + }, + 'Toby': { + 'Snakes on a Plane':4.5, + 'You, Me and Dupree':1.0, + 'Superman Returns':4.0 + } + } + + self.wrapper = DataWrapper(self.dataset) + + def testCreateLookups(self): + self.assertIsInstance(self.wrapper.getInstanceLookup(), {}) + self.assertIsInstance(self.wrapper.getFeatureLookup(), {}) + + def testGetters(self): + self.assertIsInstance(self.wrapper.getInstanceLookup(), {}) + self.assertNotEqual(self.wrapper.getInstanceLookup(), {}) + + self.assertIsInstance(self.wrapper.getFeatureLookup(), {}) + self.assertNotEqual(self.wrapper.getFeatureLookup(), {}) + + self.assertIsEqual(self.wrapper.getDataDict(), self.dataset) + + self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) + + def testAddData(self): + dataDict = self.wrapper.getDataDict() + tempData = self.wrapper.getData() + + instance = {'Casey' : { 'Snakes on a Plane': 5.0, 'Superman Returns' : 3.4}} + + self.wrapper.addData(instance) + self.assertNotEqual(dataDict, self.wrapper.getDataDict()) + self.assertNotEqual(tempData, self.wrapper.getData()) + + def testConvertData(self): + self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) From 0b7d6a8218bbc35c90bedf9662963f24ae728768 Mon Sep 17 00:00:00 2001 From: Casey Barnette Date: Sun, 14 Feb 2016 01:06:03 -0500 Subject: [PATCH 05/11] Intermediary commit --- test/test_datawrapper.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/test/test_datawrapper.py b/test/test_datawrapper.py index 574fbc4..dd76543 100644 --- a/test/test_datawrapper.py +++ b/test/test_datawrapper.py @@ -2,8 +2,8 @@ from classrank.filters.datawrapper import DataWrapper class TestDataWrapper(unittest.TestCase): - - def setup(self): + + def setUp(self): self.dataset = { 'Lisa Rose': { 'Lady in the Water': 2.5, @@ -55,14 +55,13 @@ def setup(self): 'Superman Returns':4.0 } } - self.wrapper = DataWrapper(self.dataset) - def testCreateLookups(self): + def test_create_lookups(self): self.assertIsInstance(self.wrapper.getInstanceLookup(), {}) self.assertIsInstance(self.wrapper.getFeatureLookup(), {}) - def testGetters(self): + def test_getters(self): self.assertIsInstance(self.wrapper.getInstanceLookup(), {}) self.assertNotEqual(self.wrapper.getInstanceLookup(), {}) @@ -73,7 +72,7 @@ def testGetters(self): self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) - def testAddData(self): + def test_add_data(self): dataDict = self.wrapper.getDataDict() tempData = self.wrapper.getData() @@ -83,5 +82,5 @@ def testAddData(self): self.assertNotEqual(dataDict, self.wrapper.getDataDict()) self.assertNotEqual(tempData, self.wrapper.getData()) - def testConvertData(self): + def test_convert_data(self): self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) From ea0b63b9fdcc164430e4e0c2373d9a6676e12b15 Mon Sep 17 00:00:00 2001 From: Casey Barnette Date: Sun, 14 Feb 2016 02:56:32 -0500 Subject: [PATCH 06/11] DataWrapper now passes all tests; however, it is not hooked up to the collaborative filter yet. --- classrank/filters/datawrapper.py | 22 +++++++-------- test/test_datawrapper.py | 48 +++++++++++++++++--------------- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/classrank/filters/datawrapper.py b/classrank/filters/datawrapper.py index 3deac64..127e445 100644 --- a/classrank/filters/datawrapper.py +++ b/classrank/filters/datawrapper.py @@ -3,37 +3,37 @@ def __init__(self, instances): self.dataDict = instances self.instanceLookup = {} self.featureLookup = {} - createLookups() - self.data = [[None for feature in self.featureLookup] for instance in instanceLookup] - convertData() - + self.createLookups() + self.data = [[None for feature in self.featureLookup] for instance in self.instanceLookup] + self.convertData() + def createLookups(self): instanceCounter = 0 featureCounter = 0 - for instance in self.datadict: + for instance in self.dataDict: if instance not in self.instanceLookup: - instanceLookup[instance] = instanceCounter + self.instanceLookup[instance] = instanceCounter instanceCounter += 1 for feature in self.dataDict[instance]: if feature not in self.featureLookup: - featureLookup[feature] = featureCounter + self.featureLookup[feature] = featureCounter featureCounter += 1 def convertData(self): for instance in self.dataDict: for feature in self.dataDict[instance]: - data[self.instanceLookup[instance]][self.featureLookup[feature]] = self.dataDict[instance][feature] + self.data[self.instanceLookup[instance]][self.featureLookup[feature]] = self.dataDict[instance][feature] def addData(self, instances): #update the data dictionary for instance in instances: if instance in self.dataDict: - self.dataDict[instance].update(instances[intance]) + self.dataDict[instance].update(instances[instance]) else: self.dataDict[instance] = instances[instance] #probably more taxing than necesarry - createLookups() - convertData() + self.createLookups() + self.convertData() def getData(self): return self.data diff --git a/test/test_datawrapper.py b/test/test_datawrapper.py index dd76543..d1acdb9 100644 --- a/test/test_datawrapper.py +++ b/test/test_datawrapper.py @@ -1,6 +1,8 @@ import unittest +import copy from classrank.filters.datawrapper import DataWrapper + class TestDataWrapper(unittest.TestCase): def setUp(self): @@ -57,30 +59,32 @@ def setUp(self): } self.wrapper = DataWrapper(self.dataset) - def test_create_lookups(self): - self.assertIsInstance(self.wrapper.getInstanceLookup(), {}) - self.assertIsInstance(self.wrapper.getFeatureLookup(), {}) - - def test_getters(self): - self.assertIsInstance(self.wrapper.getInstanceLookup(), {}) - self.assertNotEqual(self.wrapper.getInstanceLookup(), {}) - - self.assertIsInstance(self.wrapper.getFeatureLookup(), {}) - self.assertNotEqual(self.wrapper.getFeatureLookup(), {}) + def test_create_lookups(self): + temp = {} + self.assertIsInstance(self.wrapper.getInstanceLookup(), type(temp)) + self.assertIsInstance(self.wrapper.getFeatureLookup(), type(temp)) - self.assertIsEqual(self.wrapper.getDataDict(), self.dataset) - - self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) + def test_getters(self): + temp = {} + self.assertIsInstance(self.wrapper.getInstanceLookup(), type(temp)) + self.assertNotEqual(self.wrapper.getInstanceLookup(), type(temp)) - def test_add_data(self): - dataDict = self.wrapper.getDataDict() - tempData = self.wrapper.getData() + self.assertIsInstance(self.wrapper.getFeatureLookup(), type(temp)) + self.assertNotEqual(self.wrapper.getFeatureLookup(), type(temp)) - instance = {'Casey' : { 'Snakes on a Plane': 5.0, 'Superman Returns' : 3.4}} + self.assertEqual(self.wrapper.getDataDict(), self.dataset) - self.wrapper.addData(instance) - self.assertNotEqual(dataDict, self.wrapper.getDataDict()) - self.assertNotEqual(tempData, self.wrapper.getData()) + self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) - def test_convert_data(self): - self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) + def test_add_data(self): + tempData = copy.deepcopy(self.wrapper.getData()) + tempDataDict = copy.deepcopy(self.wrapper.getDataDict()) + + instance = {'Casey' : { 'Snakes on a Plane': 5.0, 'Superman Returns' : 3.4}} + instance['Lisa Rose'] = {'The Night Listener' : 5.0} + self.wrapper.addData(instance) + self.assertNotEqual(tempDataDict, self.wrapper.getDataDict()) + self.assertNotEqual(tempData, self.wrapper.getData()) + + def test_convert_data(self): + self.assertNotEqual(self.wrapper.getData(), [[None],[None],[None],[None],[None],[None],[None]]) From a0430a79a2db91f4e880f032c5f7433f69c10722 Mon Sep 17 00:00:00 2001 From: Casey Barnette Date: Thu, 25 Feb 2016 21:07:04 -0500 Subject: [PATCH 07/11] Preliminary commit for datawrapper/filter integration --- classrank/filters/collabfilter.py | 42 +++++++++++----- classrank/filters/datawrapper.py | 17 +++++-- test/test_filter.py | 82 +++++++++++++++++++++++++------ 3 files changed, 109 insertions(+), 32 deletions(-) diff --git a/classrank/filters/collabfilter.py b/classrank/filters/collabfilter.py index 3d24d93..d817dfc 100644 --- a/classrank/filters/collabfilter.py +++ b/classrank/filters/collabfilter.py @@ -1,25 +1,34 @@ import numpy as np from sklearn.decomposition import TruncatedSVD from scipy import sparse +from classrank.filters.datawrapper import DataWrapper class CollaborativeFilter: #This takes in a matrix - def __init__(self, data, numRecommendations): - self.dataset = data + def __init__(self, data, numRecommendations=2): + self.dataset = DataWrapper(data) self.updated = False self.sparsedata = None self.sparseifyData() self.svd = TruncatedSVD() self.model = self.svd.inverse_transform(self.svd.fit_transform(self.sparsedata)) - def getRecommendation(self, row, column): + def getRecommendation(self, instances): if(self.updated): self.sparseifyData() self.model = self.svd.inverse_transform(self.svd.fit_transform(self.sparsedata)) self.updated = False - return self.model[row][column] + ret = {} + for instance in instances: + values = {} + for feature in instances[instance]: + row = self.dataset.getRow(instance) + column = self.dataset.getColumn(feature) + values[feature] = self.model[row][column] + ret[instance] = values + return ret - def updateValue(self, row, column, value): - self.dataset[row][column] = value + def updateValues(self, instances): + self.dataset.addData(instances) self.updated = True def forceModelUpdate(self): @@ -28,11 +37,12 @@ def forceModelUpdate(self): self.model = self.svd.inverse_transform(self.svd.fit_transform(self.sparsedata)) def sparseifyData(self): - sparsematrix = sparse.dok_matrix((len(self.dataset), len(self.dataset[0]))) - for i in range(len(self.dataset)): - for j in range(len(self.dataset[i])): - if self.dataset[i][j] is not None: - sparsematrix[i, j] = self.dataset[i][j] + data = self.dataset.getData() + sparsematrix = sparse.dok_matrix((len(data), len(data[0]))) + for i in range(len(data)): + for j in range(len(data[i])): + if data[i][j] is not None: + sparsematrix[i, j] = data[i][j] self.sparsedata = sparsematrix def getSparseData(self): @@ -41,8 +51,14 @@ def getSparseData(self): def getModel(self): return self.model - def getData(self): - return self.dataset + def getData(self, *args): + if len(args) == 2: + return self.dataset.getData(args[0], args[1]) + else: + return self.dataset.getData() def getUpdated(self): return self.updated + + def getDataDict(self): + return self.dataset.getDataDict() diff --git a/classrank/filters/datawrapper.py b/classrank/filters/datawrapper.py index 127e445..82682e1 100644 --- a/classrank/filters/datawrapper.py +++ b/classrank/filters/datawrapper.py @@ -23,7 +23,7 @@ def convertData(self): for instance in self.dataDict: for feature in self.dataDict[instance]: self.data[self.instanceLookup[instance]][self.featureLookup[feature]] = self.dataDict[instance][feature] - + def addData(self, instances): #update the data dictionary for instance in instances: @@ -35,9 +35,12 @@ def addData(self, instances): self.createLookups() self.convertData() - def getData(self): - return self.data - + def getData(self, *args): + if len(args) == 2: + return self.dataDict[self.instanceLookup[args[0]]][self.featureLookup[args[1]]] + else: + return self.data + def getInstanceLookup(self): return self.instanceLookup @@ -46,5 +49,9 @@ def getFeatureLookup(self): def getDataDict(self): return self.dataDict + + def getRow(self, instance): + return self.instanceLookup[instance] - + def getColumn(self, feature): + return self.featureLookup[feature] diff --git a/test/test_filter.py b/test/test_filter.py index b6d66bd..7161b0c 100644 --- a/test/test_filter.py +++ b/test/test_filter.py @@ -7,25 +7,80 @@ class TestSVDFilter(unittest.TestCase): def setUp(self): - self.data = [[2.5, 3.5, 3.0, 3.5, 2.5, 3.0],[3.0, 3.5, 1.5, 5.0, 3.0, 3.5],[2.5, 3.0, None, 3.5, 4.0, None],[None, 3.5, 3.0, 4.0, 4.5, 2.5], [3.5, 4.0, 2.0, 3.0, 3.0, 2.0], [3.0, 4.0, None, 5.0, 3.0, 3.5], [None, 4.5, None, 4.0, None, 1.0]] + self.data = { + 'Lisa Rose': { + 'Lady in the Water': 2.5, + 'Snakes on a Plane': 3.5, + 'Just My Luck': 3.0, + 'Superman Returns': 3.5, + 'You, Me and Dupree': 2.5, + 'The Night Listener': 3.0 + }, + 'Gene Seymour': { + 'Lady in the Water': 3.0, + 'Snakes on a Plane': 3.5, + 'Just My Luck': 1.5, + 'Superman Returns': 5.0, + 'The Night Listener': 3.0, + 'You, Me and Dupree': 3.5 + }, + 'Michael Phillips': { + 'Lady in the Water': 2.5, + 'Snakes on a Plane': 3.0, + 'Superman Returns': 3.5, + 'The Night Listener': 4.0 + }, + 'Claudia Puig': { + 'Snakes on a Plane': 3.5, + 'Just My Luck': 3.0, + 'The Night Listener': 4.5, + 'Superman Returns': 4.0, + 'You, Me and Dupree': 2.5 + }, + 'Mick LaSalle': { + 'Lady in the Water': 3.0, + 'Snakes on a Plane': 4.0, + 'Just My Luck': 2.0, + 'Superman Returns': 3.0, + 'The Night Listener': 3.0, + 'You, Me and Dupree': 2.0 + }, + 'Jack Matthews': { + 'Lady in the Water': 3.0, + 'Snakes on a Plane': 4.0, + 'The Night Listener': 3.0, + 'Superman Returns': 5.0, + 'You, Me and Dupree': 3.5 + }, + 'Toby': { + 'Snakes on a Plane':4.5, + 'You, Me and Dupree':1.0, + 'Superman Returns':4.0 + } + } + self.instance = 'Gene Seymour' + self.feature = 'Snakes on a Plane' + self.testInstance = { 'Gene Seymour': { 'Snakes on a Plane': 10} } + self.recTester = { 'Gene Seymour': ['Snakes on a Plane'] } self.fltr = CollaborativeFilter(self.data, 1) + self.test2Instance = {'Gene Seymour' : {'Snakes on a Plane' : 20}} def test_update_value(self): - self.fltr.updateValue(2, 2, 10) - self.assertEqual(10,self.fltr.getData()[2][2]) + self.fltr.updateValues(self.testInstance) + self.assertEqual(10,self.fltr.getDataDict()[self.instance][self.feature]) def test_get_recommendation(self): - recom = self.fltr.getRecommendation(2,2) + recom = self.fltr.getRecommendation(self.recTester) self.assertIsNot(recom, None) def test_force_model_update(self): model = self.fltr.getModel() - self.fltr.updateValue(2, 2, 10) + self.fltr.updateValues(self.testInstance) self.fltr.forceModelUpdate() self.assertTrue(self.listNotEqual(model, self.fltr.getModel())) - def test_get_data(self): - self.assertListEqual(self.data, self.fltr.getData()) + def test_get_data_dict(self): + self.assertEqual(self.data, self.fltr.getDataDict()) def test_get_model(self): temp = np.array([1]) @@ -37,14 +92,13 @@ def test_sparseify_data(self): temp2 = self.fltr.getSparseData() - self.fltr.updateValue(2, 2, 10) - self.fltr.getRecommendation(2, 2) + self.fltr.updateValues(self.testInstance) + self.fltr.getRecommendation(self.recTester) self.assertIsInstance(self.fltr.getSparseData(), type(temp)) self.assertTrue(self.npListNotEqual(self.fltr.getSparseData(), temp2)) temp3 = self.fltr.getSparseData() - - self.fltr.updateValue(2, 2, 20) + self.fltr.updateValues(self.test2Instance) self.fltr.forceModelUpdate() self.assertIsInstance(self.fltr.getSparseData(), type(temp)) self.assertTrue(self.npListNotEqual(self.fltr.getSparseData(), temp2)) @@ -53,13 +107,13 @@ def test_sparseify_data(self): def test_is_updated(self): self.assertFalse(self.fltr.getUpdated()) - self.fltr.updateValue(2, 2, 10) + self.fltr.updateValues(self.testInstance) self.assertTrue(self.fltr.getUpdated()) - self.fltr.getRecommendation(2, 2) + self.fltr.getRecommendation(self.recTester) self.assertFalse(self.fltr.getUpdated()) - self.fltr.updateValue(2, 2, 20) + self.fltr.updateValues(self.test2Instance) self.assertTrue(self.fltr.getUpdated()) self.fltr.forceModelUpdate() From e4476ccd7b6b69ea06880517b639d15e2856f7fe Mon Sep 17 00:00:00 2001 From: Casey Barnette Date: Thu, 25 Feb 2016 21:34:54 -0500 Subject: [PATCH 08/11] 100% coverage on the Integration --- classrank/filters/datawrapper.py | 2 +- test/test_filter.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/classrank/filters/datawrapper.py b/classrank/filters/datawrapper.py index 82682e1..7459991 100644 --- a/classrank/filters/datawrapper.py +++ b/classrank/filters/datawrapper.py @@ -37,7 +37,7 @@ def addData(self, instances): def getData(self, *args): if len(args) == 2: - return self.dataDict[self.instanceLookup[args[0]]][self.featureLookup[args[1]]] + return self.data[self.instanceLookup[args[0]]][self.featureLookup[args[1]]] else: return self.data diff --git a/test/test_filter.py b/test/test_filter.py index 7161b0c..805d3ff 100644 --- a/test/test_filter.py +++ b/test/test_filter.py @@ -81,6 +81,10 @@ def test_force_model_update(self): def test_get_data_dict(self): self.assertEqual(self.data, self.fltr.getDataDict()) + + def test_get_data(self): + self.assertIsNot(self.fltr.getData("Gene Seymour", "Snakes on a Plane"), None) + self.assertIsInstance(self.fltr.getData(), type([]) ) def test_get_model(self): temp = np.array([1]) From 0f68024906c8c99736ca627f1afbf2399cc5376b Mon Sep 17 00:00:00 2001 From: Casey Barnette Date: Mon, 21 Mar 2016 21:21:21 -0400 Subject: [PATCH 09/11] Updated the Ratings table and added Query support to the filter --- classrank/database/tables.py | 3 +++ classrank/filters/datawrapper.py | 33 +++++++++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/classrank/database/tables.py b/classrank/database/tables.py index 97a397f..2650def 100644 --- a/classrank/database/tables.py +++ b/classrank/database/tables.py @@ -99,5 +99,8 @@ class Rating(Base): student_id = Column(Integer, ForeignKey('student.uid'), primary_key=True) section_id = Column(Integer, ForeignKey('section.uid'), primary_key=True) rating = Column(Integer, nullable=True) + difficulty = Column(Integer, nullable=True) + workload = Column(Integer, nullable=True) + grade = Column(Integer, nullable=True) section = relationship('Section', backref='ratings') student = relationship('Student', backref='ratings') diff --git a/classrank/filters/datawrapper.py b/classrank/filters/datawrapper.py index 7459991..6ae818c 100644 --- a/classrank/filters/datawrapper.py +++ b/classrank/filters/datawrapper.py @@ -1,12 +1,18 @@ +import classrank.database.wrapper as db class DataWrapper: - def __init__(self, instances): - self.dataDict = instances + def __init__(self, instances=dict(), db=None, school="gatech", metric="rating"): + self.db = db + self.datadict = instances + if db: + self.school = school + self.metric = metric + self.queryDB() self.instanceLookup = {} self.featureLookup = {} self.createLookups() self.data = [[None for feature in self.featureLookup] for instance in self.instanceLookup] self.convertData() - + def createLookups(self): instanceCounter = 0 featureCounter = 0 @@ -55,3 +61,24 @@ def getRow(self, instance): def getColumn(self, feature): return self.featureLookup[feature] + + def queryDB(self): + query = wrapper.Query(self.db) + for student in query.query(self.db.Student).filter(self.db.Student==self.school).all(): + results = query.query(self.db.Rating, self.db.Section, self.db.Course).filter(self.db.Rating.student_id == student.uid).\ + filter(self.db.Rating.section_id==self.db.Course.section_id).all() #a tuple of lists + results = zip(*results) #a list of tuples + instance = {} + for result in results: + courseName = query.query(self.db.Course).filter(self.db.Course.uid==result[1].course_id).first() + courseName = courseName.name + if metric == "rating": + rating = result[0][0].rating + elif metric == "grade": + rating = result[0][0].grade + elif metric == "workload": + rating = result[0][0].workload + elif metric == "difficulty": + rating = result[0][0].difficulty + instance[courseName] = rating + self.instances[student.uid] = instance From 073aa69c4fdf272589379e2820b5bd2b3273d42d Mon Sep 17 00:00:00 2001 From: Casey Barnette Date: Mon, 21 Mar 2016 21:58:26 -0400 Subject: [PATCH 10/11] Fixed smol typo in datawrapper --- classrank/filters/datawrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classrank/filters/datawrapper.py b/classrank/filters/datawrapper.py index 6ae818c..e3aafb9 100644 --- a/classrank/filters/datawrapper.py +++ b/classrank/filters/datawrapper.py @@ -2,7 +2,7 @@ class DataWrapper: def __init__(self, instances=dict(), db=None, school="gatech", metric="rating"): self.db = db - self.datadict = instances + self.dataDict = instances if db: self.school = school self.metric = metric From 8faca0bc6af61f1fed3574bf0890bca70fe1c230 Mon Sep 17 00:00:00 2001 From: Casey Barnette Date: Mon, 28 Mar 2016 19:28:50 -0400 Subject: [PATCH 11/11] Completed DB integration with the filter --- classrank/filters/collabfilter.py | 15 +++++++---- classrank/filters/datawrapper.py | 42 ++++++++++++++++--------------- test/test_filter.py | 2 +- test/test_filter_db.py | 39 ++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 26 deletions(-) create mode 100644 test/test_filter_db.py diff --git a/classrank/filters/collabfilter.py b/classrank/filters/collabfilter.py index d817dfc..58a8282 100644 --- a/classrank/filters/collabfilter.py +++ b/classrank/filters/collabfilter.py @@ -4,14 +4,19 @@ from classrank.filters.datawrapper import DataWrapper class CollaborativeFilter: #This takes in a matrix - def __init__(self, data, numRecommendations=2): - self.dataset = DataWrapper(data) + def __init__(self, data=dict(), numRecommendations=1, db=None, metric="rating", school="gatech"): + self.dataset = DataWrapper(instances=data, db=db, school=school, metric=metric) self.updated = False self.sparsedata = None self.sparseifyData() - self.svd = TruncatedSVD() - self.model = self.svd.inverse_transform(self.svd.fit_transform(self.sparsedata)) - + try: + self.svd = TruncatedSVD(n_components=numRecommendations) + self.model = self.svd.inverse_transform(self.svd.fit_transform(self.sparsedata)) + except ValueError: + self.svd = None + self.model = None + raise ValueError("Not enough ratings for predictions") + def getRecommendation(self, instances): if(self.updated): self.sparseifyData() diff --git a/classrank/filters/datawrapper.py b/classrank/filters/datawrapper.py index e3aafb9..d47ed13 100644 --- a/classrank/filters/datawrapper.py +++ b/classrank/filters/datawrapper.py @@ -1,4 +1,4 @@ -import classrank.database.wrapper as db +from classrank.database.wrapper import Query class DataWrapper: def __init__(self, instances=dict(), db=None, school="gatech", metric="rating"): self.db = db @@ -63,22 +63,24 @@ def getColumn(self, feature): return self.featureLookup[feature] def queryDB(self): - query = wrapper.Query(self.db) - for student in query.query(self.db.Student).filter(self.db.Student==self.school).all(): - results = query.query(self.db.Rating, self.db.Section, self.db.Course).filter(self.db.Rating.student_id == student.uid).\ - filter(self.db.Rating.section_id==self.db.Course.section_id).all() #a tuple of lists - results = zip(*results) #a list of tuples - instance = {} - for result in results: - courseName = query.query(self.db.Course).filter(self.db.Course.uid==result[1].course_id).first() - courseName = courseName.name - if metric == "rating": - rating = result[0][0].rating - elif metric == "grade": - rating = result[0][0].grade - elif metric == "workload": - rating = result[0][0].workload - elif metric == "difficulty": - rating = result[0][0].difficulty - instance[courseName] = rating - self.instances[student.uid] = instance + with Query(self.db) as query: + for student in query.query(self.db.student).filter(self.db.school.abbreviation==self.school).all(): + results = query.query(self.db.rating, self.db.section).filter(self.db.rating.student_id == student.uid).\ + filter(self.db.rating.section_id==self.db.section.uid).all() #a tuple of lists + #results = list(zip(*results)) #a list of tuples + #pprint.pprint(results) + instance = {} + for result in results: + courseName = query.query(self.db.course).filter(self.db.course.uid==result[1].course_id).first() + courseName = courseName.name + rating = result[0].__getattribute__(self.metric) + #if self.metric == "rating": + # rating = result[0][0].rating + #elif self.metric == "grade": + # rating = result[0][0].grade + #elif self.metric == "workload": + # rating = result[0][0].workload + #elif self.metric == "difficulty": + # rating = result[0][0].difficulty + instance[courseName] = rating + self.dataDict[student.uid] = instance diff --git a/test/test_filter.py b/test/test_filter.py index 805d3ff..83c1c2b 100644 --- a/test/test_filter.py +++ b/test/test_filter.py @@ -1,5 +1,5 @@ import unittest - +from unittest.mock import Mock, MagicMock, patch from classrank.filters.collabfilter import CollaborativeFilter import numpy as np from scipy import sparse diff --git a/test/test_filter_db.py b/test/test_filter_db.py new file mode 100644 index 0000000..b0e977c --- /dev/null +++ b/test/test_filter_db.py @@ -0,0 +1,39 @@ +import unittest +from unittest.mock import Mock, MagicMock, patch +from classrank.filters.collabfilter import CollaborativeFilter +import numpy as np +import os +from classrank.database.wrapper import Database, Query + +class TestDatabaseFilter(unittest.TestCase): + def setUp(self): + self.conn = Database(engine=os.environ.get("CONNECTION", "sqlite:///:memory:")) + school = self.conn.school(name="Georgia Tech", abbreviation="gatech") + course = self.conn.course(school=school, name="Intro Java", number="1331", subject="CS") + course2 = self.conn.course(school=school, name="Stuff", number="1332", subject="CS") + section1 = self.conn.section(course=course, semester="fall", year=2016, name="A1") + section2 = self.conn.section(course=course, semester="fall", year=2016, name="A2") + self.section3 = self.conn.section(course=course2, semester="spring",year=2015, name="A") + account = self.conn.account(username="test", email_address="test@test.com", password_hash=b"t", password_salt=b"t") + student = self.conn.student(account=account, school=school) + account2 = self.conn.account(username="test2", email_address="test2@test.com", password_hash=b"t", password_salt=b"t") + self.student2 = self.conn.student(account=account2, school=school) + with Query(self.conn) as q: + q.add(school) + q.add(course) + q.add(section1) + q.add(section2) + q.add(course2) + q.add(self.section3) + q.add(account) + q.add(student) + q.add(self.student2) + q.add(self.conn.rating(student=student, section=section1, rating=5)) + q.add(self.conn.rating(student=self.student2, section=section2, rating=3)) + def test_filter_query(self): + with self.assertRaises(ValueError): + cf = CollaborativeFilter(db=self.conn) + with Query(self.conn) as q: + q.add(self.conn.rating(student=self.student2, section=self.section3, rating=4)) + cf = CollaborativeFilter(db=self.conn) + self.assertIsInstance(cf.getData(), type([]))