Skip to content

Commit

Permalink
Merge branch 'master' into test-db
Browse files Browse the repository at this point in the history
  • Loading branch information
joshuamorton committed Apr 22, 2016
2 parents e610112 + 5fdf1f3 commit ff95e07
Show file tree
Hide file tree
Showing 6 changed files with 327 additions and 30 deletions.
3 changes: 3 additions & 0 deletions classrank/database/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,5 +102,8 @@ class Rating(Base):
student_id = Column(Integer, ForeignKey('student.uid'), primary_key=True)
section_id = Column(Integer, ForeignKey('section.uid'), primary_key=True)
rating = Column(Integer, nullable=True)
difficulty = Column(Integer, nullable=True)
workload = Column(Integer, nullable=True)
grade = Column(Integer, nullable=True)
section = relationship('Section', backref='ratings')
student = relationship('Student', backref='ratings')
53 changes: 37 additions & 16 deletions classrank/filters/collabfilter.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,39 @@
import numpy as np
from sklearn.decomposition import TruncatedSVD
from scipy import sparse
from classrank.filters.datawrapper import DataWrapper
class CollaborativeFilter:
#This takes in a matrix
def __init__(self, data, numRecommendations):
self.dataset = data
def __init__(self, data=dict(), numRecommendations=1, db=None, metric="rating", school="gatech"):
self.dataset = DataWrapper(instances=data, db=db, school=school, metric=metric)
self.updated = False
self.sparsedata = None
self.sparseifyData()
self.svd = TruncatedSVD()
self.model = self.svd.inverse_transform(self.svd.fit_transform(self.sparsedata))

def getRecommendation(self, row, column):
try:
self.svd = TruncatedSVD(n_components=numRecommendations)
self.model = self.svd.inverse_transform(self.svd.fit_transform(self.sparsedata))
except ValueError:
self.svd = None
self.model = None
raise ValueError("Not enough ratings for predictions")

def getRecommendation(self, instances):
if(self.updated):
self.sparseifyData()
self.model = self.svd.inverse_transform(self.svd.fit_transform(self.sparsedata))
self.updated = False
return self.model[row][column]
ret = {}
for instance in instances:
values = {}
for feature in instances[instance]:
row = self.dataset.getRow(instance)
column = self.dataset.getColumn(feature)
values[feature] = self.model[row][column]
ret[instance] = values
return ret

def updateValue(self, row, column, value):
self.dataset[row][column] = value
def updateValues(self, instances):
self.dataset.addData(instances)
self.updated = True

def forceModelUpdate(self):
Expand All @@ -28,11 +42,12 @@ def forceModelUpdate(self):
self.model = self.svd.inverse_transform(self.svd.fit_transform(self.sparsedata))

def sparseifyData(self):
sparsematrix = sparse.dok_matrix((len(self.dataset), len(self.dataset[0])))
for i in range(len(self.dataset)):
for j in range(len(self.dataset[i])):
if self.dataset[i][j] is not None:
sparsematrix[i, j] = self.dataset[i][j]
data = self.dataset.getData()
sparsematrix = sparse.dok_matrix((len(data), len(data[0])))
for i in range(len(data)):
for j in range(len(data[i])):
if data[i][j] is not None:
sparsematrix[i, j] = data[i][j]
self.sparsedata = sparsematrix

def getSparseData(self):
Expand All @@ -41,8 +56,14 @@ def getSparseData(self):
def getModel(self):
return self.model

def getData(self):
return self.dataset
def getData(self, *args):
if len(args) == 2:
return self.dataset.getData(args[0], args[1])
else:
return self.dataset.getData()

def getUpdated(self):
return self.updated

def getDataDict(self):
return self.dataset.getDataDict()
86 changes: 86 additions & 0 deletions classrank/filters/datawrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from classrank.database.wrapper import Query
class DataWrapper:
    """Expose a nested ``{instance: {feature: value}}`` dict as a dense matrix.

    Every instance is assigned a row index and every feature a column index;
    ``data[row][column]`` holds the stored value, or ``None`` when the instance
    has no value for that feature.
    """

    def __init__(self, instances=None, db=None, school="gatech", metric="rating"):
        """Build the lookups and the matrix from ``instances`` and/or a database.

        instances -- optional ``{instance: {feature: value}}`` dict. When given
                     it is used directly (not copied) as the backing dictionary.
        db        -- optional database handle; when truthy, ratings are loaded
                     from it via ``queryDB``.
        school    -- school abbreviation used to filter the database query.
        metric    -- name of the rating attribute to read from each rating row.
        """
        self.db = db
        # A fresh dict per wrapper: a shared default dict would be mutated by
        # queryDB/addData and leak data between unrelated wrappers.
        self.dataDict = {} if instances is None else instances
        if db:
            self.school = school
            self.metric = metric
            self.queryDB()
        self.instanceLookup = {}
        self.featureLookup = {}
        self.data = []
        self.createLookups()
        self.convertData()

    def createLookups(self):
        """Assign a row/column index to every instance/feature not yet indexed.

        Existing indices are never changed. New ones continue after the highest
        index already handed out, so repeated calls (e.g. from ``addData``)
        cannot collide with rows or columns that already exist.
        """
        instanceCounter = len(self.instanceLookup)
        featureCounter = len(self.featureLookup)
        for instance, features in self.dataDict.items():
            if instance not in self.instanceLookup:
                self.instanceLookup[instance] = instanceCounter
                instanceCounter += 1
            for feature in features:
                if feature not in self.featureLookup:
                    self.featureLookup[feature] = featureCounter
                    featureCounter += 1

    def _growData(self):
        """Pad the matrix in place with ``None`` so it covers every index."""
        width = len(self.featureLookup)
        for row in self.data:
            row.extend([None] * (width - len(row)))
        missingRows = len(self.instanceLookup) - len(self.data)
        self.data.extend([None] * width for _ in range(missingRows))

    def convertData(self):
        """Copy every value of the backing dictionary into the matrix."""
        # The lookups may have grown since the matrix was last sized.
        self._growData()
        for instance, features in self.dataDict.items():
            row = self.data[self.instanceLookup[instance]]
            for feature, value in features.items():
                row[self.featureLookup[feature]] = value

    def addData(self, instances):
        """Merge ``instances`` into the wrapper and refresh lookups and matrix.

        Values for known instances are updated feature by feature; unknown
        instances are added as new rows.
        """
        for instance, features in instances.items():
            if instance in self.dataDict:
                self.dataDict[instance].update(features)
            else:
                # Copy so later updates do not write into the caller's dict.
                self.dataDict[instance] = dict(features)
        # Probably more taxing than necessary: everything is re-scanned.
        self.createLookups()
        self.convertData()

    def getData(self, *args):
        """Return the whole matrix, or one cell when called as
        ``getData(instance, feature)``."""
        if len(args) == 2:
            return self.data[self.instanceLookup[args[0]]][self.featureLookup[args[1]]]
        else:
            return self.data

    def getInstanceLookup(self):
        """Return the ``{instance: row index}`` mapping."""
        return self.instanceLookup

    def getFeatureLookup(self):
        """Return the ``{feature: column index}`` mapping."""
        return self.featureLookup

    def getDataDict(self):
        """Return the backing ``{instance: {feature: value}}`` dictionary."""
        return self.dataDict

    def getRow(self, instance):
        """Return the row index of ``instance`` (``KeyError`` if unknown)."""
        return self.instanceLookup[instance]

    def getColumn(self, feature):
        """Return the column index of ``feature`` (``KeyError`` if unknown)."""
        return self.featureLookup[feature]

    def queryDB(self):
        """Fill the backing dictionary from the database.

        For every student returned by the school-filtered query, stores
        ``{course name: metric value}`` under the student's uid.
        """
        with Query(self.db) as query:
            students = query.query(self.db.student).filter(
                self.db.school.abbreviation == self.school).all()
            for student in students:
                # Each result is indexed as (rating row, section row).
                results = query.query(self.db.rating, self.db.section).filter(
                    self.db.rating.student_id == student.uid).filter(
                    self.db.rating.section_id == self.db.section.uid).all()
                instance = {}
                for result in results:
                    course = query.query(self.db.course).filter(
                        self.db.course.uid == result[1].course_id).first()
                    # self.metric names the rating attribute to read
                    # (e.g. "rating", "grade", "workload", "difficulty").
                    instance[course.name] = getattr(result[0], self.metric)
                self.dataDict[student.uid] = instance
90 changes: 90 additions & 0 deletions test/test_datawrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import unittest
import copy

from classrank.filters.datawrapper import DataWrapper

class TestDataWrapper(unittest.TestCase):
    """Unit tests for DataWrapper built from an in-memory ratings dictionary."""

    def setUp(self):
        # Reviewer -> {film: score}; not every reviewer scored every film.
        self.dataset = {
            'Lisa Rose': {
                'Lady in the Water': 2.5,
                'Snakes on a Plane': 3.5,
                'Just My Luck': 3.0,
                'Superman Returns': 3.5,
                'You, Me and Dupree': 2.5,
                'The Night Listener': 3.0
            },
            'Gene Seymour': {
                'Lady in the Water': 3.0,
                'Snakes on a Plane': 3.5,
                'Just My Luck': 1.5,
                'Superman Returns': 5.0,
                'The Night Listener': 3.0,
                'You, Me and Dupree': 3.5
            },
            'Michael Phillips': {
                'Lady in the Water': 2.5,
                'Snakes on a Plane': 3.0,
                'Superman Returns': 3.5,
                'The Night Listener': 4.0
            },
            'Claudia Puig': {
                'Snakes on a Plane': 3.5,
                'Just My Luck': 3.0,
                'The Night Listener': 4.5,
                'Superman Returns': 4.0,
                'You, Me and Dupree': 2.5
            },
            'Mick LaSalle': {
                'Lady in the Water': 3.0,
                'Snakes on a Plane': 4.0,
                'Just My Luck': 2.0,
                'Superman Returns': 3.0,
                'The Night Listener': 3.0,
                'You, Me and Dupree': 2.0
            },
            'Jack Matthews': {
                'Lady in the Water': 3.0,
                'Snakes on a Plane': 4.0,
                'The Night Listener': 3.0,
                'Superman Returns': 5.0,
                'You, Me and Dupree': 3.5
            },
            'Toby': {
                'Snakes on a Plane': 4.5,
                'You, Me and Dupree': 1.0,
                'Superman Returns': 4.0
            }
        }
        self.wrapper = DataWrapper(self.dataset)

    def test_create_lookups(self):
        """Both lookups are dictionaries."""
        self.assertIsInstance(self.wrapper.getInstanceLookup(), dict)
        self.assertIsInstance(self.wrapper.getFeatureLookup(), dict)

    def test_getters(self):
        """Getters return populated structures that match the input."""
        # Compare against an empty dict, not against the dict *type*: a dict
        # instance never equals the class, so that check could not fail.
        self.assertIsInstance(self.wrapper.getInstanceLookup(), dict)
        self.assertNotEqual(self.wrapper.getInstanceLookup(), {})

        self.assertIsInstance(self.wrapper.getFeatureLookup(), dict)
        self.assertNotEqual(self.wrapper.getFeatureLookup(), {})

        self.assertEqual(self.wrapper.getDataDict(), self.dataset)

        self.assertNotEqual(self.wrapper.getData(), [[None], [None], [None], [None], [None], [None], [None]])

    def test_add_data(self):
        """Adding data changes both the dictionary and the matrix."""
        tempData = copy.deepcopy(self.wrapper.getData())
        tempDataDict = copy.deepcopy(self.wrapper.getDataDict())

        instance = {'Casey': {'Snakes on a Plane': 5.0, 'Superman Returns': 3.4}}
        instance['Lisa Rose'] = {'The Night Listener': 5.0}
        self.wrapper.addData(instance)
        self.assertNotEqual(tempDataDict, self.wrapper.getDataDict())
        self.assertNotEqual(tempData, self.wrapper.getData())

        # The new instance is present and the existing one was updated.
        self.assertIn('Casey', self.wrapper.getDataDict())
        self.assertEqual(self.wrapper.getDataDict()['Lisa Rose']['The Night Listener'], 5.0)

    def test_convert_data(self):
        """The matrix is filled in, not left as single-None rows."""
        self.assertNotEqual(self.wrapper.getData(), [[None], [None], [None], [None], [None], [None], [None]])
Loading

0 comments on commit ff95e07

Please sign in to comment.