In [1]:
from fastai.collab import *
from fastai.tabular.all import *

In [27]:
# `cwd` keeps its original name although it holds the user's home directory.
cwd = Path.home()
names_path = cwd.joinpath("intertemporal/data/names.txt")
# Read inside a context manager so the file handle is closed as soon as the
# lines have been consumed (the previous bare open() was never closed).
with open(names_path, "r") as file:
    # Each line is treated as one name; strip the newline and any padding.
    names = [element.strip() for element in file]

In [33]:
def load_data(name="charles_oneill"):
    """Return the contents of ``{name}_kirby.csv`` as a DataFrame.

    Args:
        name: File-name prefix selecting whose data to load.

    Returns:
        The DataFrame parsed from ``~/intertemporal/data/{name}_kirby.csv``.

    Raises:
        FileNotFoundError: If the kirby CSV for ``name`` does not exist.

    Note:
        ``{name}_train.csv`` and ``{name}_test.csv`` are no longer read. They
        used to be parsed and then discarded, which cost two extra disk reads
        and made the call fail when either file was missing even though only
        the kirby frame is returned. To combine all three sources instead,
        concatenate the "SIR", "LDR", "Delay" and "Answer" columns of the
        train, test and kirby frames.
    """
    return pd.read_csv(f"~/intertemporal/data/{name}_kirby.csv")

def load_kirby_data(names):
    """Stack every participant's kirby CSV into one long (user, question, answer) frame.

    For each entry of ``names`` the file
    ``~/intertemporal/data/{name}_kirby.csv`` is read, its ``Answer`` column is
    renamed to ``answer``, and two columns are added: ``user`` (the position of
    the name in ``names``) and ``question`` (the row number within that file,
    starting at 0).

    Args:
        names: Iterable of file-name prefixes, one per participant.

    Returns:
        A DataFrame with columns ``user``, ``question`` and ``answer`` and a
        fresh 0..n-1 index. An empty ``names`` yields an empty frame with
        those three columns instead of raising.

    Raises:
        FileNotFoundError: If a participant's kirby CSV does not exist.
        KeyError: If a CSV provides neither an ``Answer`` nor an ``answer`` column.
    """
    cols = ['user', 'question', 'answer']
    frames = []
    for i, name in enumerate(names):
        raw = pd.read_csv(f"~/intertemporal/data/{name}_kirby.csv")
        # Build a new frame rather than mutating in place; the question id is
        # simply the row position inside this participant's file.
        tidy = raw.rename(columns={'Answer': 'answer'}).assign(
            user=i, question=range(len(raw))
        )
        frames.append(tidy[cols])
    if not frames:
        # pd.concat raises on an empty list; return a well-formed empty frame.
        return pd.DataFrame(columns=cols)
    # ignore_index avoids repeating the labels 0..k-1 once per participant.
    return pd.concat(frames, ignore_index=True)

In [34]:
# Restrict the analysis to these four participants; this rebinds `names`.
names = ['charles_oneill', 'laura_ferguson', 'sarah_oneill', 'max_kirkby']
# Long-format table with one row per (user, question) and the recorded answer.
df = load_kirby_data(names)
# Show (rows, columns) as the cell output.
df.shape

(108, 3)

In [35]:
# Preview the first rows of the combined frame.
df.head()

Unnamed: 0,user,question,answer
0,0,0,0
1,0,1,1
2,0,2,0
3,0,3,1
4,0,4,1


In [37]:
# Name all three column roles explicitly and fix the split seed so the random
# train/validation partition is identical on every run.
# NOTE(review): batch size is left at the library default; with only ~108 rows
# a smaller `bs` may be worth trying — confirm.
dls = CollabDataLoaders.from_df(
    df,
    user_name='user',
    item_name='question',
    rating_name='answer',
    seed=42,
)
dls.show_batch()

Unnamed: 0,user,question,answer
0,3,4,1
1,2,18,1
2,0,6,1
3,3,11,0
4,1,20,0
5,2,4,1
6,1,22,1
7,0,2,0
8,1,18,1
9,1,24,1


In [38]:
# Inspect the category vocabularies built for the user and question columns.
dls.classes

{'user': ['#na#', 0, 1, 2, 3],
 'question': ['#na#', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]}

In [39]:
# Table sizes are taken from the DataLoaders vocabularies, so they count every
# entry listed in `dls.classes` (including the '#na#' placeholder).
n_users = len(dls.classes['user'])
n_questions = len(dls.classes['question'])
n_factors = 5

# Randomly initialised latent-factor matrices: one row per vocabulary entry,
# `n_factors` columns each.
user_factors = torch.randn(n_users, n_factors)
question_factors = torch.randn(n_questions, n_factors)

In [52]:
class DotProductBias(Module):
    """Dot-product collaborative-filtering model with per-user and per-question biases.

    The prediction for a (user, question) pair is the dot product of their
    factor vectors plus both bias terms, passed through ``sigmoid_range`` so it
    lies inside ``y_range``.
    """

    def __init__(self, n_users, n_questions, n_factors, y_range=(0, 1)):
        """Create the embedding tables.

        Args:
            n_users: Number of rows in the user tables.
            n_questions: Number of rows in the question tables.
            n_factors: Width of each latent-factor vector.
            y_range: (low, high) bounds handed to ``sigmoid_range``.
        """
        self.y_range = y_range
        # Tables are created in a fixed order: user factors, user bias,
        # question factors, question bias.
        self.user_factors = Embedding(n_users, n_factors)
        self.user_bias = Embedding(n_users, 1)
        self.question_factors = Embedding(n_questions, n_factors)
        self.question_bias = Embedding(n_questions, 1)

    def forward(self, x):
        """Score a batch of index pairs.

        Args:
            x: Index tensor; column 0 selects the user rows and column 1 the
                question rows.

        Returns:
            One score per row (trailing dimension of size 1 kept), squashed
            into ``y_range``.
        """
        user_idx = x[:, 0]
        question_idx = x[:, 1]
        user_vecs = self.user_factors(user_idx)
        question_vecs = self.question_factors(question_idx)
        interaction = (user_vecs * question_vecs).sum(dim=1, keepdim=True)
        # Sum the two biases first, then add them to the interaction term.
        bias = self.user_bias(user_idx) + self.question_bias(question_idx)
        return sigmoid_range(interaction + bias, *self.y_range)

In [53]:
# NOTE(review): the factor count is hard-coded to 50 here rather than using
# `n_factors` — confirm which value is intended.
model = DotProductBias(n_users, n_questions, 50)
# Optimise a flattened mean-squared-error loss.
learn = Learner(dls, model, loss_func=MSELossFlat())
# One-cycle schedule: 10 epochs, 5e-3 learning rate argument, weight decay 0.1.
learn.fit_one_cycle(10, 5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.250786,0.249815,00:00
1,0.25102,0.249568,00:00
2,0.250681,0.248983,00:00
3,0.250315,0.248196,00:00
4,0.249862,0.247283,00:00
5,0.249191,0.246256,00:00
6,0.24854,0.245312,00:00
7,0.247896,0.244586,00:00
8,0.247287,0.244173,00:00
9,0.246735,0.244051,00:00


In [46]:
# Ask fastai for embedding sizes for the DataLoaders' categorical columns;
# the result is a list of two-element tuples, one per column.
embs = get_emb_sz(dls)
embs

[(5, 4), (28, 10)]

In [54]:
class CollabNN(Module):
    """Collaborative-filtering model that feeds concatenated embeddings to a small MLP.

    The user and item embedding vectors are concatenated, passed through
    Linear -> ReLU -> Linear, and the single output is squashed into
    ``y_range`` with ``sigmoid_range``.
    """

    def __init__(self, user_sz, item_sz, y_range=(0,1), n_act=100):
        """Build the two embedding tables and the MLP head.

        Args:
            user_sz: Arguments for the user ``Embedding``; element 1 is used as
                the user vector width.
            item_sz: Arguments for the item ``Embedding``; element 1 is used as
                the item vector width.
            y_range: (low, high) bounds handed to ``sigmoid_range``.
            n_act: Number of hidden activations in the MLP.
        """
        self.user_factors = Embedding(*user_sz)
        self.item_factors = Embedding(*item_sz)
        # The first linear layer consumes both vectors side by side.
        concat_width = user_sz[1] + item_sz[1]
        stages = [
            nn.Linear(concat_width, n_act),
            nn.ReLU(),
            nn.Linear(n_act, 1),
        ]
        self.layers = nn.Sequential(*stages)
        self.y_range = y_range

    def forward(self, x):
        """Score a batch of index pairs.

        Args:
            x: Index tensor; column 0 selects the user rows and column 1 the
                item rows.

        Returns:
            One score per row, squashed into ``y_range``.
        """
        user_vecs = self.user_factors(x[:, 0])
        item_vecs = self.item_factors(x[:, 1])
        joined = torch.cat((user_vecs, item_vecs), dim=1)
        return sigmoid_range(self.layers(joined), *self.y_range)

In [55]:
# Unpack `embs` so its first entry sizes the user embedding and its second the item embedding.
model = CollabNN(*embs)

In [56]:
# Optimise a flattened mean-squared-error loss for the current `model`.
learn = Learner(dls, model, loss_func=MSELossFlat())
# One-cycle schedule: 5 epochs, 5e-3 learning rate argument, weight decay 0.01.
learn.fit_one_cycle(5, 5e-3, wd=0.01)

epoch,train_loss,valid_loss,time
0,0.257349,0.258948,00:00
1,0.259869,0.256062,00:00
2,0.257102,0.253447,00:00
3,0.255644,0.251486,00:00
4,0.254513,0.250678,00:00
