In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader

stalls = pd.read_csv('hawkerstallscleaned.csv').reset_index(drop=True)

interactions = (
    pd.read_csv('userinteractionscleaned.csv')
      .rename(columns={'stall_id': 'place_id'})
)

interactions = interactions[
    interactions['place_id'].isin(stalls['place_id'])
].reset_index(drop=True)

interactions = interactions[
    interactions['rating'] >= 4
].reset_index(drop=True)

REF = pd.Timestamp('2025-04-27')
def parse_rt(rt):
    if pd.isna(rt):
        return REF
    num, unit, *_ = rt.split()
    n = 1 if num in ('a','an') else int(num)
    if 'year' in unit: return REF - pd.DateOffset(years=n)
    if 'month' in unit: return REF - pd.DateOffset(months=n)
    if 'day' in unit: return REF - pd.DateOffset(days=n)
    return REF

interactions['ts'] = interactions['relative_time'].apply(parse_rt)

counts = interactions['author'].value_counts()
eligible = counts[counts >= 10].index.tolist()
interactions = interactions[
    interactions['author'].isin(eligible)
].reset_index(drop=True)

ienc = LabelEncoder().fit(interactions['place_id'])
n_items = len(ienc.classes_)

train_parts, test_sets = [], {}
for user, grp in interactions.groupby('author'):
    grp_sorted = grp.sort_values('ts').reset_index(drop=True)
    half = len(grp_sorted) // 2
    train_parts.append(grp_sorted.iloc[:half])
    test_sets[user] = set(grp_sorted.iloc[half:]['place_id'])
train_df = pd.concat(train_parts, ignore_index=True)

uenc = LabelEncoder().fit(train_df['author'])
train_df['uid'] = uenc.transform(train_df['author'])
train_df['iid'] = ienc.transform(train_df['place_id'])
n_users = len(uenc.classes_)

u_idx = train_df['uid'].values
i_idx = train_df['iid'].values + n_users
edges = np.vstack([np.concatenate([u_idx, i_idx]), np.concatenate([i_idx, u_idx])])
edge_index = torch.LongTensor(edges)

class NGCF(nn.Module):
    def __init__(self, n_users, n_items, dim=64, layers=[64,64]):
        super().__init__()
        self.total = n_users + n_items
        self.embedding = nn.Embedding(self.total, dim)
        self.layers = nn.ModuleList()
        in_dim = dim
        for out_dim in layers:
            self.layers.append(nn.Linear(in_dim, out_dim))
            in_dim = out_dim
        self.act = nn.LeakyReLU()

    def propagate(self, edge_index):
        src, dst = edge_index
        emb = self.embedding.weight
        deg = torch.bincount(dst, minlength=self.total).float().unsqueeze(1)
        norm = deg.pow(-0.5); norm[torch.isinf(norm)] = 0.0
        msg = emb[src] * norm[src]
        agg = torch.zeros_like(emb).index_add_(0, dst, msg)
        return agg * norm

    def forward(self, edge_index):
        all_emb = self.embedding.weight
        embs = [all_emb]
        for layer in self.layers:
            agg = self.propagate(edge_index)
            side = layer(agg)
            emb = self.act(side + layer(all_emb))
            all_emb = emb
            embs.append(emb)
        return sum(embs) / len(embs)

def bpr_loss(u, pos, neg):
    pos_s = (u * pos).sum(dim=1)
    neg_s = (u * neg).sum(dim=1)
    return -torch.log(torch.sigmoid(pos_s - neg_s) + 1e-8).mean()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NGCF(n_users, n_items).to(device)
opt = optim.Adam(model.parameters(), lr=1e-3)

pairs = list(zip(train_df['uid'], train_df['iid']))
model.train()
for epoch in range(1, 41):
    np.random.shuffle(pairs)
    epoch_loss = 0
    for u, i_pos in pairs:
        i_neg = np.random.randint(n_items)
        while (u, i_neg) in pairs:
            i_neg = np.random.randint(n_items)
        opt.zero_grad()
        emb = model(edge_index.to(device))
        u_emb = emb[u].unsqueeze(0)
        pos_emb = emb[n_users + i_pos].unsqueeze(0)
        neg_emb = emb[n_users + i_neg].unsqueeze(0)
        loss = bpr_loss(u_emb, pos_emb, neg_emb)
        loss.backward(); opt.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch:02d} Loss: {epoch_loss/len(pairs):.4f}")

model.eval()
with torch.no_grad():
    final_embs = model(edge_index.to(device)).cpu().numpy()
user_embs = final_embs[:n_users]
item_embs = final_embs[n_users:]

pool_sizes = [50, 100, 200, 500, 1000, 2000, n_items]
ks = [1, 2, 3, 5]

metrics_by_pool = {
    ps: {k: {'hit': [], 'precision': [], 'recall': [], 'f1': []} for k in ks}
    for ps in pool_sizes
}

for ps in pool_sizes:
    for user in eligible:
        uid = uenc.transform([user])[0]
        test_i = [
            ienc.transform([pid])[0]
            for pid in test_sets[user]
            if pid in ienc.classes_
        ]
        if not test_i:
            continue

        seen = set(train_df[train_df['uid']==uid]['iid'])
        neg_pool = list(set(range(n_items)) - seen - set(test_i))

        neg_needed = ps - len(test_i)
        if neg_needed > 0:
            neg_needed = min(neg_needed, len(neg_pool))
            sampled_neg = np.random.choice(neg_pool, neg_needed, replace=False).tolist()
            candidates = test_i + sampled_neg
        else:
            candidates = test_i.copy()

        scores = item_embs[candidates].dot(user_embs[uid])
        ranked = np.array(candidates)[np.argsort(scores)[::-1]]

        for k in ks:
            rec_k = ranked[:k]
            tp = sum(i in test_i for i in rec_k)
            prec = tp / k
            rec = tp / len(test_i)
            f1 = 2*prec*rec/(prec+rec) if (prec+rec)>0 else 0.0
            hit = tp > 0

            m = metrics_by_pool[ps][k]
            m['hit'].append(hit)
            m['precision'].append(prec)
            m['recall'].append(rec)
            m['f1'].append(f1)

hitrate_rows, metrics_rows = [], []
for ps, data in metrics_by_pool.items():
    hr = {'pool_size': ps}
    mr = {'pool_size': ps}
    for k in ks:
        hr[f'HitRate@{k}'] = np.mean(data[k]['hit'])
        mr[f'Precision@{k}'] = np.mean(data[k]['precision'])
        mr[f'Recall@{k}'] = np.mean(data[k]['recall'])
        mr[f'F1@{k}'] = np.mean(data[k]['f1'])
    hitrate_rows.append(hr)
    metrics_rows.append(mr)

ngcf_hitrate_df = pd.DataFrame(hitrate_rows)
ngcf_metrics_df = pd.DataFrame(metrics_rows)

Epoch 01 Loss: 0.6781
Epoch 02 Loss: 0.3838
Epoch 03 Loss: 0.2921
Epoch 04 Loss: 0.2273
Epoch 05 Loss: 0.1826
Epoch 06 Loss: 0.1538
Epoch 07 Loss: 0.1243
Epoch 08 Loss: 0.1070
Epoch 09 Loss: 0.0846
Epoch 10 Loss: 0.0800
Epoch 11 Loss: 0.0696
Epoch 12 Loss: 0.0596
Epoch 13 Loss: 0.0498
Epoch 14 Loss: 0.0512
Epoch 15 Loss: 0.0446
Epoch 16 Loss: 0.0443
Epoch 17 Loss: 0.0425
Epoch 18 Loss: 0.0335
Epoch 19 Loss: 0.0406
Epoch 20 Loss: 0.0350
Epoch 21 Loss: 0.0304
Epoch 22 Loss: 0.0292
Epoch 23 Loss: 0.0330
Epoch 24 Loss: 0.0273
Epoch 25 Loss: 0.0362
Epoch 26 Loss: 0.0360
Epoch 27 Loss: 0.0365
Epoch 28 Loss: 0.0250
Epoch 29 Loss: 0.0322
Epoch 30 Loss: 0.0255
Epoch 31 Loss: 0.0279
Epoch 32 Loss: 0.0224
Epoch 33 Loss: 0.0204
Epoch 34 Loss: 0.0290
Epoch 35 Loss: 0.0187
Epoch 36 Loss: 0.0255
Epoch 37 Loss: 0.0253
Epoch 38 Loss: 0.0240
Epoch 39 Loss: 0.0286
Epoch 40 Loss: 0.0208


In [3]:
print("=== NGCF HitRate@k ===")
ngcf_hitrate_df

=== NGCF HitRate@k ===


Unnamed: 0,pool_size,HitRate@1,HitRate@2,HitRate@3,HitRate@5
0,50,0.454036,0.647982,0.751121,0.860987
1,100,0.331839,0.494395,0.591928,0.725336
2,200,0.189462,0.311659,0.404709,0.545964
3,500,0.102018,0.171525,0.233184,0.331839
4,1000,0.056054,0.105381,0.147982,0.209641
5,2000,0.030269,0.059417,0.085202,0.124439
6,2778,0.0213,0.045964,0.065022,0.097534


In [4]:
print("\n=== NGCF Precision@k, Recall@k, F1@k ===")
ngcf_metrics_df


=== NGCF Precision@k, Recall@k, F1@k ===


Unnamed: 0,pool_size,Precision@1,Recall@1,F1@1,Precision@2,Recall@2,F1@2,Precision@3,Recall@3,F1@3,Precision@5,Recall@5,F1@5
0,50,0.454036,0.050345,0.088722,0.427691,0.092647,0.146893,0.410688,0.131061,0.189561,0.387668,0.20419,0.251498
1,100,0.331839,0.035988,0.063484,0.306054,0.064274,0.102172,0.282511,0.08894,0.128474,0.261659,0.134201,0.165785
2,200,0.189462,0.020623,0.036314,0.176009,0.036642,0.058196,0.171151,0.052743,0.076414,0.163677,0.08215,0.101909
3,500,0.102018,0.011321,0.019913,0.091928,0.019682,0.031071,0.088939,0.028367,0.040746,0.083632,0.042377,0.052268
4,1000,0.056054,0.006181,0.010896,0.053812,0.011802,0.018519,0.052691,0.016565,0.023758,0.048879,0.024483,0.030182
5,2000,0.030269,0.003218,0.005681,0.030269,0.006823,0.010682,0.030269,0.009958,0.014159,0.027803,0.01482,0.017939
6,2778,0.0213,0.002382,0.004193,0.023543,0.005264,0.008273,0.022422,0.007379,0.010519,0.0213,0.011651,0.014046
