In [1]:
import torch as th 
import torch.nn as nn
from tqdm import tqdm
import copy

import warnings
from utils.utils import convert_to_gpu
from dgl.data.utils import load_graphs
from utils.LinkScorePredictor import LinkScorePredictor
from model.R_HGNN import R_HGNN

import pandas as pd
import numpy as np

from IPython.core.display import HTML
HTML("""
.output_png {
    display: table-cell;
    text-align: right;
    vertical-align: middle;
}
""")

%load_ext autoreload
%autoreload 2

# Data Manipulation functions

In [2]:
from data_utils import get_fileSize, get_col_names, setType, isValid, load_txt_df

def get_ids_from_txtFile(path_txt_file, type ,ids):
    chunksize=1000000
    chunks=[]
    size = get_fileSize(path_txt_file)
    col_names=get_col_names(type)
    df_chunks = pd.read_csv(path_txt_file, names=col_names, sep="\t", encoding='utf8', header = 0, chunksize=chunksize)
    
    for chunk in tqdm(df_chunks, total=size//chunksize):
        for col in chunk.columns:
            try:
                chunk[col]=chunk[col].apply(lambda x: setType(x, col))
            except:
                chunk[col]=chunk[col].apply(lambda x: isValid(x, col))
                chunk[col]=chunk[col].apply(lambda x: setType(x, col))
        chunk = chunk[chunk[col_names[0]].isin(ids)] 
        chunks.append(chunk)
    df = pd.concat(chunks)
    # print(f'{type} df has shpe:',df.shape)
    return df

def get_id_mapping(path_to_file, type, reverse=False):
    if reverse:
        return {i: id for i, id in enumerate(load_txt_df(path_to_file, type)[get_col_names(type)[0]])}
    else:
        return {id: i for i, id in enumerate(load_txt_df(path_to_file, type)[get_col_names(type)[0]])}
    

def topK_playcounts(path_to_txt_data, series, type, k, user_id):
    val_count_pairs=series.value_counts().head(k)
    ids=list(val_count_pairs.keys())
    type_df = get_ids_from_txtFile(path_to_txt_data, type, ids)
    print(f'Top {k} {type}s for user {user_id}')
    print('ID       PLAYCOUNT      NAME')
    for id, playcount in val_count_pairs.items():
        row=type_df.loc[type_df[type+'_id'] == id]
        name = row[type+'_name'].item()
        print(f'{id}        {playcount}         {name} ')


def topK_recommendations(path_to_txt_data, ids, type, k, user_id, artist_path_to_txt_data=None):
    type_df = get_ids_from_txtFile(path_to_txt_data, type, ids)
    if artist_path_to_txt_data != None:
        artist_ids=[type_df.loc[type_df[type+'_id'] == id]['artist_id'].item() for id in ids]
        artist_df= get_ids_from_txtFile(artist_path_to_txt_data, type='artist', ids=artist_ids)
        artist_name_mapping={id: artist_df.loc[artist_df['artist_id'] == id]['artist_name'].item() for id in artist_ids}

    print(f'Top {k} {type} recommendations for user #{user_id}')
    if artist_path_to_txt_data != None:
        print('ID           ARTIST_NAME             NAME')
    else:
        print('ID             NAME')
    for id in ids:
        row=type_df.loc[type_df[type+'_id'] == id]
        name = row[type+'_name'].item()
        try:
            artist_id = row['artist_id'].item()
            print(f'{id}        {artist_name_mapping[artist_id]}        {name}')
        except:
            print(f'{id}  {name}')


2022-06-09 17:00:44.278355: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-06-09 17:00:44.278385: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


# Setting Arugments and initializing graph

In [3]:
## ARGS
N_LAYERS = 2
HIDDEN_DIM = 32
RELATIONAL_INPUT_DIM = 20
RELATIONAL_HIDDEN_DIM = 8
N_HEADS = 8
DROPOUT = 0.3
RESIDUAL = True
NORM = True
DEVICE='cuda'
PATH_TO_USERS_FILE='LFM-1b_users.txt'
PATH_TO_TRACKS_FILE='LFM-1b_tracks.txt'
PATH_TO_LES_FILE='LFM-1b_LEs.txt'
PATH_TO_ARTISTS_FILE='LFM-1b_artists.txt'
PATH_TO_ALBUMS_FILE='LFM-1b_albums.txt'

# using DGl's load_graphs function to load pre-computed and processed files
glist,_=load_graphs('lastfm1b_subset.bin') # <- this file represents a subset of the full dataset
hg=glist[0] # hg=='heterogeneous graph' ;) from the list of graphs in the processed file (hint: theres only one) pick our heterogenous subset graph


r_hgnn = R_HGNN(graph=hg,
                input_dim_dict={ntype: hg.nodes[ntype].data['feat'].shape[1] for ntype in hg.ntypes},
                hidden_dim=HIDDEN_DIM, 
                relation_input_dim=RELATIONAL_INPUT_DIM,
                relation_hidden_dim=RELATIONAL_HIDDEN_DIM,
                num_layers=N_LAYERS, 
                n_heads=N_HEADS, 
                dropout=DROPOUT,
                residual=RESIDUAL, 
                norm=NORM)


warnings.filterwarnings('ignore')

# Recommendation with Link Prediction Models

In [4]:
def displayTopKRecommendations(graph, model, type, sampled_edge_type, k, user_id, typeFile_path, userFile_path, artists_path, albums_path, tracks_path, les_path, device, artist_path_to_txt_data=None):
    input_features = {(stype, etype, dtype): graph.srcnodes[dtype].data['feat'] for stype, etype, dtype in graph.canonical_etypes}
    nodes_representation, _ = model[0].inference(graph, copy.deepcopy(input_features), device=device)

    user_nodes_representation=nodes_representation['user']
    type_nodes_representation=nodes_representation[type]
    # C = torch.mm(A, B.T)  # same as C = A @ B.T
    listen_to_type_likelihood = C = th.mm(user_nodes_representation, type_nodes_representation.T)

    print(f'listen_to_{type}_likelihood',listen_to_type_likelihood.shape)
    user_type_recommendations={}

    for u_id, row in enumerate(tqdm(listen_to_type_likelihood, total=int(listen_to_type_likelihood.shape[0]))):
        for id, _ in enumerate(row):
            try:
                graph.edge_id(u_id,id, etype=sampled_edge_type)
            except:
                if u_id in user_type_recommendations.keys():
                    user_type_recommendations[u_id].append((u_id, id, listen_to_type_likelihood[u_id,id].item()))
                else:
                    user_type_recommendations[u_id]=list()
                    user_type_recommendations[u_id].append((u_id, id, listen_to_type_likelihood[u_id,id].item()))


    rev_user_mapping = get_id_mapping(path_to_file=userFile_path, type='user', reverse=True)

    rev_type_mapping = get_id_mapping(path_to_file=typeFile_path, type=type, reverse=True)

    user_listens=get_ids_from_txtFile(path_txt_file=les_path, type='le' , ids=[rev_user_mapping[user_id]])
    
    topK_playcounts(path_to_txt_data=artists_path, series=user_listens['artist_id'], type='artist', k=k, user_id=user_id)
    topK_playcounts(path_to_txt_data=albums_path, series=user_listens['album_id'], type='album', k=k, user_id=user_id)
    topK_playcounts(path_to_txt_data=tracks_path, series=user_listens['track_id'], type='track', k=k, user_id=user_id)
    user_type_recommendations={key: sorted(value, key=lambda x: x[2], reverse=True)[:k] for key, value in  user_type_recommendations.items()}
    user_type_recommendations=[rev_type_mapping[id] for _, id, _ in user_type_recommendations[user_id]]
    if artist_path_to_txt_data !=None:
        topK_recommendations(path_to_txt_data=typeFile_path, ids=user_type_recommendations, type=type, k=k, user_id=user_id, artist_path_to_txt_data=artists_path)
    else:
        topK_recommendations(path_to_txt_data=typeFile_path, ids=user_type_recommendations, type=type, k=k, user_id=user_id)

In [5]:
USER_ID=5
K=10

In [6]:
# Albums Recommednations
SAMPLED_EDGE_TYPE='listened_to_album'
MODEL_NAME='R_HGNN'+'_'+SAMPLED_EDGE_TYPE
LINK_SCORE_PREDICTOR = LinkScorePredictor(HIDDEN_DIM * N_HEADS)
album_predictor = nn.Sequential(r_hgnn, LINK_SCORE_PREDICTOR)
album_predictor = convert_to_gpu(album_predictor, device=DEVICE)
# load model parameter
SAVE_MODEL_FOLDER = f"../save_model/'lfm1b'/{MODEL_NAME}"
album_predictor.load_state_dict(th.load(SAVE_MODEL_FOLDER+'/'+MODEL_NAME+'.pkl', map_location='cpu'))
# evaluate the best model
album_predictor.eval()

displayTopKRecommendations(
    graph=hg,
    model=album_predictor,
    type='album',
    sampled_edge_type=SAMPLED_EDGE_TYPE,
    k=K,
    user_id=USER_ID,
    typeFile_path=PATH_TO_ALBUMS_FILE,
    userFile_path=PATH_TO_USERS_FILE,
    artists_path=PATH_TO_ARTISTS_FILE,
    albums_path=PATH_TO_ALBUMS_FILE,
    tracks_path=PATH_TO_TRACKS_FILE,
    les_path=PATH_TO_LES_FILE,
    device=DEVICE,
    artist_path_to_txt_data=PATH_TO_ARTISTS_FILE
    )

inference for the 33-th batch in model 0-th layer: 100%|████████████████████████████████| 34/34 [00:10<00:00,  3.37it/s]
inference for the 33-th batch in model 1-th layer: 100%|████████████████████████████████| 34/34 [00:02<00:00, 12.13it/s]


listen_to_album_likelihood torch.Size([17, 9702])


100%|██████████| 17/17 [01:02<00:00,  3.70s/it]


loading df in LFM-1b_users.txt


1it [00:00, 10.34it/s]


loading df in LFM-1b_albums.txt


1it [00:00, 30.58it/s]
1it [00:00,  4.21it/s]
1it [00:00, 94.45it/s]


Top 10 artists for user 5
ID       PLAYCOUNT      NAME
283        111         Iron Maiden 
14        81         3 Inches of Blood 
334        79         Manowar 
1643        63         Jin 
1763        56         Portishead 
1428        52         Beborn Beton 
153        52         Muse 
1674        47         Flight of the Conchords 
1354        47         Alestorm 
1339        46         Nightwish 


1it [00:00, 46.67it/s]


Top 10 albums for user 5
ID       PLAYCOUNT      NAME
3584        63         Collection of Trance & Ambient 
3849        56         Dummy 
3641        47         Flight of the Conchords 
3130        45         Captain Morgan's Revenge 
870        42         Painkiller 
3585        40         The Four Seaons; Concertos for Oboe & Strings 
3036        39         Second Floor 
1078        38         Black Holes and Revelations 
2999        32         A Matter of Life and Death 
3236        28         History: The Best of New Model Army 


1it [00:00, 19.08it/s]


Top 10 tracks for user 5
ID       PLAYCOUNT      NAME
8623        7         These Colours Don't Run 
8484        7         Beowulf, Part 2 
9070        7         Lost Little Robot 
6802        7         Knights of Cydonia 
8523        6         Call to Arms 
9061        6         Terror on the High Seas 
10434        6         Autumn - Allegro 
11111        6         Mysterons 
9303        6         Wenches & Meat 
11116        5         Numb 


1it [00:00, 47.79it/s]
1it [00:00, 89.90it/s]

Top 10 album recommendations for user #5
ID           ARTIST_NAME             NAME
348        Ensiferum        Dragonheads (EP)
444        Ensiferum        Tale Of Revenge (Single)
124        Kobong        Kobong
353        Ensiferum        Ensiferum
412        Made of Hate        Pathogen
6091        Miles Davis        Milestones
1206        Kobong        Chmury nie było
536        Made of Hate        Bullet In Your Head
956        Ensiferum        Act Of Grace
883        Ensiferum        Victory Songs





In [7]:
# Tracks Recommednations
SAMPLED_EDGE_TYPE='listened_to_track'
MODEL_NAME='R_HGNN'+'_'+SAMPLED_EDGE_TYPE
LINK_SCORE_PREDICTOR = LinkScorePredictor(HIDDEN_DIM * N_HEADS)
track_predictor = nn.Sequential(r_hgnn, LINK_SCORE_PREDICTOR)
track_predictor = convert_to_gpu(track_predictor, device=DEVICE)
# load model parameter
SAVE_MODEL_FOLDER = f"../save_model/'lfm1b'/{MODEL_NAME}"
track_predictor.load_state_dict(th.load(SAVE_MODEL_FOLDER+'/'+MODEL_NAME+'.pkl', map_location='cpu'))
# evaluate the best model
track_predictor.eval()

displayTopKRecommendations(
    graph=hg,
    model=track_predictor,
    type='track',
    sampled_edge_type=SAMPLED_EDGE_TYPE,
    k=K,
    user_id=USER_ID,
    typeFile_path=PATH_TO_TRACKS_FILE,
    userFile_path=PATH_TO_USERS_FILE,
    artists_path=PATH_TO_ARTISTS_FILE,
    albums_path=PATH_TO_ALBUMS_FILE,
    tracks_path=PATH_TO_TRACKS_FILE,
    les_path=PATH_TO_LES_FILE,
    device=DEVICE,
    artist_path_to_txt_data=PATH_TO_ARTISTS_FILE
    )

inference for the 33-th batch in model 0-th layer: 100%|████████████████████████████████| 34/34 [00:09<00:00,  3.44it/s]
inference for the 33-th batch in model 1-th layer: 100%|████████████████████████████████| 34/34 [00:02<00:00, 12.20it/s]


listen_to_track_likelihood torch.Size([17, 28185])


100%|██████████| 17/17 [03:06<00:00, 10.96s/it]


loading df in LFM-1b_users.txt


1it [00:00, 82.57it/s]


loading df in LFM-1b_tracks.txt


1it [00:00, 10.98it/s]
1it [00:00,  4.22it/s]
1it [00:00, 81.88it/s]


Top 10 artists for user 5
ID       PLAYCOUNT      NAME
283        111         Iron Maiden 
14        81         3 Inches of Blood 
334        79         Manowar 
1643        63         Jin 
1763        56         Portishead 
1428        52         Beborn Beton 
153        52         Muse 
1674        47         Flight of the Conchords 
1354        47         Alestorm 
1339        46         Nightwish 


1it [00:00, 46.57it/s]


Top 10 albums for user 5
ID       PLAYCOUNT      NAME
3584        63         Collection of Trance & Ambient 
3849        56         Dummy 
3641        47         Flight of the Conchords 
3130        45         Captain Morgan's Revenge 
870        42         Painkiller 
3585        40         The Four Seaons; Concertos for Oboe & Strings 
3036        39         Second Floor 
1078        38         Black Holes and Revelations 
2999        32         A Matter of Life and Death 
3236        28         History: The Best of New Model Army 


1it [00:00, 17.69it/s]


Top 10 tracks for user 5
ID       PLAYCOUNT      NAME
8623        7         These Colours Don't Run 
8484        7         Beowulf, Part 2 
9070        7         Lost Little Robot 
6802        7         Knights of Cydonia 
8523        6         Call to Arms 
9061        6         Terror on the High Seas 
10434        6         Autumn - Allegro 
11111        6         Mysterons 
9303        6         Wenches & Meat 
11116        5         Numb 


1it [00:00, 19.28it/s]
1it [00:00, 87.09it/s]

Top 10 track recommendations for user #5
ID           ARTIST_NAME             NAME
27976        Faithless        Crazy English Summer
27956        Faithless        Everything Will Be Alright Tomorrow
27955        Faithless        Mass Destruction
27314        Faithless        Salva Mea
27264        Muse        Save Me
26780        Robert Schumann        Cello Concerto in A minor, Op. 129: I. Nicht zu schnell
27263        Muse        The 2nd Law: Unsustainable
27267        Muse        Animals
27266        Muse        Explorers
27265        Muse        Big Freeze





In [8]:
# Artist Recommednations
SAMPLED_EDGE_TYPE='listened_to_artist'
MODEL_NAME='R_HGNN'+'_'+SAMPLED_EDGE_TYPE
LINK_SCORE_PREDICTOR = LinkScorePredictor(HIDDEN_DIM * N_HEADS)
artist_predictor = nn.Sequential(r_hgnn, LINK_SCORE_PREDICTOR)
artist_predictor = convert_to_gpu(artist_predictor, device=DEVICE)
# load model parameter
SAVE_MODEL_FOLDER = f"../save_model/'lfm1b'/{MODEL_NAME}"
artist_predictor.load_state_dict(th.load(SAVE_MODEL_FOLDER+'/'+MODEL_NAME+'.pkl', map_location='cpu'))
# evaluate the best model
artist_predictor.eval()


displayTopKRecommendations(
    graph=hg,
    model=artist_predictor,
    type='artist',
    sampled_edge_type=SAMPLED_EDGE_TYPE,
    k=K,
    user_id=USER_ID,
    typeFile_path=PATH_TO_ARTISTS_FILE,
    userFile_path=PATH_TO_USERS_FILE,
    artists_path=PATH_TO_ARTISTS_FILE,
    albums_path=PATH_TO_ALBUMS_FILE,
    tracks_path=PATH_TO_TRACKS_FILE,
    les_path=PATH_TO_LES_FILE,
    device=DEVICE
    )

inference for the 33-th batch in model 0-th layer: 100%|████████████████████████████████| 34/34 [00:10<00:00,  3.40it/s]
inference for the 33-th batch in model 1-th layer: 100%|████████████████████████████████| 34/34 [00:02<00:00, 12.22it/s]


listen_to_artist_likelihood torch.Size([17, 4775])


100%|██████████| 17/17 [00:32<00:00,  1.90s/it]


loading df in LFM-1b_users.txt


1it [00:00, 58.23it/s]


loading df in LFM-1b_artists.txt


1it [00:00, 53.63it/s]
1it [00:00,  4.18it/s]
1it [00:00, 84.52it/s]


Top 10 artists for user 5
ID       PLAYCOUNT      NAME
283        111         Iron Maiden 
14        81         3 Inches of Blood 
334        79         Manowar 
1643        63         Jin 
1763        56         Portishead 
1428        52         Beborn Beton 
153        52         Muse 
1674        47         Flight of the Conchords 
1354        47         Alestorm 
1339        46         Nightwish 


1it [00:00, 52.77it/s]


Top 10 albums for user 5
ID       PLAYCOUNT      NAME
3584        63         Collection of Trance & Ambient 
3849        56         Dummy 
3641        47         Flight of the Conchords 
3130        45         Captain Morgan's Revenge 
870        42         Painkiller 
3585        40         The Four Seaons; Concertos for Oboe & Strings 
3036        39         Second Floor 
1078        38         Black Holes and Revelations 
2999        32         A Matter of Life and Death 
3236        28         History: The Best of New Model Army 


1it [00:00, 18.49it/s]


Top 10 tracks for user 5
ID       PLAYCOUNT      NAME
8623        7         These Colours Don't Run 
8484        7         Beowulf, Part 2 
9070        7         Lost Little Robot 
6802        7         Knights of Cydonia 
8523        6         Call to Arms 
9061        6         Terror on the High Seas 
10434        6         Autumn - Allegro 
11111        6         Mysterons 
9303        6         Wenches & Meat 
11116        5         Numb 


1it [00:00, 96.04it/s]

Top 10 artist recommendations for user #5
ID             NAME
168  Arsis
40  Wintersun
53  Kevin Ayers
381  Isme (Włocławek)
156  MC5
26  The Bill
9  Coma
352  Mangala Vallis
272  Jimi Hendrix
516  JerryC



