In [1]:
import os
import random
import argparse
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle 
import operator

In [2]:
def extract_file(root_dir,file_name):
    """Return the (recall, mrr) pair of the row with the highest MRR in a metrics file.

    Each non-blank line of the file is split on commas; field 1 is read as
    recall and field 2 as MRR (field 0 is not used).

    Parameters
    ----------
    root_dir : str
        Directory that contains the metrics file.
    file_name : str
        Name of the metrics file inside ``root_dir``.

    Returns
    -------
    tuple of (float, float)
        ``(recall, mrr)`` from the row whose MRR is numerically largest;
        the earliest such row wins on ties.

    Raises
    ------
    ValueError
        If the file has no data rows, or a field cannot be parsed as a float.
    IndexError
        If a non-blank line has fewer than three comma-separated fields.
    """
    rows=[]
    with open(os.path.join(root_dir,file_name),'r') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            fields=line.split(",")
            # Parse to float *before* ranking: comparing the raw strings ranks
            # "9.5" above "10.2" and "9e-05" above "0.12".
            rows.append((float(fields[1]),float(fields[2])))
    if not rows:
        raise ValueError("no metric rows found in "+os.path.join(root_dir,file_name))
    # max() keeps the first maximal row, matching a stable descending sort.
    recall,mrr=max(rows,key=lambda row: row[1])
    return recall,mrr

## NARM Model

In [3]:
# NARM on the long-sequence split: start the `long_df` comparison table.
# extract_file returns the (recall, mrr) pair it selects from each metrics file.
root_dir = os.path.join(os.getcwd(), "NARM", "long_seq", "output_metrics")
file_1, file_2, file_3 = (
    "test_yoochoose1_64_metrics.txt",
    "test_diginetica_metrics.txt",
    "test_amex_metrics.txt",
)
(r1, m1), (r2, m2), (r3, m3) = [
    extract_file(root_dir, name) for name in (file_1, file_2, file_3)
]

long_df = pd.DataFrame({
    "Model": ["NARM"] * 3,
    "Dataset": ["yoochoose1_64", "diginetica", "amex-poi-category"],
    "Recall@20": [r1, r2, r3],
    "MRR@20": [m1, m2, m3],
})
# Render both metric columns as percentages with two decimals.
long_df.style.format({'Recall@20':'{:.2%}','MRR@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,Recall@20,MRR@20
0,NARM,yoochoose1_64,67.72%,26.05%
1,NARM,diginetica,41.31%,12.01%
2,NARM,amex-poi-category,62.79%,49.05%


In [4]:
# NARM on the short-sequence split: start the `short_df` comparison table.
# extract_file returns the (recall, mrr) pair it selects from each metrics file.
root_dir = os.path.join(os.getcwd(), "NARM", "short_seq", "output_metrics")
file_1, file_2, file_3 = (
    "test_yoochoose1_64_metrics.txt",
    "test_diginetica_metrics.txt",
    "test_amex_metrics.txt",
)
(r1, m1), (r2, m2), (r3, m3) = [
    extract_file(root_dir, name) for name in (file_1, file_2, file_3)
]

short_df = pd.DataFrame({
    "Model": ["NARM"] * 3,
    "Dataset": ["yoochoose1_64", "diginetica", "amex-poi-category"],
    "Recall@20": [r1, r2, r3],
    "MRR@20": [m1, m2, m3],
})
# Render both metric columns as percentages with two decimals.
short_df.style.format({'Recall@20':'{:.2%}','MRR@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,Recall@20,MRR@20
0,NARM,yoochoose1_64,69.08%,33.37%
1,NARM,diginetica,43.80%,15.76%
2,NARM,amex-poi-category,74.31%,67.17%


## SRGNN

In [5]:
# SRGNN on the long-sequence split: add three rows to `long_df`.
root_dir=os.path.join(os.getcwd(), "SRGNN","long_seq","output_metrics")
file_1,file_2,file_3=("test_yoochoose1_64_metrics.txt","test_diginetica_metrics.txt","test_amex_metrics.txt")
(r1,m1),(r2,m2),(r3,m3)=[extract_file(root_dir,name) for name in (file_1,file_2,file_3)]

tempt=pd.DataFrame({"Model":["SRGNN"]*3, "Dataset":["yoochoose1_64","diginetica","amex-poi-category"],
                    "Recall@20":[r1,r2,r3],"MRR@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# NOTE: re-running this cell without re-running the cells above adds the rows again.
long_df=pd.concat([long_df,tempt],ignore_index=True)
long_df.style.format({'Recall@20':'{:.2%}','MRR@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,Recall@20,MRR@20
0,NARM,yoochoose1_64,67.72%,26.05%
1,NARM,diginetica,41.31%,12.01%
2,NARM,amex-poi-category,62.79%,49.05%
3,SRGNN,yoochoose1_64,68.27%,27.52%
4,SRGNN,diginetica,47.55%,14.59%
5,SRGNN,amex-poi-category,54.75%,27.22%


In [6]:
# SRGNN on the short-sequence split: add three rows to `short_df`.
root_dir=os.path.join(os.getcwd(), "SRGNN","short_seq","output_metrics")
file_1,file_2,file_3=("test_yoochoose1_64_metrics.txt","test_diginetica_metrics.txt","test_amex_metrics.txt")
(r1,m1),(r2,m2),(r3,m3)=[extract_file(root_dir,name) for name in (file_1,file_2,file_3)]

tempt=pd.DataFrame({"Model":["SRGNN"]*3, "Dataset":["yoochoose1_64","diginetica","amex-poi-category"],
                    "Recall@20":[r1,r2,r3],"MRR@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# NOTE: re-running this cell without re-running the cells above adds the rows again.
short_df=pd.concat([short_df,tempt],ignore_index=True)
short_df.style.format({'Recall@20':'{:.2%}','MRR@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,Recall@20,MRR@20
0,NARM,yoochoose1_64,69.08%,33.37%
1,NARM,diginetica,43.80%,15.76%
2,NARM,amex-poi-category,74.31%,67.17%
3,SRGNN,yoochoose1_64,69.22%,34.32%
4,SRGNN,diginetica,47.68%,18.42%
5,SRGNN,amex-poi-category,60.42%,47.99%


## NISER

In [7]:
# NISER on the long-sequence split: add three rows to `long_df`.
root_dir=os.path.join(os.getcwd(), "NISER","long_seq","output_metrics")
file_1,file_2,file_3=("test_yoochoose1_64_metrics.txt","test_diginetica_metrics.txt","test_amex_metrics.txt")
(r1,m1),(r2,m2),(r3,m3)=[extract_file(root_dir,name) for name in (file_1,file_2,file_3)]

tempt=pd.DataFrame({"Model":["NISER"]*3, "Dataset":["yoochoose1_64","diginetica","amex-poi-category"],
                    "Recall@20":[r1,r2,r3],"MRR@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# NOTE: re-running this cell without re-running the cells above adds the rows again.
long_df=pd.concat([long_df,tempt],ignore_index=True)
long_df.style.format({'Recall@20':'{:.2%}','MRR@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,Recall@20,MRR@20
0,NARM,yoochoose1_64,67.72%,26.05%
1,NARM,diginetica,41.31%,12.01%
2,NARM,amex-poi-category,62.79%,49.05%
3,SRGNN,yoochoose1_64,68.27%,27.52%
4,SRGNN,diginetica,47.55%,14.59%
5,SRGNN,amex-poi-category,54.75%,27.22%
6,NISER,yoochoose1_64,69.75%,28.55%
7,NISER,diginetica,51.44%,16.14%
8,NISER,amex-poi-category,60.54%,40.60%


In [8]:
# NISER on the short-sequence split: add three rows to `short_df`.
root_dir=os.path.join(os.getcwd(), "NISER","short_seq","output_metrics")
file_1,file_2,file_3=("test_yoochoose1_64_metrics.txt","test_diginetica_metrics.txt","test_amex_metrics.txt")
(r1,m1),(r2,m2),(r3,m3)=[extract_file(root_dir,name) for name in (file_1,file_2,file_3)]

tempt=pd.DataFrame({"Model":["NISER"]*3, "Dataset":["yoochoose1_64","diginetica","amex-poi-category"],
                    "Recall@20":[r1,r2,r3],"MRR@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# NOTE: re-running this cell without re-running the cells above adds the rows again.
short_df=pd.concat([short_df,tempt],ignore_index=True)
short_df.style.format({'Recall@20':'{:.2%}','MRR@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,Recall@20,MRR@20
0,NARM,yoochoose1_64,69.08%,33.37%
1,NARM,diginetica,43.80%,15.76%
2,NARM,amex-poi-category,74.31%,67.17%
3,SRGNN,yoochoose1_64,69.22%,34.32%
4,SRGNN,diginetica,47.68%,18.42%
5,SRGNN,amex-poi-category,60.42%,47.99%
6,NISER,yoochoose1_64,71.29%,35.37%
7,NISER,diginetica,51.22%,19.98%
8,NISER,amex-poi-category,57.06%,47.98%


## TAGNN

In [13]:
# TAGNN on the long-sequence split: add three rows to `long_df`.
root_dir=os.path.join(os.getcwd(), "TAGNN","long_seq","output_metrics")
file_1,file_2,file_3=("test_yoochoose1_64_metrics.txt","test_diginetica_metrics.txt","test_amex_metrics.txt")
(r1,m1),(r2,m2),(r3,m3)=[extract_file(root_dir,name) for name in (file_1,file_2,file_3)]

# Unlike the other model cells, the values are divided by 100 here.
# NOTE(review): presumably the TAGNN files store percentages (0-100) rather than fractions — confirm.
tempt=pd.DataFrame({"Model":["TAGNN"]*3, "Dataset":["yoochoose1_64","diginetica","amex-poi-category"],
                    "Recall@20":[r1/100,r2/100,r3/100],"MRR@20":[m1/100,m2/100,m3/100]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# NOTE: re-running this cell without re-running the cells above adds the rows again.
long_df=pd.concat([long_df,tempt],ignore_index=True)
long_df.style.format({'Recall@20':'{:.2%}','MRR@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,Recall@20,MRR@20
0,NARM,yoochoose1_64,67.72%,26.05%
1,NARM,diginetica,41.31%,12.01%
2,NARM,amex-poi-category,62.79%,49.05%
3,SRGNN,yoochoose1_64,68.27%,27.52%
4,SRGNN,diginetica,47.55%,14.59%
5,SRGNN,amex-poi-category,54.75%,27.22%
6,NISER,yoochoose1_64,69.75%,28.55%
7,NISER,diginetica,51.44%,16.14%
8,NISER,amex-poi-category,60.54%,40.60%
9,TAGNN,yoochoose1_64,68.40%,26.99%


In [11]:
# TAGNN on the short-sequence split: add three rows to `short_df`.
root_dir=os.path.join(os.getcwd(), "TAGNN","short_seq","output_metrics")
file_1,file_2,file_3=("test_yoochoose1_64_metrics.txt","test_diginetica_metrics.txt","test_amex_metrics.txt")
(r1,m1),(r2,m2),(r3,m3)=[extract_file(root_dir,name) for name in (file_1,file_2,file_3)]

# Unlike the other model cells, the values are divided by 100 here.
# NOTE(review): presumably the TAGNN files store percentages (0-100) rather than fractions — confirm.
tempt=pd.DataFrame({"Model":["TAGNN"]*3, "Dataset":["yoochoose1_64","diginetica","amex-poi-category"],
                    "Recall@20":[r1/100,r2/100,r3/100],"MRR@20":[m1/100,m2/100,m3/100]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# NOTE: re-running this cell without re-running the cells above adds the rows again.
short_df=pd.concat([short_df,tempt],ignore_index=True)
short_df.style.format({'Recall@20':'{:.2%}','MRR@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,Recall@20,MRR@20
0,NARM,yoochoose1_64,69.08%,33.37%
1,NARM,diginetica,43.80%,15.76%
2,NARM,amex-poi-category,74.31%,67.17%
3,SRGNN,yoochoose1_64,69.22%,34.32%
4,SRGNN,diginetica,47.68%,18.42%
5,SRGNN,amex-poi-category,60.42%,47.99%
6,NISER,yoochoose1_64,71.29%,35.37%
7,NISER,diginetica,51.22%,19.98%
8,NISER,amex-poi-category,57.06%,47.98%
9,TAGNN,yoochoose1_64,69.90%,34.70%


## LESSR

In [14]:
# LESSR on the long-sequence split: add three rows to `long_df`.
root_dir=os.path.join(os.getcwd(), "LESSR","long_seq","output_metrics")
file_1,file_2,file_3=("test_yoochoose1_64_metrics.txt","test_diginetica_metrics.txt","test_amex_metrics.txt")
(r1,m1),(r2,m2),(r3,m3)=[extract_file(root_dir,name) for name in (file_1,file_2,file_3)]

tempt=pd.DataFrame({"Model":["LESSR"]*3, "Dataset":["yoochoose1_64","diginetica","amex-poi-category"],
                    "Recall@20":[r1,r2,r3],"MRR@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# NOTE: re-running this cell without re-running the cells above adds the rows again.
long_df=pd.concat([long_df,tempt],ignore_index=True)
long_df.style.format({'Recall@20':'{:.2%}','MRR@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,Recall@20,MRR@20
0,NARM,yoochoose1_64,67.72%,26.05%
1,NARM,diginetica,41.31%,12.01%
2,NARM,amex-poi-category,62.79%,49.05%
3,SRGNN,yoochoose1_64,68.27%,27.52%
4,SRGNN,diginetica,47.55%,14.59%
5,SRGNN,amex-poi-category,54.75%,27.22%
6,NISER,yoochoose1_64,69.75%,28.55%
7,NISER,diginetica,51.44%,16.14%
8,NISER,amex-poi-category,60.54%,40.60%
9,TAGNN,yoochoose1_64,68.40%,26.99%


In [15]:
# LESSR on the short-sequence split: add three rows to `short_df`.
root_dir=os.path.join(os.getcwd(), "LESSR","short_seq","output_metrics")
file_1,file_2,file_3=("test_yoochoose1_64_metrics.txt","test_diginetica_metrics.txt","test_amex_metrics.txt")
(r1,m1),(r2,m2),(r3,m3)=[extract_file(root_dir,name) for name in (file_1,file_2,file_3)]

tempt=pd.DataFrame({"Model":["LESSR"]*3, "Dataset":["yoochoose1_64","diginetica","amex-poi-category"],
                    "Recall@20":[r1,r2,r3],"MRR@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# NOTE: re-running this cell without re-running the cells above adds the rows again.
short_df=pd.concat([short_df,tempt],ignore_index=True)
short_df.style.format({'Recall@20':'{:.2%}','MRR@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,Recall@20,MRR@20
0,NARM,yoochoose1_64,69.08%,33.37%
1,NARM,diginetica,43.80%,15.76%
2,NARM,amex-poi-category,74.31%,67.17%
3,SRGNN,yoochoose1_64,69.22%,34.32%
4,SRGNN,diginetica,47.68%,18.42%
5,SRGNN,amex-poi-category,60.42%,47.99%
6,NISER,yoochoose1_64,71.29%,35.37%
7,NISER,diginetica,51.22%,19.98%
8,NISER,amex-poi-category,57.06%,47.98%
9,TAGNN,yoochoose1_64,69.90%,34.70%


## MSGIFSR

In [16]:
# MSGIFSR on the long-sequence split: add three rows to `long_df`.
root_dir=os.path.join(os.getcwd(), "MSGIFSR","long_seq","output_metrics")
file_1,file_2,file_3=("test_yoochoose1_64_metrics.txt","test_diginetica_metrics.txt","test_amex_metrics.txt")
(r1,m1),(r2,m2),(r3,m3)=[extract_file(root_dir,name) for name in (file_1,file_2,file_3)]

tempt=pd.DataFrame({"Model":["MSGIFSR"]*3, "Dataset":["yoochoose1_64","diginetica","amex-poi-category"],
                    "Recall@20":[r1,r2,r3],"MRR@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# NOTE: re-running this cell without re-running the cells above adds the rows again.
long_df=pd.concat([long_df,tempt],ignore_index=True)
long_df.style.format({'Recall@20':'{:.2%}','MRR@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,Recall@20,MRR@20
0,NARM,yoochoose1_64,67.72%,26.05%
1,NARM,diginetica,41.31%,12.01%
2,NARM,amex-poi-category,62.79%,49.05%
3,SRGNN,yoochoose1_64,68.27%,27.52%
4,SRGNN,diginetica,47.55%,14.59%
5,SRGNN,amex-poi-category,54.75%,27.22%
6,NISER,yoochoose1_64,69.75%,28.55%
7,NISER,diginetica,51.44%,16.14%
8,NISER,amex-poi-category,60.54%,40.60%
9,TAGNN,yoochoose1_64,68.40%,26.99%


In [17]:
# MSGIFSR on the short-sequence split: add three rows to `short_df`.
root_dir=os.path.join(os.getcwd(), "MSGIFSR","short_seq","output_metrics")
file_1,file_2,file_3=("test_yoochoose1_64_metrics.txt","test_diginetica_metrics.txt","test_amex_metrics.txt")
(r1,m1),(r2,m2),(r3,m3)=[extract_file(root_dir,name) for name in (file_1,file_2,file_3)]

tempt=pd.DataFrame({"Model":["MSGIFSR"]*3, "Dataset":["yoochoose1_64","diginetica","amex-poi-category"],
                    "Recall@20":[r1,r2,r3],"MRR@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# NOTE: re-running this cell without re-running the cells above adds the rows again.
short_df=pd.concat([short_df,tempt],ignore_index=True)
short_df.style.format({'Recall@20':'{:.2%}','MRR@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,Recall@20,MRR@20
0,NARM,yoochoose1_64,69.08%,33.37%
1,NARM,diginetica,43.80%,15.76%
2,NARM,amex-poi-category,74.31%,67.17%
3,SRGNN,yoochoose1_64,69.22%,34.32%
4,SRGNN,diginetica,47.68%,18.42%
5,SRGNN,amex-poi-category,60.42%,47.99%
6,NISER,yoochoose1_64,71.29%,35.37%
7,NISER,diginetica,51.22%,19.98%
8,NISER,amex-poi-category,57.06%,47.98%
9,TAGNN,yoochoose1_64,69.90%,34.70%


In [18]:
# Persist both comparison tables as CSV files; the relative names resolve against
# the current working directory. to_csv is called with its defaults, so the
# DataFrame index is written as an unnamed first column.
long_df.to_csv("long_sequence_result.csv")
short_df.to_csv("short_sequence_result.csv")

# Hyperparameter Tuning

In [77]:
def extract_file(root_dir,file_name):
    """Return the (recall, mrr) pair of the row with the highest MRR in a metrics file.

    Each non-blank line of the file is split on commas; field 1 is read as
    recall and field 2 as MRR (field 0 is not used).

    NOTE(review): this cell re-defines ``extract_file``, which is also defined
    near the top of the notebook; the later definition shadows the earlier one.

    Parameters
    ----------
    root_dir : str
        Directory that contains the metrics file.
    file_name : str
        Name of the metrics file inside ``root_dir``.

    Returns
    -------
    tuple of (float, float)
        ``(recall, mrr)`` from the row whose MRR is numerically largest;
        the earliest such row wins on ties.

    Raises
    ------
    ValueError
        If the file has no data rows, or a field cannot be parsed as a float.
    IndexError
        If a non-blank line has fewer than three comma-separated fields.
    """
    rows=[]
    with open(os.path.join(root_dir,file_name),'r') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            fields=line.split(",")
            # Parse to float *before* ranking: comparing the raw strings ranks
            # "9.5" above "10.2" and "9e-05" above "0.12".
            rows.append((float(fields[1]),float(fields[2])))
    if not rows:
        raise ValueError("no metric rows found in "+os.path.join(root_dir,file_name))
    # max() keeps the first maximal row, matching a stable descending sort.
    recall,mrr=max(rows,key=lambda row: row[1])
    return recall,mrr

## number of layers

#### SRGNN

In [81]:
# num_layers sweep, SRGNN on yoochoose1_64; this cell starts a fresh `df`.
root_dir = "./SRGNN/hypyer-tuning/output_metrics"
(r1, m1), (r2, m2), (r3, m3) = [
    extract_file(root_dir, file_name=name)
    for name in ("layer_1_emb_128_yh.txt", "layer_2_emb_128_yh.txt", "layer_3_emb_128_yh.txt")
]

df = pd.DataFrame({
    "Model": ["SRGNN"] * 3,
    "Dataset": ["yoochoose1_64"] * 3,
    "num_layers": [1, 2, 3],
    "recall@20": [r1, r2, r3],
    "mrr@20": [m1, m2, m3],
})

In [82]:
# num_layers sweep, SRGNN on diginetica: add three rows to `df`.
root_dir="./SRGNN/hypyer-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_128_dg.txt","layer_2_emb_128_dg.txt","layer_3_emb_128_dg.txt")
]

tempt=pd.DataFrame({"Model":["SRGNN"]*3,"Dataset":["diginetica"]*3,"num_layers":[1,2,3],
                    "recall@20":[r1,r2,r3],"mrr@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

In [83]:
# num_layers sweep, SRGNN on amex-poi-category (emb_256 files): add three rows to `df`.
root_dir="./SRGNN/hypyer-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_256_amex.txt","layer_2_emb_256_amex.txt","layer_3_emb_256_amex.txt")
]

tempt=pd.DataFrame({"Model":["SRGNN"]*3,"Dataset":["amex-poi-category"]*3,"num_layers":[1,2,3],
                    "recall@20":[r1,r2,r3],"mrr@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

#### LESSR

In [85]:
# num_layers sweep, LESSR on yoochoose1_64: add three rows to `df`.
root_dir="./LESSR/hyper-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_128_yh.txt","layer_2_emb_128_yh.txt","layer_3_emb_128_yh.txt")
]

tempt=pd.DataFrame({"Model":["LESSR"]*3,"Dataset":["yoochoose1_64"]*3,"num_layers":[1,2,3],
                    "recall@20":[r1,r2,r3],"mrr@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

In [86]:
# num_layers sweep, LESSR on diginetica: add three rows to `df`.
root_dir="./LESSR/hyper-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_128_dg.txt","layer_2_emb_128_dg.txt","layer_3_emb_128_dg.txt")
]

tempt=pd.DataFrame({"Model":["LESSR"]*3,"Dataset":["diginetica"]*3,"num_layers":[1,2,3],
                    "recall@20":[r1,r2,r3],"mrr@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

In [88]:
# num_layers sweep, LESSR on amex-poi-category (emb_256 files): add three rows to `df`
# and display the finished table.
root_dir="./LESSR/hyper-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_256_amex.txt","layer_2_emb_256_amex.txt","layer_3_emb_256_amex.txt")
]

tempt=pd.DataFrame({"Model":["LESSR"]*3,"Dataset":["amex-poi-category"]*3,"num_layers":[1,2,3],
                    "recall@20":[r1,r2,r3],"mrr@20":[m1,m2,m3]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)
df.style.format({'recall@20':'{:.2%}','mrr@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,num_layers,recall@20,mrr@20
0,SRGNN,yoochoose1_64,1,70.73%,31.31%
1,SRGNN,yoochoose1_64,2,70.77%,31.28%
2,SRGNN,yoochoose1_64,3,70.75%,31.28%
3,SRGNN,diginetica,1,51.55%,17.16%
4,SRGNN,diginetica,2,51.58%,17.23%
5,SRGNN,diginetica,3,51.65%,17.25%
6,SRGNN,amex-poi-category,1,73.86%,41.79%
7,SRGNN,amex-poi-category,2,80.42%,41.56%
8,SRGNN,amex-poi-category,3,78.86%,42.80%
9,LESSR,yoochoose1_64,1,70.30%,31.13%


In [89]:
# Save the num_layers sweep table (index included, to_csv defaults).
# NOTE(review): the embedding-dimension section later in this notebook also writes
# "output.csv", so a full top-to-bottom run overwrites this file — confirm that is intended.
df.to_csv("output.csv")

## embedding dimension

#### NARM

In [111]:
# Embedding-dimension sweep, NARM on yoochoose1_64; this cell starts a fresh `df`
# (replacing the num_layers table held under the same name).
root_dir = "./NARM/hyper-tuning/output_metrics"
(r1, m1), (r2, m2), (r3, m3), (r4, m4) = [
    extract_file(root_dir, file_name=name)
    for name in ("emb_64_yh.txt", "emb_128_yh.txt", "emb_256_yh.txt", "emb_512_yh.txt")
]

df = pd.DataFrame({
    "Model": ["NARM"] * 4,
    "Dataset": ["yoochoose1_64"] * 4,
    "embed-dim": [64, 128, 256, 512],
    "recall@20": [r1, r2, r3, r4],
    "mrr@20": [m1, m2, m3, m4],
})

In [112]:
# Embedding-dimension sweep, NARM on diginetica: add four rows to `df`.
root_dir="./NARM/hyper-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3),(r4,m4)=[
    extract_file(root_dir,file_name=name)
    for name in ("emb_64_dg.txt","emb_128_dg.txt","emb_256_dg.txt","emb_512_dg.txt")
]

tempt=pd.DataFrame({"Model":["NARM"]*4,"Dataset":["diginetica"]*4,"embed-dim":[64,128,256,512],
                    "recall@20":[r1,r2,r3,r4],"mrr@20":[m1,m2,m3,m4]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

In [113]:
# Embedding-dimension sweep, NARM on amex-poi-category (six sizes, up to 1024): add six rows to `df`.
root_dir="./NARM/hyper-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3),(r4,m4),(r5,m5),(r6,m6)=[
    extract_file(root_dir,file_name=name)
    for name in ("emb_64_amex.txt","emb_128_amex.txt","emb_256_amex.txt",
                 "emb_512_amex.txt","emb_768_amex.txt","emb_1024_amex.txt")
]

tempt=pd.DataFrame({"Model":["NARM"]*6,"Dataset":["amex-poi-category"]*6,"embed-dim":[64,128,256,512,768,1024],
                    "recall@20":[r1,r2,r3,r4,r5,r6],"mrr@20":[m1,m2,m3,m4,m5,m6]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

#### SRGNN

In [114]:
# Embedding-dimension sweep, SRGNN (layer_1 files) on yoochoose1_64: add four rows to `df`.
root_dir="./SRGNN/hypyer-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3),(r4,m4)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_64_yh.txt","layer_1_emb_128_yh.txt","layer_1_emb_256_yh.txt","layer_1_emb_512_yh.txt")
]

tempt=pd.DataFrame({"Model":["SRGNN"]*4,"Dataset":["yoochoose1_64"]*4,"embed-dim":[64,128,256,512],
                    "recall@20":[r1,r2,r3,r4],"mrr@20":[m1,m2,m3,m4]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

In [115]:
# Embedding-dimension sweep, SRGNN (layer_1 files) on diginetica: add four rows to `df`.
root_dir="./SRGNN/hypyer-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3),(r4,m4)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_64_dg.txt","layer_1_emb_128_dg.txt","layer_1_emb_256_dg.txt","layer_1_emb_512_dg.txt")
]

tempt=pd.DataFrame({"Model":["SRGNN"]*4,"Dataset":["diginetica"]*4,"embed-dim":[64,128,256,512],
                    "recall@20":[r1,r2,r3,r4],"mrr@20":[m1,m2,m3,m4]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

In [116]:
# Embedding-dimension sweep, SRGNN (layer_1 files) on amex-poi-category: add six rows to `df`.
root_dir="./SRGNN/hypyer-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3),(r4,m4),(r5,m5),(r6,m6)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_64_amex.txt","layer_1_emb_128_amex.txt","layer_1_emb_256_amex.txt",
                 "layer_1_emb_512_amex.txt","layer_1_emb_768_amex.txt","layer_1_emb_1024_amex.txt")
]

tempt=pd.DataFrame({"Model":["SRGNN"]*6,"Dataset":["amex-poi-category"]*6,"embed-dim":[64,128,256,512,768,1024],
                    "recall@20":[r1,r2,r3,r4,r5,r6],"mrr@20":[m1,m2,m3,m4,m5,m6]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

#### NISER

In [117]:
# Embedding-dimension sweep, NISER (layer_1 files) on yoochoose1_64: add four rows to `df`.
root_dir="./NISER/hyper-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3),(r4,m4)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_64_yh.txt","layer_1_emb_128_yh.txt","layer_1_emb_256_yh.txt","layer_1_emb_512_yh.txt")
]

tempt=pd.DataFrame({"Model":["NISER"]*4,"Dataset":["yoochoose1_64"]*4,"embed-dim":[64,128,256,512],
                    "recall@20":[r1,r2,r3,r4],"mrr@20":[m1,m2,m3,m4]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

In [118]:
# Embedding-dimension sweep, NISER (layer_1 files) on diginetica: add four rows to `df`.
root_dir="./NISER/hyper-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3),(r4,m4)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_64_dg.txt","layer_1_emb_128_dg.txt","layer_1_emb_256_dg.txt","layer_1_emb_512_dg.txt")
]

tempt=pd.DataFrame({"Model":["NISER"]*4,"Dataset":["diginetica"]*4,"embed-dim":[64,128,256,512],
                    "recall@20":[r1,r2,r3,r4],"mrr@20":[m1,m2,m3,m4]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

In [119]:
# Embedding-dimension sweep, NISER (layer_1 files) on amex-poi-category: add six rows to `df`.
root_dir="./NISER/hyper-tuning/output_metrics"
(r1,m1),(r2,m2),(r3,m3),(r4,m4),(r5,m5),(r6,m6)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_64_amex.txt","layer_1_emb_128_amex.txt","layer_1_emb_256_amex.txt",
                 "layer_1_emb_512_amex.txt","layer_1_emb_768_amex.txt","layer_1_emb_1024_amex.txt")
]

tempt=pd.DataFrame({"Model":["NISER"]*6,"Dataset":["amex-poi-category"]*6,"embed-dim":[64,128,256,512,768,1024],
                    "recall@20":[r1,r2,r3,r4,r5,r6],"mrr@20":[m1,m2,m3,m4,m5,m6]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

#### LESSR

In [120]:
# Embedding-dimension sweep, LESSR (layer_1 files) on yoochoose1_64: add three rows to `df`.
# Only sizes 128/256/512 are read here (no emb_64 file is loaded for LESSR).
root_dir="./LESSR/hyper-tuning/output_metrics"
(r2,m2),(r3,m3),(r4,m4)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_128_yh.txt","layer_1_emb_256_yh.txt","layer_1_emb_512_yh.txt")
]

tempt=pd.DataFrame({"Model":["LESSR"]*3,"Dataset":["yoochoose1_64"]*3,"embed-dim":[128,256,512],
                    "recall@20":[r2,r3,r4],"mrr@20":[m2,m3,m4]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

In [121]:
# Embedding-dimension sweep, LESSR (layer_1 files) on diginetica: add three rows to `df`.
# Only sizes 128/256/512 are read here (no emb_64 file is loaded for LESSR).
root_dir="./LESSR/hyper-tuning/output_metrics"
(r2,m2),(r3,m3),(r4,m4)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_128_dg.txt","layer_1_emb_256_dg.txt","layer_1_emb_512_dg.txt")
]

tempt=pd.DataFrame({"Model":["LESSR"]*3,"Dataset":["diginetica"]*3,"embed-dim":[128,256,512],
                    "recall@20":[r2,r3,r4],"mrr@20":[m2,m3,m4]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

In [122]:
# Embedding-dimension sweep, LESSR (layer_1 files) on amex-poi-category: add four rows to `df`.
# Sizes 128-768 only; the emb_1024 run is not loaded in this cell.
root_dir="./LESSR/hyper-tuning/output_metrics"
(r2,m2),(r3,m3),(r4,m4),(r5,m5)=[
    extract_file(root_dir,file_name=name)
    for name in ("layer_1_emb_128_amex.txt","layer_1_emb_256_amex.txt",
                 "layer_1_emb_512_amex.txt","layer_1_emb_768_amex.txt")
]

tempt=pd.DataFrame({"Model":["LESSR"]*4,"Dataset":["amex-poi-category"]*4,"embed-dim":[128,256,512,768],
                    "recall@20":[r2,r3,r4,r5],"mrr@20":[m2,m3,m4,m5]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

#### MSGIFSR

In [123]:
# Embedding-dimension sweep, MSGIFSR (order_1 files) on yoochoose1_64: add three rows to `df`.
root_dir="./MSGIFSR/hyper-tuning/output_metrics"
(r2,m2),(r3,m3),(r4,m4)=[
    extract_file(root_dir,file_name=name)
    for name in ("order_1_emb_128_yh.txt","order_1_emb_256_yh.txt","order_1_emb_512_yh.txt")
]

tempt=pd.DataFrame({"Model":["MSGIFSR"]*3,"Dataset":["yoochoose1_64"]*3,"embed-dim":[128,256,512],
                    "recall@20":[r2,r3,r4],"mrr@20":[m2,m3,m4]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

In [124]:
# Embedding-dimension sweep, MSGIFSR (order_1 files) on diginetica: add three rows to `df`.
root_dir="./MSGIFSR/hyper-tuning/output_metrics"
(r2,m2),(r3,m3),(r4,m4)=[
    extract_file(root_dir,file_name=name)
    for name in ("order_1_emb_128_dg.txt","order_1_emb_256_dg.txt","order_1_emb_512_dg.txt")
]

tempt=pd.DataFrame({"Model":["MSGIFSR"]*3,"Dataset":["diginetica"]*3,"embed-dim":[128,256,512],
                    "recall@20":[r2,r3,r4],"mrr@20":[m2,m3,m4]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

In [125]:
# Embedding-dimension sweep, MSGIFSR (order_1 files) on amex-poi-category: add five rows
# to `df` and display the finished table.
root_dir="./MSGIFSR/hyper-tuning/output_metrics"
(r2,m2),(r3,m3),(r4,m4),(r5,m5),(r6,m6)=[
    extract_file(root_dir,file_name=name)
    for name in ("order_1_emb_128_amex.txt","order_1_emb_256_amex.txt","order_1_emb_512_amex.txt",
                 "order_1_emb_768_amex.txt","order_1_emb_1024_amex.txt")
]

tempt=pd.DataFrame({"Model":["MSGIFSR"]*5,"Dataset":["amex-poi-category"]*5,"embed-dim":[128,256,512,768,1024],
                    "recall@20":[r2,r3,r4,r5,r6],"mrr@20":[m2,m3,m4,m5,m6]})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)
df.style.format({'recall@20':'{:.2%}','mrr@20':'{:.2%}'})

Unnamed: 0,Model,Dataset,embed-dim,recall@20,mrr@20
0,NARM,yoochoose1_64,64,68.80%,28.89%
1,NARM,yoochoose1_64,128,69.75%,29.70%
2,NARM,yoochoose1_64,256,69.50%,29.25%
3,NARM,yoochoose1_64,512,68.59%,28.40%
4,NARM,diginetica,64,47.38%,14.89%
5,NARM,diginetica,128,47.43%,15.33%
6,NARM,diginetica,256,46.71%,14.97%
7,NARM,diginetica,512,45.22%,14.32%
8,NARM,amex-poi-category,64,66.43%,61.00%
9,NARM,amex-poi-category,128,66.71%,58.00%


In [126]:
# Save the embedding-dimension sweep table (index included, to_csv defaults).
# NOTE(review): the num_layers section earlier in this notebook writes the same
# "output.csv", so this call replaces that file — confirm that is intended.
df.to_csv("output.csv")

# model structure

In [248]:
import os
import random
import argparse
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle 

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, SequentialSampler

#### NARM

In [249]:
import sys

# Drop duplicate sys.path entries while keeping their order; list(set(...))
# would shuffle the import search order arbitrarily.
sys.path=list(dict.fromkeys(sys.path))
# NOTE(review): absolute, machine-specific project root; the checkpoints below are
# resolved from os.getcwd() instead — confirm both point at the same directory.
root_dir='/home/ec2-user/SageMaker/sequence-based-recommendation'
model_name="NARM"
model_path=os.path.join(root_dir,model_name)
sys.path.append(model_path)

from NARM import metric
from NARM.utils import collate_fn
from NARM.narm import NARM
from NARM.dataset import load_data, RecSysDataset

# The parser only supplies default hyper-parameters; parse_known_args() ignores
# the arguments the Jupyter kernel itself was launched with.
parser = argparse.ArgumentParser()
parser.add_argument('--n_items', type=int, default=37484, help='number of unique items. 37484 for yoochoose')
parser.add_argument('--batch_size', type=int, default=100, help='input batch size')
parser.add_argument('--hidden_size', type=int, default=128, help='hidden state size of gru module')
parser.add_argument('--embed_dim', type=int, default=128, help='the dimension of item embedding')
parser.add_argument('--topk', type=int, default=20, help='number of top score items selected for calculating recall and mrr metrics')
parser.add_argument("--model_checkpoint", type=str, default="amex_checkpoint.pth") 
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
parser.add_argument('--patience', type=int, default=5, help='the number of epoch to wait before early stop ')
args,_ = parser.parse_known_args()

model_name="NARM"

#### yoochoose1_64 configuration -> first row of `df`
args.n_items=37484
args.batch_size=100 
args.epoch=30 
args.embed_dim=128 
args.hidden_size=128
args.model_checkpoint='yoochoose1_64_checkpoint.pth'

model = NARM(args.n_items, args.hidden_size, args.embed_dim, args.batch_size)
model_path=os.path.join(os.getcwd(),model_name, args.model_checkpoint)
# map_location="cpu": only parameter counts are needed, and this keeps the load
# working on a machine without a GPU in case the checkpoint holds CUDA tensors.
ckpt = torch.load(model_path, map_location="cpu")
model.load_state_dict(ckpt['state_dict'])

num_param=np.sum([p.nelement() for p in model.parameters()])
# Direct lookup raises KeyError if the embedding parameter is missing, instead of
# silently leaving `embedding_lookup` undefined or stale.
embedding_lookup=dict(model.named_parameters())["emb.weight"].nelement()
layer_param=num_param-embedding_lookup

df=pd.DataFrame({"Model":[model_name],"num_parameter":num_param,"embedding-lookup":embedding_lookup,"layer-param":layer_param,
    "embed-dim":args.embed_dim,"batch-size":args.batch_size,"learning-rate":args.lr,"num-layers":None,"multi-granularity":1})

#### amex configuration -> first row of `amex_df`
args.n_items=556
args.batch_size=32 
args.epoch=30 
args.embed_dim=256 
args.hidden_size=256
args.model_checkpoint='amex_checkpoint.pth'

model = NARM(args.n_items, args.hidden_size, args.embed_dim, args.batch_size)
model_path=os.path.join(os.getcwd(),model_name, args.model_checkpoint)
ckpt = torch.load(model_path, map_location="cpu")
model.load_state_dict(ckpt['state_dict'])
num_param=np.sum([p.nelement() for p in model.parameters()])
embedding_lookup=dict(model.named_parameters())["emb.weight"].nelement()
layer_param=num_param-embedding_lookup

amex_df=pd.DataFrame({"Model":[model_name],"num_parameter":num_param,"embedding-lookup":embedding_lookup,"layer-param":layer_param,
    "embed-dim":args.embed_dim,"batch-size":args.batch_size,"learning-rate":args.lr,"num-layers":None,"multi-granularity":1})

#### SRGNN

In [250]:
# Drop duplicate sys.path entries while keeping their order; list(set(...))
# would shuffle the import search order arbitrarily.
sys.path=list(dict.fromkeys(sys.path))
# NOTE(review): absolute, machine-specific project root; the checkpoints below are
# resolved from os.getcwd() instead — confirm both point at the same directory.
root_dir='/home/ec2-user/SageMaker/sequence-based-recommendation'
# Remove the previous model's directory from the search path before adding SRGNN's.
old_model_name="NARM"
old_model_path=os.path.join(root_dir,old_model_name)
sys.path=[x for x in sys.path if x !=old_model_path]

model_name="SRGNN"
model_path=os.path.join(root_dir,model_name)
sys.path.append(model_path)

from SRGNN.srgnn import SRGNN
from SRGNN.collate import (collate_fn_factory, seq_to_session_graph)
from SRGNN import metric
from SRGNN.dataset import load_data,RecSysDataset

# The parser only supplies default hyper-parameters; parse_known_args() ignores
# the arguments the Jupyter kernel itself was launched with.
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--batch-size', type=int, default=100, help='the batch size for training')
parser.add_argument('--lr', type=float, default=1e-3, help='the learning rate')
parser.add_argument('--n_items', type=int, default=37484, help='number of unique items. 37484 for yoochoose')
parser.add_argument('--embedding-dim', type=int, default=128, help='the embedding size')
parser.add_argument('--num-layers', type=int, default=1, help='the number of layers')
parser.add_argument('--feat-drop', type=float, default=0.1, help='the dropout ratio for features')
parser.add_argument("--model_checkpoint", type=str, default="amex_checkpoint.pth")
args,_= parser.parse_known_args()

#### yoochoose1_64 configuration -> appended to `df`
model_name="SRGNN"
args.n_items=37484
args.batch_size=100 
args.epoch=30 
args.embedding_dim=128
args.num_layers=1
args.model_checkpoint='yoochoose1_64_checkpoint.pth'

model = SRGNN(args.n_items, args.embedding_dim, args.num_layers, feat_drop=args.feat_drop)
model_path=os.path.join(os.getcwd(),model_name, args.model_checkpoint)
# map_location="cpu": only parameter counts are needed, and this keeps the load
# working on a machine without a GPU in case the checkpoint holds CUDA tensors.
ckpt = torch.load(model_path, map_location="cpu")
model.load_state_dict(ckpt['state_dict'])

num_param=np.sum([p.nelement() for p in model.parameters()])
# Direct lookup raises KeyError if the embedding parameter is missing, instead of
# silently reusing a value left over from an earlier cell.
embedding_lookup=dict(model.named_parameters())["embedding.weight"].nelement()
layer_param=num_param-embedding_lookup

tempt=pd.DataFrame({"Model":[model_name],"num_parameter":num_param,"embedding-lookup":embedding_lookup,"layer-param":layer_param,
       "embed-dim":args.embedding_dim,"batch-size":args.batch_size,"learning-rate":args.lr,"num-layers":args.num_layers,"multi-granularity":1})
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
df=pd.concat([df,tempt],ignore_index=True)

##### amex configuration -> appended to `amex_df`
args.n_items=556
args.batch_size=32 
args.epoch=30 
args.embedding_dim=256
args.num_layers=1
args.model_checkpoint='amex_checkpoint.pth'
model = SRGNN(args.n_items, args.embedding_dim, args.num_layers, feat_drop=args.feat_drop)
model_path=os.path.join(os.getcwd(),model_name, args.model_checkpoint)
ckpt = torch.load(model_path, map_location="cpu")
model.load_state_dict(ckpt['state_dict'])
num_param=np.sum([p.nelement() for p in model.parameters()])
embedding_lookup=dict(model.named_parameters())["embedding.weight"].nelement()
layer_param=num_param-embedding_lookup

tempt=pd.DataFrame({"Model":[model_name],"num_parameter":num_param,"embedding-lookup":embedding_lookup,"layer-param":layer_param,
       "embed-dim":args.embedding_dim,"batch-size":args.batch_size,"learning-rate":args.lr,"num-layers":args.num_layers,"multi-granularity":1})
amex_df=pd.concat([amex_df,tempt],ignore_index=True)

#### NISER

In [251]:
# Point the import path at the NISER sources instead of the SRGNN ones, then
# build the argument namespace used by the NISER parameter-count code.

# De-duplicate sys.path while keeping its order: list(set(...)) would shuffle
# the entries and could change which directory wins when a module is imported.
sys.path=list(dict.fromkeys(sys.path))
# NOTE(review): absolute, machine-specific path; checkpoints in this notebook
# are resolved from os.getcwd() instead - confirm both point at the same checkout.
root_dir='/home/ec2-user/SageMaker/sequence-based-recommendation'
old_model_name="SRGNN"
old_model_path=os.path.join(root_dir,old_model_name)
sys.path=[x for x in sys.path if x !=old_model_path]

model_name="NISER"
model_path=os.path.join(root_dir,model_name)
sys.path.append(model_path)

from NISER.niser import NISER
from NISER.collate import (collate_fn_factory, seq_to_session_graph)
from NISER import metric
from NISER.dataset import load_data,RecSysDataset

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--n_items', type=int, default=37484, help='number of unique items. 37484 for yoochoose')
parser.add_argument('--embedding-dim', type=int, default=128, help='the embedding size')
parser.add_argument('--num-layers', type=int, default=1, help='the number of layers')
parser.add_argument('--feat-drop', type=float, default=0.1, help='the dropout ratio for features')
parser.add_argument('--batch-size', type=int, default=100, help='the batch size for training')
parser.add_argument('--epochs', type=int, default=30, help='the number of training epochs')
parser.add_argument("--output_name", type=str, default="amex_metrics.txt")
parser.add_argument("--model_checkpoint", type=str, default="amex_checkpoint.pth")
parser.add_argument('--topk', type=int, default=20, help='number of top score items selected for calculating recall and mrr metrics')
parser.add_argument('--num-workers',type=int,default=0,help='the number of processes to load the input graphs')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
# parse_known_args ignores arguments the parser does not declare.
args,_= parser.parse_known_args()

# NISER on yoochoose1_64: rebuild the model, check that the saved checkpoint
# fits the architecture, and add its parameter counts to `df`.
model_name="NISER"
args.n_items=37484
args.batch_size=100 
# The parser declares --epochs, so the attribute is `epochs`; the original
# assignment to `args.epoch` only created an unrelated attribute.
args.epochs=30 
args.embedding_dim=128
args.num_layers=1
args.model_checkpoint='yoochoose1_64_checkpoint.pth'
model = NISER(args.n_items, args.embedding_dim, args.num_layers, feat_drop=args.feat_drop)
model_path=os.path.join(os.getcwd(),model_name, args.model_checkpoint)
# map_location="cpu": only parameter shapes are needed, so a checkpoint saved
# from a GPU run can still be read on a CPU-only host.
ckpt = torch.load(model_path, map_location="cpu")
model.load_state_dict(ckpt['state_dict'])

num_param=np.sum([p.nelement() for p in model.parameters()])
# Look the embedding table up by name so a missing entry raises instead of
# silently reusing the `embedding_lookup` computed for the previous model.
named_params=dict(model.named_parameters())
if "embedding.weight" not in named_params:
    raise KeyError("{} has no parameter named 'embedding.weight'".format(model_name))
embedding_lookup=named_params["embedding.weight"].nelement()
# Every parameter that is not part of the embedding table.
layer_param=num_param-embedding_lookup

row={"Model":[model_name],"num_parameter":num_param,"embedding-lookup":embedding_lookup,"layer-param":layer_param,
     "embed-dim":args.embedding_dim,"batch-size":args.batch_size,"learning-rate":args.lr,"num-layers":args.num_layers,"multi-granularity":1}
tempt=pd.DataFrame(row)
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
# Note: re-running this code adds the row to `df` again.
df=pd.concat([df,tempt],ignore_index=True)
# df.style.format({'num_parameter':'{:,}','embedding-lookup':'{:,}','layer-param':'{:,}','learning-rate':'{:.3f}'})

##### amex
# Rebuild NISER with the amex settings, check that the saved checkpoint fits
# the architecture, and add its parameter counts to `amex_df`.
args.n_items=556
args.batch_size=32 
# The parser declares --epochs, so the attribute is `epochs`; the original
# assignment to `args.epoch` only created an unrelated attribute.
args.epochs=30 
args.embedding_dim=256
args.num_layers=1
args.model_checkpoint='amex_checkpoint.pth'

model = NISER(args.n_items, args.embedding_dim, args.num_layers, feat_drop=args.feat_drop)
model_path=os.path.join(os.getcwd(),model_name, args.model_checkpoint)
# map_location="cpu": only parameter shapes are needed, so a checkpoint saved
# from a GPU run can still be read on a CPU-only host.
ckpt = torch.load(model_path, map_location="cpu")
model.load_state_dict(ckpt['state_dict'])

num_param=np.sum([p.nelement() for p in model.parameters()])
# Look the embedding table up by name so a missing entry raises instead of
# silently reusing the `embedding_lookup` computed for the previous model.
named_params=dict(model.named_parameters())
if "embedding.weight" not in named_params:
    raise KeyError("{} has no parameter named 'embedding.weight'".format(model_name))
embedding_lookup=named_params["embedding.weight"].nelement()
# Every parameter that is not part of the embedding table.
layer_param=num_param-embedding_lookup

row={"Model":[model_name],"num_parameter":num_param,"embedding-lookup":embedding_lookup,"layer-param":layer_param,
     "embed-dim":args.embedding_dim,"batch-size":args.batch_size,"learning-rate":args.lr,"num-layers":args.num_layers,"multi-granularity":1}
tempt=pd.DataFrame(row)
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
# Note: re-running this code adds the row to `amex_df` again.
amex_df=pd.concat([amex_df,tempt],ignore_index=True)

#### TAGNN

In [252]:
# Point the import path at the TAGNN sources instead of the NISER ones, then
# build the argument namespace that SessionGraph receives.

# De-duplicate sys.path while keeping its order: list(set(...)) would shuffle
# the entries and could change which directory wins when a module is imported.
sys.path=list(dict.fromkeys(sys.path))
# NOTE(review): absolute, machine-specific path; checkpoints in this notebook
# are resolved from os.getcwd() instead - confirm both point at the same checkout.
root_dir='/home/ec2-user/SageMaker/sequence-based-recommendation'
old_model_name="NISER"
old_model_path=os.path.join(root_dir,old_model_name)
sys.path=[x for x in sys.path if x !=old_model_path]

model_name="TAGNN"
model_path=os.path.join(root_dir,model_name)
sys.path.append(model_path)

from TAGNN.utils import build_graph, Data, split_validation
# NOTE(review): wildcard import; the TAGNN code in this notebook uses
# SessionGraph from it. Prefer an explicit import once it is confirmed that no
# other name it provides is relied on.
from TAGNN.model import *

parser = argparse.ArgumentParser()
parser.add_argument('--batchSize', type=int, default=100, help='input batch size')
parser.add_argument('--hiddenSize', type=int, default=100, help='hidden state size')
parser.add_argument('--epoch', type=int, default=30, help='the number of epochs to train for')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')  # [0.001, 0.0005, 0.0001]
parser.add_argument('--lr_dc', type=float, default=0.1, help='learning rate decay rate')
parser.add_argument('--lr_dc_step', type=int, default=3, help='the number of steps after which the learning rate decay')
parser.add_argument('--l2', type=float, default=1e-5, help='l2 penalty')  # [0.001, 0.0005, 0.0001, 0.00005, 0.00001]
parser.add_argument("--gradient_accumulation",action='store_true', help='gradient accumulation or not')
parser.add_argument("--accumulation_steps",type=int,default=2,
                           help="Number of updates steps to accumulate before performing a backward/update pass.")
parser.add_argument('--step', type=int, default=1, help='gnn propogation steps')
parser.add_argument('--patience', type=int, default=10, help='the number of epoch to wait before early stop ')
parser.add_argument('--nonhybrid', action='store_true', help='only use the global preference to predict')
parser.add_argument('--validation', action='store_true', help='validation')
parser.add_argument('--valid_portion', type=float, default=0.1, help='split the portion of training set as validation set')
parser.add_argument("--output_name", type=str, default="amex_metrics.txt")
parser.add_argument("--model_checkpoint", type=str, default="amex_checkpoint.pth")
parser.add_argument('--topk', type=int, default=20, help='number of top score items selected for calculating recall and mrr metrics')
# parse_known_args ignores arguments the parser does not declare.
args,_= parser.parse_known_args()

# TAGNN on yoochoose1_64: rebuild the model, check that the saved checkpoint
# fits the architecture, and add its parameter counts to `df`.
model_name="TAGNN"

args.batchSize=100
args.epoch=30
args.hiddenSize=128
args.step=1
args.model_checkpoint="yoochoose1_64_checkpoint.pth"
model = SessionGraph(args, n_node=37484)
model_path=os.path.join(os.getcwd(),model_name, args.model_checkpoint)
# map_location="cpu": only parameter shapes are needed, so a checkpoint saved
# from a GPU run can still be read on a CPU-only host.
ckpt = torch.load(model_path, map_location="cpu")
model.load_state_dict(ckpt['state_dict'])

num_param=np.sum([p.nelement() for p in model.parameters()])
# Look the embedding table up by name so a missing entry raises instead of
# silently reusing the `embedding_lookup` computed for the previous model.
named_params=dict(model.named_parameters())
if "embedding.weight" not in named_params:
    raise KeyError("{} has no parameter named 'embedding.weight'".format(model_name))
embedding_lookup=named_params["embedding.weight"].nelement()
# Every parameter that is not part of the embedding table.
layer_param=num_param-embedding_lookup

# TAGNN's hiddenSize / batchSize / step are reported under the shared
# "embed-dim" / "batch-size" / "num-layers" columns.
row={"Model":[model_name],"num_parameter":num_param,"embedding-lookup":embedding_lookup,"layer-param":layer_param,
     "embed-dim":args.hiddenSize,"batch-size":args.batchSize,"learning-rate":args.lr,"num-layers":args.step,"multi-granularity":1}
tempt=pd.DataFrame(row)
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
# Note: re-running this code adds the row to `df` again.
df=pd.concat([df,tempt],ignore_index=True)
# df.style.format({'num_parameter':'{:,}','embedding-lookup':'{:,}','layer-param':'{:,}','learning-rate':'{:.3f}'})

##### amex
# Rebuild TAGNN with the amex settings, check that the saved checkpoint fits
# the architecture, and add its parameter counts to `amex_df`.
args.batchSize=32
args.epoch=30
args.hiddenSize=256
args.step=1
args.model_checkpoint="amex_checkpoint.pth"

n_node=556
model = SessionGraph(args, n_node)
model_path=os.path.join(os.getcwd(),model_name, args.model_checkpoint)
# map_location="cpu": only parameter shapes are needed, so a checkpoint saved
# from a GPU run can still be read on a CPU-only host.
ckpt = torch.load(model_path, map_location="cpu")
model.load_state_dict(ckpt['state_dict'])
num_param=np.sum([p.nelement() for p in model.parameters()])
# Look the embedding table up by name so a missing entry raises instead of
# silently reusing the `embedding_lookup` computed for the previous model.
named_params=dict(model.named_parameters())
if "embedding.weight" not in named_params:
    raise KeyError("{} has no parameter named 'embedding.weight'".format(model_name))
embedding_lookup=named_params["embedding.weight"].nelement()
# Every parameter that is not part of the embedding table.
layer_param=num_param-embedding_lookup

# TAGNN's hiddenSize / batchSize / step are reported under the shared
# "embed-dim" / "batch-size" / "num-layers" columns.
row={"Model":[model_name],"num_parameter":num_param,"embedding-lookup":embedding_lookup,"layer-param":layer_param,
     "embed-dim":args.hiddenSize,"batch-size":args.batchSize,"learning-rate":args.lr,"num-layers":args.step,"multi-granularity":1}
tempt=pd.DataFrame(row)
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
# Note: re-running this code adds the row to `amex_df` again.
amex_df=pd.concat([amex_df,tempt],ignore_index=True)

#### LESSR

In [253]:
# Point the import path at the LESSR sources instead of the TAGNN ones, then
# build the argument namespace used by the LESSR parameter-count code.

# De-duplicate sys.path while keeping its order: list(set(...)) would shuffle
# the entries and could change which directory wins when a module is imported.
sys.path=list(dict.fromkeys(sys.path))
# NOTE(review): absolute, machine-specific path; checkpoints in this notebook
# are resolved from os.getcwd() instead - confirm both point at the same checkout.
root_dir='/home/ec2-user/SageMaker/sequence-based-recommendation'
old_model_name="TAGNN"
old_model_path=os.path.join(root_dir,old_model_name)
sys.path=[x for x in sys.path if x !=old_model_path]

model_name="LESSR"
model_path=os.path.join(root_dir,model_name)
sys.path.append(model_path)

from LESSR.lessr import LESSR
from LESSR.collate import (collate_fn_factory, seq_to_eop_multigraph,seq_to_shortcut_graph)
from LESSR import metric
from LESSR.dataset import load_data,RecSysDataset

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')  # [0.001, 0.0005, 0.0001]
parser.add_argument('--n_items', type=int, default=37484, help='number of unique items. 37484 for yoochoose')
parser.add_argument('--embedding-dim', type=int, default=128, help='the embedding size')
parser.add_argument('--num-layers', type=int, default=1, help='the number of layers')
parser.add_argument('--feat-drop', type=float, default=0.1, help='the dropout ratio for features')
parser.add_argument('--step', type=int, default=1, help='gnn propogation steps')
parser.add_argument('--batch-size', type=int, default=100, help='the batch size for training')
parser.add_argument('--epochs', type=int, default=30, help='the number of training epochs')
parser.add_argument("--model_checkpoint", type=str, default="amex_checkpoint.pth") 

# parse_known_args ignores arguments the parser does not declare.
args,_= parser.parse_known_args()

# LESSR on yoochoose1_64: build the model and add its parameter counts to `df`.
model_name="LESSR"

args.n_items=37484
args.batch_size=100 
# The parser declares --epochs, so the attribute is `epochs`; the original
# assignment to `args.epoch` only created an unrelated attribute.
args.epochs=30 
args.embedding_dim=128
args.num_layers=3
args.model_checkpoint='yoochoose1_64_checkpoint.pth'

model = LESSR(args.n_items, args.embedding_dim, args.num_layers, feat_drop=args.feat_drop)
# The yoochoose checkpoint is not loaded here (these lines were already
# commented out); the counts below are taken from the freshly built model.
# model_path=os.path.join(os.getcwd(),model_name, args.model_checkpoint)
# ckpt = torch.load(model_path)
# model.load_state_dict(ckpt['state_dict'])

num_param=np.sum([p.nelement() for p in model.parameters()])
# Look the embedding table up by name so a missing entry raises instead of
# silently reusing the `embedding_lookup` computed for the previous model.
named_params=dict(model.named_parameters())
if "embedding.weight" not in named_params:
    raise KeyError("{} has no parameter named 'embedding.weight'".format(model_name))
embedding_lookup=named_params["embedding.weight"].nelement()
# Every parameter that is not part of the embedding table.
layer_param=num_param-embedding_lookup

row={"Model":[model_name],"num_parameter":num_param,"embedding-lookup":embedding_lookup,"layer-param":layer_param,
     "embed-dim":args.embedding_dim,"batch-size":args.batch_size,"learning-rate":args.lr,"num-layers":args.num_layers,"multi-granularity":1}
tempt=pd.DataFrame(row)
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
# Note: re-running this code adds the row to `df` again.
df=pd.concat([df,tempt],ignore_index=True)
# df.style.format({'num_parameter':'{:,}','embedding-lookup':'{:,}','layer-param':'{:,}','learning-rate':'{:.3f}'})

### amex
# Rebuild LESSR with the amex settings, check that the saved checkpoint fits
# the architecture, and add its parameter counts to `amex_df`.
args.n_items=556
args.batch_size=32 
# The parser declares --epochs, so the attribute is `epochs`; the original
# assignment to `args.epoch` only created an unrelated attribute.
args.epochs=30 
args.embedding_dim=256
args.num_layers=3
args.model_checkpoint='amex_checkpoint.pth'

model = LESSR(args.n_items, args.embedding_dim, args.num_layers, feat_drop=args.feat_drop)
model_path=os.path.join(os.getcwd(),model_name, args.model_checkpoint)
# map_location="cpu": only parameter shapes are needed, so a checkpoint saved
# from a GPU run can still be read on a CPU-only host.
ckpt = torch.load(model_path, map_location="cpu")
model.load_state_dict(ckpt['state_dict'])

num_param=np.sum([p.nelement() for p in model.parameters()])
# Look the embedding table up by name so a missing entry raises instead of
# silently reusing the `embedding_lookup` computed for the previous model.
named_params=dict(model.named_parameters())
if "embedding.weight" not in named_params:
    raise KeyError("{} has no parameter named 'embedding.weight'".format(model_name))
embedding_lookup=named_params["embedding.weight"].nelement()
# Every parameter that is not part of the embedding table.
layer_param=num_param-embedding_lookup

row={"Model":[model_name],"num_parameter":num_param,"embedding-lookup":embedding_lookup,"layer-param":layer_param,
     "embed-dim":args.embedding_dim,"batch-size":args.batch_size,"learning-rate":args.lr,"num-layers":args.num_layers,"multi-granularity":1}
tempt=pd.DataFrame(row)
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
# Note: re-running this code adds the row to `amex_df` again.
amex_df=pd.concat([amex_df,tempt],ignore_index=True)

#### MSGIFSR

In [254]:
# Point the import path at the MSGIFSR sources instead of the LESSR ones, then
# build the argument namespace used by the MSGIFSR parameter-count code.

# De-duplicate sys.path while keeping its order: list(set(...)) would shuffle
# the entries and could change which directory wins when a module is imported.
sys.path=list(dict.fromkeys(sys.path))
# NOTE(review): absolute, machine-specific path; checkpoints in this notebook
# are resolved from os.getcwd() instead - confirm both point at the same checkout.
root_dir='/home/ec2-user/SageMaker/sequence-based-recommendation'
# The LESSR section appended <root_dir>/LESSR to sys.path. With the original
# empty name, os.path.join(root_dir,"") never matched that entry, so it was
# never removed.
old_model_name="LESSR"
old_model_path=os.path.join(root_dir,old_model_name)
sys.path=[x for x in sys.path if x !=old_model_path]

model_name="MSGIFSR"
model_path=os.path.join(root_dir,model_name)
sys.path.append(model_path)

from MSGIFSR.msgifsr import MSGIFSR
from MSGIFSR.collate import (collate_fn_factory_ccs, seq_to_ccs_graph)
from MSGIFSR import metric
from MSGIFSR.dataset import load_data,RecSysDataset


def _str2bool(value):
    """Convert a command-line token to a bool.

    argparse's `type=bool` calls bool() on the raw string, so every non-empty
    value (including "False") becomes True. The empty string still maps to
    False, as it did with `type=bool`.
    """
    if isinstance(value,bool):
        return value
    text=value.strip().lower()
    if text in ("1","true","t","yes","y","on"):
        return True
    if text in ("","0","false","f","no","n","off"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(value))


parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument('--n_items', type=int, default=37484, help='number of unique items. 37484 for yoochoose')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')  # [0.001, 0.0005, 0.0001]
parser.add_argument('--embedding-dim', type=int, default=128, help='the embedding size')
parser.add_argument('--num-layers', type=int, default=1, help='the number of layers')
parser.add_argument('--feat-drop', type=float, default=0.1, help='the dropout ratio for features')
parser.add_argument('--step', type=int, default=1, help='gnn propogation steps')
parser.add_argument('--batch-size', type=int, default=100, help='the batch size for training')
parser.add_argument('--epochs', type=int, default=30, help='the number of training epochs')
parser.add_argument("--model_checkpoint", type=str, default="amex_checkpoint.pth") 
parser.add_argument('--order',type=int,default=3,help='order of msg')
parser.add_argument('--reducer',type=str,default='mean',help='method for reducer')
parser.add_argument('--norm',type=_str2bool,default=True,help='whether use l2 norm')
parser.add_argument('--extra',action='store_true',help='whether use REnorm.')
parser.add_argument('--fusion',action='store_true',help='whether use IFR.')
# parse_known_args ignores arguments the parser does not declare.
args,_= parser.parse_known_args()

# MSGIFSR on yoochoose1_64: build the model on CPU, add its parameter counts
# to `df`, then display the finished yoochoose table. No checkpoint is loaded;
# the counts are taken from the freshly built model.
model_name="MSGIFSR"

args.n_items=37484
args.batch_size=100 
# The parser declares --epochs, so the attribute is `epochs`; the original
# assignment to `args.epoch` only created an unrelated attribute.
args.epochs=30 
args.embedding_dim=128
args.num_layers=1
args.order=3
args.model_checkpoint='yoochoose1_64_checkpoint.pth'

device = torch.device('cpu')
model = MSGIFSR(args.n_items, args.embedding_dim, args.num_layers, dropout=args.feat_drop, reducer=args.reducer, order=args.order, 
                norm=args.norm, extra=args.extra, fusion=args.fusion, device=device)

num_param=np.sum([p.nelement() for p in model.parameters()])
# MSGIFSR names its embedding table "embeddings" (plural). Look it up by name
# so a missing entry raises instead of silently reusing the `embedding_lookup`
# computed for the previous model.
named_params=dict(model.named_parameters())
if "embeddings.weight" not in named_params:
    raise KeyError("{} has no parameter named 'embeddings.weight'".format(model_name))
embedding_lookup=named_params["embeddings.weight"].nelement()
# Every parameter that is not part of the embedding table.
layer_param=num_param-embedding_lookup

row={"Model":[model_name],"num_parameter":num_param,"embedding-lookup":embedding_lookup,"layer-param":layer_param,
     "embed-dim":args.embedding_dim,"batch-size":args.batch_size,"learning-rate":args.lr,"num-layers":args.num_layers,"multi-granularity":3}
tempt=pd.DataFrame(row)
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
# Note: re-running this cell adds the row to `df` again.
df=pd.concat([df,tempt],ignore_index=True)
# Last expression of the cell: the styled table is what gets displayed.
(
    df.style
    .format({'num_parameter':'{:,}','embedding-lookup':'{:,}','layer-param':'{:,}','learning-rate':'{:.3f}'})
    .set_caption("Model Parameters in yoochoose1_64")
    .set_table_styles([{'selector': 'caption','props': [('color', 'red'),('font-size', '15px')]}])
)

Unnamed: 0,Model,num_parameter,embedding-lookup,layer-param,embed-dim,batch-size,learning-rate,num-layers,multi-granularity
0,NARM,4962688,4797952,164736,128,100,0.001,,1
1,SRGNN,5044736,4797952,246784,128,100,0.001,1.0,1
2,NISER,5044736,4797952,246784,128,100,0.001,1.0,1
3,TAGNN,5078400,4797952,280448,128,100,0.001,1.0,1
4,LESSR,6334316,4797952,1536364,128,100,0.001,3.0,1
5,MSGIFSR,6449156,4797952,1651204,128,100,0.001,1.0,3


In [255]:
# MSGIFSR with the amex settings: build the model on CPU, add its parameter
# counts to `amex_df`, then display the finished amex table. No checkpoint is
# loaded; the counts are taken from the freshly built model.
args.n_items=556
args.batch_size=32
# The parser declares --epochs, so the attribute is `epochs`; the original
# assignment to `args.epoch` only created an unrelated attribute.
args.epochs=30 
args.embedding_dim=256
args.num_layers=1
args.order=3
args.model_checkpoint='amex_checkpoint.pth'
device = torch.device('cpu')
model = MSGIFSR(args.n_items, args.embedding_dim, args.num_layers, dropout=args.feat_drop, reducer=args.reducer, order=args.order, 
                norm=args.norm, extra=args.extra, fusion=args.fusion, device=device)

num_param=np.sum([p.nelement() for p in model.parameters()])
# MSGIFSR names its embedding table "embeddings" (plural). Look it up by name
# so a missing entry raises instead of silently reusing the `embedding_lookup`
# computed for the previous model.
named_params=dict(model.named_parameters())
if "embeddings.weight" not in named_params:
    raise KeyError("{} has no parameter named 'embeddings.weight'".format(model_name))
embedding_lookup=named_params["embeddings.weight"].nelement()
# Every parameter that is not part of the embedding table.
layer_param=num_param-embedding_lookup

row={"Model":[model_name],"num_parameter":num_param,"embedding-lookup":embedding_lookup,"layer-param":layer_param,
     "embed-dim":args.embedding_dim,"batch-size":args.batch_size,"learning-rate":args.lr,"num-layers":args.num_layers,"multi-granularity":3}
tempt=pd.DataFrame(row)
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
# Note: re-running this cell adds the row to `amex_df` again.
amex_df=pd.concat([amex_df,tempt],ignore_index=True)
# Last expression of the cell: the styled table is what gets displayed.
(
    amex_df.style
    .format({'num_parameter':'{:,}','embedding-lookup':'{:,}','layer-param':'{:,}','learning-rate':'{:.3f}'})
    .set_caption("Model Parameters in amex_explorepoi-poi_category")
    .set_table_styles([{'selector': 'caption','props': [('color', 'red'),('font-size', '15px')]}])
)

Unnamed: 0,Model,num_parameter,embedding-lookup,layer-param,embed-dim,batch-size,learning-rate,num-layers,multi-granularity
0,NARM,799488,142336,657152,256,32,0.001,,1
1,SRGNN,1127424,142336,985088,256,32,0.001,1.0,1
2,NISER,1127424,142336,985088,256,32,0.001,1.0,1
3,TAGNN,1260288,142336,1117952,256,32,0.001,1.0,1
4,LESSR,6122540,142336,5980204,256,32,0.001,3.0,1
5,MSGIFSR,6688772,142336,6546436,256,32,0.001,1.0,3


## multi-granularity level (MSGIFSR)

In [259]:
# MSGIFSR results at embedding dimension 128: one row per dataset and
# multi-granularity order. This cell starts a fresh `df`.
root_dir="./MSGIFSR/hyper-tuning/output_metrics"
embed_dim=128
mg_orders=[1,2,3]
# (label stored in the "Dataset" column, suffix used in the metric file names)
dataset_suffixes=[("yoochoose1_64","yh"),("diginetica","dg"),("amex-poi-category","amex")]

mg_frames=[]
for dataset,suffix in dataset_suffixes:
    # extract_file returns a (recall, mrr) pair for one metrics file.
    scores=[extract_file(root_dir,file_name="order_{}_emb_{}_{}.txt".format(order,embed_dim,suffix))
            for order in mg_orders]
    mg_frames.append(pd.DataFrame({
        "Dataset":[dataset]*len(mg_orders),
        "multi-granularity":mg_orders,
        "embed-dim":[embed_dim]*len(mg_orders),
        "recall@20":[recall for recall,_ in scores],
        "mrr@20":[mrr for _,mrr in scores],
    }))
# A single concat replaces the chain of DataFrame.append calls (append was
# removed in pandas 2.0); row order is unchanged: yoochoose, diginetica, amex.
df=pd.concat(mg_frames,ignore_index=True)

In [260]:
# MSGIFSR results at embedding dimension 256, added below the rows already in
# `df`. Note: re-running this cell adds the same rows again.
root_dir="./MSGIFSR/hyper-tuning/output_metrics"
embed_dim=256
mg_orders=[1,2,3]
# (label stored in the "Dataset" column, suffix used in the metric file names)
dataset_suffixes=[("yoochoose1_64","yh"),("diginetica","dg"),("amex-poi-category","amex")]

mg_frames=[]
for dataset,suffix in dataset_suffixes:
    # extract_file returns a (recall, mrr) pair for one metrics file.
    scores=[extract_file(root_dir,file_name="order_{}_emb_{}_{}.txt".format(order,embed_dim,suffix))
            for order in mg_orders]
    mg_frames.append(pd.DataFrame({
        "Dataset":[dataset]*len(mg_orders),
        "multi-granularity":mg_orders,
        "embed-dim":[embed_dim]*len(mg_orders),
        "recall@20":[recall for recall,_ in scores],
        "mrr@20":[mrr for _,mrr in scores],
    }))
# A single concat replaces the chain of DataFrame.append calls (append was
# removed in pandas 2.0); row order is unchanged: yoochoose, diginetica, amex.
df=pd.concat([df]+mg_frames,ignore_index=True)

In [261]:
# MSGIFSR results at embedding dimension 512, added below the rows already in
# `df`. Note: re-running this cell adds the same rows again.
root_dir="./MSGIFSR/hyper-tuning/output_metrics"
embed_dim=512
mg_orders=[1,2,3]
# (label stored in the "Dataset" column, suffix used in the metric file names)
dataset_suffixes=[("yoochoose1_64","yh"),("diginetica","dg"),("amex-poi-category","amex")]

mg_frames=[]
for dataset,suffix in dataset_suffixes:
    # extract_file returns a (recall, mrr) pair for one metrics file.
    scores=[extract_file(root_dir,file_name="order_{}_emb_{}_{}.txt".format(order,embed_dim,suffix))
            for order in mg_orders]
    mg_frames.append(pd.DataFrame({
        "Dataset":[dataset]*len(mg_orders),
        "multi-granularity":mg_orders,
        "embed-dim":[embed_dim]*len(mg_orders),
        "recall@20":[recall for recall,_ in scores],
        "mrr@20":[mrr for _,mrr in scores],
    }))
# A single concat replaces the chain of DataFrame.append calls (append was
# removed in pandas 2.0); row order is unchanged: yoochoose, diginetica, amex.
df=pd.concat([df]+mg_frames,ignore_index=True)

In [262]:
# Render both metric columns as percentages with two decimals.
percent_columns=('recall@20','mrr@20')
df.style.format({column:'{:.2%}' for column in percent_columns})

Unnamed: 0,Dataset,multi-granularity,embed-dim,recall@20,mrr@20
0,yoochoose1_64,1,128,72.26%,32.05%
1,yoochoose1_64,2,128,72.31%,32.17%
2,yoochoose1_64,3,128,72.23%,32.12%
3,diginetica,1,128,55.71%,19.17%
4,diginetica,2,128,55.78%,19.16%
5,diginetica,3,128,55.73%,19.22%
6,amex-poi-category,1,128,82.57%,62.20%
7,amex-poi-category,2,128,77.65%,61.18%
8,amex-poi-category,3,128,85.39%,63.11%
9,yoochoose1_64,1,256,72.21%,32.05%


In [263]:
# Save the multi-granularity table to the working directory.
# NOTE(review): the embedding-dimension section later in this notebook also
# writes "output.csv", so that export replaces this file - confirm that is intended.
df.to_csv(path_or_buf="output.csv")

## embedding dimension (amex-poi-category)

In [241]:
# NARM on amex: one row per embedding dimension. This cell starts a fresh
# `df` for the embedding-dimension comparison.
root_dir="./NARM/hyper-tuning/output_metrics"
embed_dims=[64,128,256,512,768,1024]
# extract_file returns a (recall, mrr) pair for one metrics file.
scores=[extract_file(root_dir,file_name="emb_{}_amex.txt".format(dim)) for dim in embed_dims]

# Built directly as a DataFrame so `df` is never temporarily a plain dict.
df=pd.DataFrame({
    "Model":["NARM"]*len(embed_dims),
    "embed-dim":embed_dims,
    "recall@20":[recall for recall,_ in scores],
    "mrr@20":[mrr for _,mrr in scores],
})

In [242]:
# SRGNN (1 layer) on amex: one row per embedding dimension, added to `df`.
# NOTE(review): "hypyer-tuning" differs from the "hyper-tuning" directory used
# for the other models - confirm the on-disk name before changing it.
root_dir="./SRGNN/hypyer-tuning/output_metrics"
embed_dims=[64,128,256,512,768,1024]
# extract_file returns a (recall, mrr) pair for one metrics file.
scores=[extract_file(root_dir,file_name="layer_1_emb_{}_amex.txt".format(dim)) for dim in embed_dims]

tempt=pd.DataFrame({
    "Model":["SRGNN"]*len(embed_dims),
    "embed-dim":embed_dims,
    "recall@20":[recall for recall,_ in scores],
    "mrr@20":[mrr for _,mrr in scores],
})
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
# Note: re-running this cell adds the same rows again.
df=pd.concat([df,tempt],ignore_index=True)

In [243]:
# NISER (1 layer) on amex: one row per embedding dimension, added to `df`.
root_dir="./NISER/hyper-tuning/output_metrics"
embed_dims=[64,128,256,512,768,1024]
# extract_file returns a (recall, mrr) pair for one metrics file.
scores=[extract_file(root_dir,file_name="layer_1_emb_{}_amex.txt".format(dim)) for dim in embed_dims]

tempt=pd.DataFrame({
    "Model":["NISER"]*len(embed_dims),
    "embed-dim":embed_dims,
    "recall@20":[recall for recall,_ in scores],
    "mrr@20":[mrr for _,mrr in scores],
})
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
# Note: re-running this cell adds the same rows again.
df=pd.concat([df,tempt],ignore_index=True)

In [244]:
# LESSR (3 layers) on amex: one row per embedding dimension, added to `df`.
root_dir="./LESSR/hyper-tuning/output_metrics"
# No 1024 entry for LESSR: the original call for that size was commented out.
embed_dims=[64,128,256,512,768]
# extract_file returns a (recall, mrr) pair for one metrics file.
scores=[extract_file(root_dir,file_name="layer_3_emb_{}_amex.txt".format(dim)) for dim in embed_dims]

tempt=pd.DataFrame({
    "Model":["LESSR"]*len(embed_dims),
    "embed-dim":embed_dims,
    "recall@20":[recall for recall,_ in scores],
    "mrr@20":[mrr for _,mrr in scores],
})
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
# Note: re-running this cell adds the same rows again.
df=pd.concat([df,tempt],ignore_index=True)

In [245]:
# MSGIFSR (order 3) on amex: one row per embedding dimension, added to `df`,
# followed by the display of the complete comparison table.
root_dir="./MSGIFSR/hyper-tuning/output_metrics"
embed_dims=[64,128,256,512,768,1024]
# extract_file returns a (recall, mrr) pair for one metrics file.
scores=[extract_file(root_dir,file_name="order_3_emb_{}_amex.txt".format(dim)) for dim in embed_dims]

tempt=pd.DataFrame({
    "Model":["MSGIFSR"]*len(embed_dims),
    "embed-dim":embed_dims,
    "recall@20":[recall for recall,_ in scores],
    "mrr@20":[mrr for _,mrr in scores],
})
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
# Note: re-running this cell adds the same rows again.
df=pd.concat([df,tempt],ignore_index=True)
# Last expression of the cell: metrics rendered as two-decimal percentages.
df.style.format({'recall@20':'{:.2%}','mrr@20':'{:.2%}'})

Unnamed: 0,Model,embed-dim,recall@20,mrr@20
0,NARM,64,66.43%,61.00%
1,NARM,128,66.71%,58.00%
2,NARM,256,60.81%,49.98%
3,NARM,512,64.56%,40.20%
4,NARM,768,65.00%,51.97%
5,NARM,1024,60.50%,49.61%
6,SRGNN,64,62.37%,42.99%
7,SRGNN,128,65.18%,38.43%
8,SRGNN,256,73.86%,41.79%
9,SRGNN,512,84.17%,51.41%


In [246]:
# Save the embedding-dimension table to the working directory.
# NOTE(review): the multi-granularity section earlier in this notebook writes
# the same "output.csv", so this export replaces that file - confirm that is intended.
df.to_csv(path_or_buf="output.csv")