In [None]:
%load_ext autoreload
%autoreload
import os
from tqdm import tqdm
import numpy as np
import torch
import pandas as pd
from sklearn.preprocessing import normalize
from RS.utils.dataset import Dataset as Mydataset
from RS.utils.dataset import combine_multi_domain
from RS.utils.dataset import user_item_clustering
from RS.utils.mf import ALS_MF
from RS.utils.dictutils import *
# Root directory (relative to the notebook) that holds the per-domain CSVs.
dataroot = os.path.join("data")

# Device handed to the clustering helpers. GPU 4 is the preferred card, but
# building torch.device('cuda:4') unconditionally only fails later, at first
# use, on machines with fewer than five GPUs. Fall back to the first GPU and
# then to the CPU so the notebook still runs from a fresh kernel elsewhere.
_PREFERRED_GPU = 4
_gpu_count = torch.cuda.device_count() if torch.cuda.is_available() else 0
if _gpu_count > _PREFERRED_GPU:
    d = torch.device(f'cuda:{_PREFERRED_GPU}')
elif _gpu_count > 0:
    d = torch.device('cuda:0')
else:
    d = torch.device('cpu')

In [None]:
# (dataset key, path components below `dataroot`) for every CSV used here.
_csv_locations = (
    ("training_user_course", ("course", "train.csv")),
    ("training_user_book", ("book", "user_cate3_train.csv")),
    ("testing_user_course", ("course", "test.csv")),
    ("testing_user_book", ("book", "user_cate3_test.csv")),
)

# Resolve each entry to a path; key order matches the tuple above.
datafolder = {
    key: os.path.join(dataroot, *parts)
    for key, parts in _csv_locations
}

# Project dataset wrapper; tables are fetched later by key via getdata().
dataset = Mydataset(datafolder=datafolder)

## Clustering

### Book

In [None]:
# Fetch the book training table, asking the loader to normalize its values,
# then preview the first rows.
user_book_df = dataset.getdata("training_user_book", normalize_value=True)
user_book_df.head()

#### user - book

In [None]:
# Run the project clustering helper on the book table with 100 clusters,
# passing the book-dataset result folder as `savingpath`.
_book_clustering_dir = os.path.join("result", "CBMF", "clustering", "bookdataset")
user_item_clustering(
    User_Item_df=user_book_df,
    num_clusters=100,
    savingpath=_book_clustering_dir,
    d=d,
)

#### Book clustering by the first 2 layers

In [None]:
# Every column except the user id is treated as a book category.
cate = user_book_df.drop(columns=['uid']).columns.tolist()

# Split the category list into consecutive chunks of `step` columns and
# number the chunks 0, 1, 2, ... (the final chunk may be shorter).
step = 10
layer2_cate = dict(
    enumerate(cate[start:start + step] for start in range(0, len(cate), step))
)

# Persist the chunk -> categories mapping next to the book clustering output.
_layer2_path = os.path.join(
    "result", "CBMF", "clustering", "bookdataset", "itemclustering", "layer2.json"
)
writejson(layer2_cate, jsfilepath=_layer2_path)


### user - course 

In [None]:
# A single domain group made of both course splits. The two booleans on each
# entry are passed through as-is; their meaning is defined by
# combine_multi_domain (not visible in this notebook).
_course_splits = [
    ("training_user_course", False, False),
    ("testing_user_course", False, False),
]
user_course_df = combine_multi_domain(Dataset=dataset, domains=[_course_splits])
user_course_df.head()

In [None]:
# Same clustering helper as for books, now on the combined course table,
# with the course-dataset result folder as `savingpath`.
_course_clustering_dir = os.path.join("result", "CBMF", "clustering", "coursedataset")
user_item_clustering(
    User_Item_df=user_course_df,
    num_clusters=100,
    savingpath=_course_clustering_dir,
    d=d,
)

## Cross domain cluster level matrix

In [None]:
# Shared prefixes of the clustering result folders.
_clustering_root = os.path.join("result", "CBMF", "clustering")
_course_root = os.path.join(_clustering_root, "coursedataset")
_book_root = os.path.join(_clustering_root, "bookdataset")

# Book item clusters are read from the association-rules file under data/,
# not from the bookdataset/itemclustering output.
book_cluster = loadjson(
    os.path.join("data", "book", "association", "rulesL3.json")
)

# Course item clusters and both user clusterings come from the result folders.
course_cluster = loadjson(
    os.path.join(_course_root, "itemclustering", "cluster.json")
)
user_course_cluster = loadjson(
    os.path.join(_course_root, "userclustering", "cluster.json")
)
user_book_cluster = loadjson(
    os.path.join(_book_root, "userclustering", "cluster.json")
)

# Append the users of the book test split as one extra user cluster, keyed
# by the next free cluster number.
_test_book_users = dataset.getdata("testing_user_book").uid.tolist()

# The key is built with the same type as the existing keys: a dict decoded
# from JSON normally has str keys, so a bare int here would leave the mapping
# with mixed "0".."N-1" / N keys. NOTE(review): assumes loadjson does not
# convert keys itself — if it yields ints this is identical to the old code.
_key_type = type(next(iter(user_book_cluster), 0))
user_book_cluster[_key_type(len(user_book_cluster))] = _test_book_users

In [None]:
%autoreload
from RS.utils.dataset import cluster_level_matrix

### Book 

In [None]:
# Cluster-level matrix of the book training table, with users grouped by the
# course-domain user clusters and items grouped by the book clusters.
courseUser_book = cluster_level_matrix(
    R=dataset.getdata("training_user_book"),
    user_cluster=user_course_cluster,
    item_cluster=book_cluster,
)
print(courseUser_book.shape)

In [None]:
# Same book training table, now with users grouped by the book-domain user
# clusters (which include the appended test-user cluster).
bookUser_book = cluster_level_matrix(
    R=dataset.getdata("training_user_book"),
    user_cluster=user_book_cluster,
    item_cluster=book_cluster,
)

In [None]:
# Stack the course-user block on top of the book-user block (along axis 0),
# then L1-normalize along axis 1.
_book_blocks = (courseUser_book, bookUser_book)
user_book_cluster_level = normalize(
    np.concatenate(_book_blocks, axis=0),
    norm="l1",
    axis=1,
)
print(user_book_cluster_level.shape)

In [None]:
# Persist the book cluster-level matrix; np.save appends ".npy" to the name.
_assbook_path = os.path.join('result', "CBMF", "clustering", "AssbookC")
np.save(_assbook_path, user_book_cluster_level)

### Course

In [None]:
# Cluster-level matrix of the combined course table, with users grouped by
# the course-domain user clusters and items by the course clusters.
courseUser_course = cluster_level_matrix(
    R=user_course_df,
    user_cluster=user_course_cluster,
    item_cluster=course_cluster,
)

In [None]:
# Same course table, with users grouped by the book-domain user clusters.
bookUser_course = cluster_level_matrix(
    R=user_course_df,
    user_cluster=user_book_cluster,
    item_cluster=course_cluster,
)

In [None]:
# Stack the course-user block on top of the book-user block (along axis 0).
# NOTE(review): unlike the book matrix, this one is not L1-normalized here —
# confirm that is intended.
_course_blocks = (courseUser_course, bookUser_course)
user_course_cluster_level = np.concatenate(_course_blocks, axis=0)
print(user_course_cluster_level.shape)

In [None]:
# Persist the course cluster-level matrix; np.save appends ".npy" to the name.
_coursec_path = os.path.join('result', "CBMF", "clustering", "courseC")
np.save(_coursec_path, user_course_cluster_level)

### Combine

In [None]:
# Reload the course cluster-level matrix from the file saved earlier in this
# notebook; the book matrix is taken from memory, not reloaded.
_coursec_file = os.path.join('result', "CBMF", "clustering", "courseC.npy")
user_course_cluster_level = np.load(_coursec_file)

In [None]:
# Join the book and course cluster-level matrices along axis 1 (book first).
_domain_blocks = (user_book_cluster_level, user_course_cluster_level)
cb = np.concatenate(_domain_blocks, axis=1)
print(cb.shape)

In [None]:
# Persist the combined cross-domain matrix; np.save appends ".npy" to the name.
_ass_cb_path = os.path.join('result', "CBMF", "clustering", "Ass_cb")
np.save(_ass_cb_path, cb)