# Theoretical task

In case of such matrix of interactions it is impossible to give any personal recommendations since each item has only one interaction so we cannot have similar items or users -- all users like different items.

In [1]:
import numpy as np
import pandas as pd

from polara import get_movielens_data

from dataprep import leave_last_out, transform_indices, reindex_data, verify_time_split, generate_interactions_matrix
from evaluation import topn_recommendations, model_evaluate, downvote_seen_items

# Task

Implement two variants of user-based KNN for the top-$n$ recommendations task when similarity matrix is calculated:
1. with neighborhood subsampling,
2. with additional weighting.

Recall, there's no reason for implementing row-wise weighting scheme in user-based KNN. So choose the weighting scheme wisely.

 In your experiments:  
- Test your solution against both weak and strong generalization. 
  - In total you'll have 4 different experiments.
- Follow the "most-recent-item" sampling strategy for constructing holdout.
  - Explain potential issues of this scheme in relation to both weak and strong generalization.  
- Report evaluation metrics, compare the models, and analyse the results.  
- Use Movielens-1M data.

**Note**: you can reuse the code from seminars if necessary.

In [2]:
data = get_movielens_data(include_time=True)

# Weak generalization test

## Preparing data (1 pts)

Your task is
- split data into training and holdout parts
- build a new internal contiguous representation of user and item index based on the training data
- make sure same index is used in the holdout data

In [3]:
# split most recent holdout item from each user
training_, holdout_ = leave_last_out(data, 'userid', 'timestamp')

# check correct time splitting
verify_time_split(training_, holdout_)

In [4]:
# reindex data to make contiguous index starting from 0 for user and item IDs
training, data_index = transform_indices(training_, 'userid', 'movieid')

# apply new index to the holdout data
holdout = reindex_data(holdout_, data_index, filter_invalid=True)
holdout = holdout.sort_values('userid')

- Let's also populate data description dictionary for convenience.
- It allows using uniform names for users and items field.
  - This way the code does't depend on the actual names in you dataset.
  - So later you can easily switch to another dataset without changing the code fo the pipeline.


In [5]:
data_description = dict(
    users = data_index['users'].name,
    items = data_index['items'].name,
    feedback = 'rating',
    n_users = len(data_index['users']),
    n_items = len(data_index['items']),
    test_users = holdout[data_index['users'].name].values,
)

As previously, let's also explicitly store our testset (i.e., ratings of test users excluding holdout items).

In [6]:
userid = data_description['users']
seen_idx_mask = training[userid].isin(data_description['test_users'])
testset = training[seen_idx_mask]

## Models implementation

### Unweighted case (5 pts)

- You can consult the code from seminars or implement your own solution as long as it is fast enough.  
- **Make sure to implement some kind of neighborhood subsampling.**
  - Recall that subsampling of the neighborhood not only makes the algorithm run faster, but can also improve the results.

In [7]:
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

def cosine_similarity_zd(matrix):
    '''Build cosine similarity matrix with zero diagonal.'''
    similarity = cosine_similarity(matrix, dense_output=False) # type your code here
    similarity.setdiag(0)
    similarity.eliminate_zeros()
    return similarity

def truncate_similarity(similarity, k):
    '''
    For every row in similarity matrix, pick at most k entities
    with the highest similarity scores. Disregard everything else.
    '''
    similarity = similarity.tocsr()
    inds = similarity.indices
    ptrs = similarity.indptr
    data = similarity.data
    new_ptrs = [0]
    new_inds = []
    new_data = []
    for i in range(len(ptrs)-1):
        start, stop = ptrs[i], ptrs[i+1]
        if start < stop:
            data_ = data[start:stop]
            topk = min(len(data_), k)
            idx = np.argpartition(data_, -topk)[-topk:]
            new_data.append(data_[idx])
            new_inds.append(inds[idx+start])
            new_ptrs.append(new_ptrs[-1]+len(idx))
        else:
            new_ptrs.append(new_ptrs[-1])
    new_data = np.concatenate(new_data)
    new_inds = np.concatenate(new_inds)
    truncated = csr_matrix(
        (new_data, new_inds, new_ptrs),
        shape=similarity.shape
    )
    return truncated

In [8]:
def build_uknn_model(config, data, data_description):
    user_item_mtx = generate_interactions_matrix(
        data, data_description, 
        rebase_users=False
    )

    # compute similarity matrix
    user_similarity = truncate_similarity(
        cosine_similarity_zd(user_item_mtx),  
        config['n_neighbors']
    )
    return user_item_mtx, user_similarity


def uknn_model_scoring(params, testset, testset_description):
    # implement the scoring function to assign scores
    # to all items for test users
    user_item_mtx, user_similarity = params
    # write your code for scoring, don't forget to return a dense array
    scores = user_similarity.dot(user_item_mtx)[testset_description['test_users']].A
    return scores

In [9]:
%%time

uknn_params = build_uknn_model({'n_neighbors': 20}, training, data_description)

CPU times: total: 1.98 s
Wall time: 2.49 s


In [10]:
%%time

uknn_scores = uknn_model_scoring(uknn_params, testset, data_description)

CPU times: total: 156 ms
Wall time: 313 ms


Recall: recommending items from user history doesn't make sense.

### Weighted case (5 pts)

- Your task here is to implement user-based KNN with asymmetric similarity.

In [11]:
from scipy.sparse import diags

def build_uknn_model_weighted(config, data, data_description):
    user_item_mtx = generate_interactions_matrix(data, data_description)
    user_similarity = truncate_similarity(
        cosine_similarity_zd(user_item_mtx),
        config['n_neighbors']
    )

    weights = user_similarity.sum(axis=0).A.squeeze()
    weights = np.divide(1., weights, where=weights!=0)
    user_similarity_asym = user_similarity.dot(diags(weights))
    return user_item_mtx, user_similarity_asym

def uknn_model_scoring_weighted(params, testset, testset_description):
    # implement the scoring function to assign scores
    # to all items for test users
    user_item_mtx, user_similarity = params
    # write your code for scoring, don't forget to return a dense array
    scores = user_similarity.dot(user_item_mtx)[testset_description['test_users']].A
    return scores


In [12]:
%%time

uknn_params_weighted = build_uknn_model_weighted({'n_neighbors': 20}, training, data_description)

CPU times: total: 2.14 s
Wall time: 2.48 s


In [13]:
%%time

uknn_scores_weighted = uknn_model_scoring_weighted(uknn_params_weighted, testset, data_description)

CPU times: total: 250 ms
Wall time: 350 ms


 ## Evaluation (1 pts)

#### Generate top-$n$ recommendations for both models

In [14]:
%%time

downvote_seen_items(uknn_scores, testset, data_description)
uknn_recs = topn_recommendations(uknn_scores)

CPU times: total: 141 ms
Wall time: 271 ms


In [15]:
%%time

downvote_seen_items(uknn_scores_weighted, testset, data_description)
uknn_recs_weighted = topn_recommendations(uknn_scores_weighted)

CPU times: total: 125 ms
Wall time: 215 ms


### Calculate metrics

In [16]:
modes = ['unweighted', 'weighted']
uknn_recs_dict = dict(zip(modes, [uknn_recs, uknn_recs_weighted]))


uknn_metrics = {}
for mode, recs in uknn_recs_dict.items():
    if recs is None: continue
    uknn_metrics[mode] = metrics = model_evaluate(recs, holdout, data_description)
    print(
        f'Similarity type: {mode}\n'\
        'HR={:.3}, MRR={:.3}, COV={:.3}\n'.format(*metrics)
    )

Similarity type: unweighted
HR=0.0845, MRR=0.0308, COV=0.302

Similarity type: weighted
HR=0.077, MRR=0.0266, COV=0.445



In [9]:
import psutil
import time
import plotly.graph_objects as go
import plotly.subplots
import gc
from typing import Callable, Dict

def visualize_report(
    neighbors, execution_times, ram_utilizations,
    cpu_utilizations, HRs, MRRs, COVs
):
    fig = plotly.subplots.make_subplots(
        rows=1, cols=3, shared_xaxes=True, vertical_spacing=0.1,
        subplot_titles=("Execution Time vs. Neighbours", "RAM Utilization vs. Neighbours")
    )

    fig.add_trace(go.Scatter(x=neighbors, y=execution_times, mode='lines+markers', name='Execution Time'), row=1, col=1)
    fig.add_trace(go.Scatter(x=neighbors, y=ram_utilizations, mode='lines+markers', name='RAM Utilization'), row=1, col=2)
    fig.add_trace(go.Scatter(x=neighbors, y=cpu_utilizations, mode='lines+markers', name='CPU Utilization'), row=1, col=3)

    fig.update_layout(title_text="Resource Monitoring")
    fig.show()

    fig = plotly.subplots.make_subplots(
        rows=1, cols=3, shared_xaxes=True, vertical_spacing=0.1,
        subplot_titles=("HR vs. Neighbours", "MRR vs. Neighbours", "COV vs. Neighbours")
    )
    fig.add_trace(go.Scatter(x=neighbors, y=HRs, mode='lines+markers', name='HR'), row=1, col=1)
    fig.add_trace(go.Scatter(x=neighbors, y=MRRs, mode='lines+markers', name='MRR'), row=1, col=2)
    fig.add_trace(go.Scatter(x=neighbors, y=COVs, mode='lines+markers', name='COV'), row=1, col=3)
    fig.update_layout(title_text="Metrics Monitoring")
    fig.show()


def get_report(
    build_model: Callable, 
    score_model: Callable,
    training: pd.DataFrame, 
    testset: pd.DataFrame,
    trainset_description: Dict,
    testset_description: Dict,
    mode: str
):
    neighbors = [1, 2, 4, *range(10, 150, 10)]

    execution_times = []
    ram_utilizations = []
    cpu_utilizations = []
    HRs = []
    MRRs = []
    COVs = []



    for n_size in neighbors:
        gc.collect()
        start_ram = psutil.virtual_memory().used
        start_cpu = psutil.cpu_times().user

        # Perform operations using uknn model
        # (Assuming you have functions like build_uknn_model, uknn_model_scoring, etc. defined)
        uknn_params = build_model({'n_neighbors': n_size}, training, trainset_description)

        end_ram = psutil.virtual_memory().used
        end_cpu = psutil.cpu_times().user
        ram_utilization = abs(end_ram - start_ram)
        cpu_utilization = end_cpu - start_cpu
        
        start_time = time.time()
        uknn_scores = score_model(uknn_params, testset, testset_description)
        downvote_seen_items(uknn_scores, testset, testset_description)
        recs = topn_recommendations(uknn_scores)
        metrics = model_evaluate(recs, holdout, testset_description)

        end_time = time.time()
        

        execution_time = end_time - start_time  # seconds

        # Append monitor data to lists
        execution_times.append(execution_time)
        ram_utilizations.append(ram_utilization)
        cpu_utilizations.append(cpu_utilization)
        HRs.append(metrics[0])
        MRRs.append(metrics[1])
        COVs.append(metrics[2])

        print(
            f'Neighbours: {n_size}\n',
            f'Similarity type: {mode}\n'\
            'HR={:.3}, MRR={:.3}, COV={:.3}\n'.format(*metrics),
            f'RAM Utilization: {ram_utilization} bytes, CPU Utilization: {cpu_utilization} s, Execution Time: {execution_time} s'
        )
        
    visualize_report(
        neighbors, execution_times, ram_utilizations,
        cpu_utilizations, HRs, MRRs, COVs
    )
    print(
        f"Best in HR's: {neighbors[np.argmax(HRs)]}.\n HR: {HRs[np.argmax(HRs)]} MRR: {MRRs[np.argmax(HRs)]} COV: {COVs[np.argmax(HRs)]}",
        f"Best in MRR's: {neighbors[np.argmax(MRRs)]}.\n HR: {HRs[np.argmax(MRRs)]} MRR: {MRRs[np.argmax(MRRs)]} COV: {COVs[np.argmax(MRRs)]}",
        f"Best in COV's: {neighbors[np.argmax(COVs)]}.\n HR: {HRs[np.argmax(COVs)]} MRR: {MRRs[np.argmax(COVs)]} COV: {COVs[np.argmax(COVs)]}",
        sep="\n"
    )

In [27]:
get_report(build_uknn_model, uknn_model_scoring, training, testset, data_description, data_description, modes[0])

Neighbours: 1
 Similarity type: unweighted
HR=0.0489, MRR=0.014, COV=0.701
 RAM Utilization: 12185600 bytes, CPU Utilization: 2.40625 s, Execution Time: 0.33592987060546875 s
Neighbours: 2
 Similarity type: unweighted
HR=0.0628, MRR=0.0225, COV=0.591
 RAM Utilization: 834740224 bytes, CPU Utilization: 2.90625 s, Execution Time: 0.2659893035888672 s
Neighbours: 4
 Similarity type: unweighted
HR=0.0757, MRR=0.025, COV=0.48
 RAM Utilization: 61353984 bytes, CPU Utilization: 2.28125 s, Execution Time: 0.34701085090637207 s
Neighbours: 10
 Similarity type: unweighted
HR=0.0831, MRR=0.0285, COV=0.366
 RAM Utilization: 29876224 bytes, CPU Utilization: 2.15625 s, Execution Time: 0.5123991966247559 s
Neighbours: 20
 Similarity type: unweighted
HR=0.0845, MRR=0.0308, COV=0.302
 RAM Utilization: 73355264 bytes, CPU Utilization: 2.28125 s, Execution Time: 0.6048955917358398 s
Neighbours: 30
 Similarity type: unweighted
HR=0.0848, MRR=0.0306, COV=0.262
 RAM Utilization: 63160320 bytes, CPU Utilizat

Best in HR's: 60.
 HR: 0.08612123219609143 MRR: 0.03082534161947875 COV: 0.21301295896328293
Best in MRR's: 60.
 HR: 0.08612123219609143 MRR: 0.03082534161947875 COV: 0.21301295896328293
Best in COV's: 1.
 HR: 0.04885723749585956 MRR: 0.013984447704222462 COV: 0.701133909287257


In [28]:
get_report(build_uknn_model_weighted, uknn_model_scoring_weighted, training, testset, data_description, data_description, modes[1])

Neighbours: 1
 Similarity type: weighted
HR=0.0489, MRR=0.014, COV=0.701
 RAM Utilization: 29732864 bytes, CPU Utilization: 2.078125 s, Execution Time: 0.31786561012268066 s
Neighbours: 2
 Similarity type: weighted
HR=0.0537, MRR=0.0197, COV=0.645
 RAM Utilization: 8708096 bytes, CPU Utilization: 2.625 s, Execution Time: 0.2586393356323242 s
Neighbours: 4
 Similarity type: weighted
HR=0.0613, MRR=0.0224, COV=0.592
 RAM Utilization: 13131776 bytes, CPU Utilization: 2.625 s, Execution Time: 0.30626797676086426 s
Neighbours: 10
 Similarity type: weighted
HR=0.0699, MRR=0.0248, COV=0.518
 RAM Utilization: 59654144 bytes, CPU Utilization: 2.265625 s, Execution Time: 0.41326284408569336 s
Neighbours: 20
 Similarity type: weighted
HR=0.077, MRR=0.0266, COV=0.445
 RAM Utilization: 52416512 bytes, CPU Utilization: 2.421875 s, Execution Time: 0.5491147041320801 s
Neighbours: 30
 Similarity type: weighted
HR=0.0798, MRR=0.0274, COV=0.402
 RAM Utilization: 84271104 bytes, CPU Utilization: 2.828125

Best in HR's: 110.
 HR: 0.08959920503477972 MRR: 0.03143300104628359 COV: 0.2661987041036717
Best in MRR's: 120.
 HR: 0.08827426300099371 MRR: 0.0321788724322676 COV: 0.25863930885529157
Best in COV's: 1.
 HR: 0.04885723749585956 MRR: 0.013984447704222462 COV: 0.701133909287257


# Strong generalization test

- Recall that in the strong generalization test you work with the warm-start scenario.
- It means that the set of test users is disjoint from the set of users in the training.
- You're provided with the basic functions to help you perform correct splitting, but there're still a few places where your input is required. Make sure you understand the logic of data splitting in this scenario.

## Preparing data (2 pts)

- Your task is to select **a subset of users who have the most recent interactions in their history** across entire dataset. These are going to be the **test users**.
- You will apply **holdout splitting to only this subset**.
  - Think, why simply taking all users (as in weak generalization test) makes no sense in this scenario. 

In [12]:
def split_by_time(data, time_q=0.95, timeid='timestamp'):
    '''
    Split the input `data` DataFrame into two parts based on the timestamp, with the split point
    being determined by the quantile value `time_q`. The function returns a tuple `(before, after)`
    containing the two DataFrames. The `after` DataFrame contains the rows with timestamps greater
    than or equal to the split point, while the `before` DataFrame contains the remaining rows.

    Details:
    The `quantile` method of the pandas DataFrame is used to calculate the time point (i.e., timestamp)
    that divides the data into two parts based on the given quantile value `time_q`. Specifically,
    the time point `split_timepoint` is calculated as the `time_q`th quantile of the values in the `timeid`
    column of the `data` DataFrame, using the interpolation method of `nearest`. This means that
    `split_timepoint` is the timestamp at or immediately after which `time_q` percent of the data points occur.
    '''
    split_timepoint = data[timeid].quantile(q=time_q, interpolation='nearest')
    after = data.query(f'{timeid} >= @split_timepoint')
    before = data.drop(after.index)
    return before, after

Firstly, you need to select a candidate subset of observations, from which you'll construct the the training, testset, and holdout datssets. Check the `split_by_time` function below and its description in the above cell.

In [39]:
before, after = split_by_time(data, time_q=0.95)

- Now it's time to perform holdout sampling based on the obtained timepoint splitting. 
- Remember, you only sample from the test users.
  - Test users's last ratings must be the most recent across the entire dataset. Use the global timepoint splitting obtained above.

In [40]:
testset_part_, holdout_ = leave_last_out(after, 'userid', 'timestamp') # your code for holdout sampling

# verify correctness of time-based splitting,
# i.e., for each test user, the holdout contains only future interactions w.r.t to testset
verify_time_split(testset_part_, holdout_)

- Prepare the data for training.
  - Take the correspoding part of the timepoint split.
  - Recall that **training and testset must be disjoint by users**.

In [41]:
training_ = before[~before['userid'].isin(holdout_['userid'].values)]

- Note that `testset_part_` only contains interactions of the test users **after the timepoint**.
- You need to combine it with the remaining histories of these users.
  - i.e., everything that's filtered out from the training data

In [42]:
# combine all test users data into a single `testset_` Dataframe.
testset_ = pd.concat(
    [
        before[before['userid'].isin(holdout_['userid'].values)], 
        testset_part_
    ],
    axis = 0,
    ignore_index=False
)

### Building internal representation of user and item index

Use the `transform_indices` function for building a contiguous index starting from 0.

In [43]:
training, data_index = transform_indices(training_, 'userid', 'movieid')

- Before applying new index to the test data note that:
  - the users in the `testset` must be the same as the users in the `holdout`;
  - the users in both `testset` and `holdout` must be ordered the same way.
- Below is the corresponding function `align_test_by_users` that ensures these two datasets' alignment.

In [14]:
def align_test_by_users(testset, holdout):
    test_users = np.intersect1d(holdout['userid'].values, testset['userid'].values)
    # only allow the same users to be present in both datasets
    testset = testset.query('userid in @test_users').sort_values('userid')
    holdout = holdout.query('userid in @test_users').sort_values('userid')
    return testset, holdout

Let's apply new item index to test data and finalize the test split:

In [45]:
holdout = reindex_data(holdout_, data_index, entities='items', filter_invalid=True)
testset = reindex_data(testset_, data_index, entities='items', filter_invalid=True)

testset, holdout = align_test_by_users(testset, holdout)

- Think why we do not apply new index to users here.

## Models implementation

- In this section you'll need to implement user-based KNN models for the warm-start scenario.
- Think carefully which data must be generated at the build time and which data must be generated in the scoring function.
  - Recall that test users are not part of the training data.
- The notes on neighborhood subsampling remain the same as before.

### Unweighted case (5 pts)

In [46]:
def build_uknn_model(config, data, data_description):
    user_item_mtx = generate_interactions_matrix(data, data_description)
    return user_item_mtx, config['n_neighbors']


def uknn_model_scoring(params, testset, testset_description):
    user_item_mtx, n_neighbors = params
    user_item_mtx_test = generate_interactions_matrix(testset, testset_description, rebase_users=True)
    # compute similarity matrix with train users
    user_similarity = cosine_similarity(
        user_item_mtx_test, 
        user_item_mtx, 
        dense_output=False
    )
    user_similarity.setdiag(0)
    user_similarity.eliminate_zeros()
    user_similarity = truncate_similarity(
           user_similarity,
           n_neighbors
    )

    scores = user_similarity.dot(user_item_mtx).A

    return scores


In [47]:
def get_description(mode: str="train"):
    return dict(
        users = data_index['users'].name,
        items = data_index['items'].name,
        feedback = 'rating',
        n_users = len(data_index['users']) if mode=="train" else len(holdout),
        n_items = len(data_index['items']),
    )

In [48]:
%%time

uknn_params = build_uknn_model({'n_neighbors': 20}, training, get_description("train"))

CPU times: total: 31.2 ms
Wall time: 30.7 ms


In [49]:
%%time

uknn_scores = uknn_model_scoring(uknn_params, testset, get_description("test"))

CPU times: total: 359 ms
Wall time: 486 ms



Changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.



### Weighted case (5 pts)

In [50]:
def build_uknn_model_weighted(config, data, data_description):
    user_item_mtx = generate_interactions_matrix(data, data_description)

    return user_item_mtx, config['n_neighbors']

def uknn_model_scoring_weighted(params, testset, testset_description):
    user_item_mtx, n_neighbors = params
    user_item_mtx_test = generate_interactions_matrix(
        testset, testset_description,
        rebase_users=True
    )

    user_similarity = cosine_similarity(
        user_item_mtx_test, 
        user_item_mtx, 
        dense_output=False
    )
    user_similarity.setdiag(0)
    user_similarity.eliminate_zeros()
    user_similarity = truncate_similarity(
        user_similarity,
        n_neighbors
    )

    
    weights = user_similarity.sum(axis=0).A.squeeze()
    weights = np.divide(1., weights, where=weights!=0)

    user_similarity = user_similarity.dot(diags(weights))
    scores = user_similarity.dot(user_item_mtx).A
    return scores

In [51]:
%%time

uknn_params_weighted = build_uknn_model_weighted({'n_neighbors': 20}, training, get_description('train'))

CPU times: total: 15.6 ms
Wall time: 31.1 ms


In [52]:
%%time

uknn_scores_weighted = uknn_model_scoring_weighted(uknn_params_weighted, testset, get_description("test"))

CPU times: total: 469 ms
Wall time: 540 ms


 ## Evaluation (1 pts)

### Generate recommendations for both models

In [53]:
%%time

downvote_seen_items(uknn_scores, testset, get_description("test"))
uknn_recs = topn_recommendations(uknn_scores)

CPU times: total: 31.2 ms
Wall time: 40.3 ms


In [54]:
%%time

downvote_seen_items(uknn_scores_weighted, testset, get_description("test"))
uknn_recs_weighted = topn_recommendations(uknn_scores_weighted)

CPU times: total: 15.6 ms
Wall time: 33.1 ms


### Calculate metrics

In [55]:
modes = ['unweighted', 'weighted']
uknn_recs = dict(zip(modes, [uknn_recs, uknn_recs_weighted]))


uknn_metrics = {}
for mode, recs in uknn_recs.items():
    if recs is None: continue
    uknn_metrics[mode] = metrics = model_evaluate(recs, holdout, get_description('test'))
    print(
        f'Similarity type: {mode}\n'\
        'HR={:.3}, MRR={:.3}, COV={:.3}\n'.format(*metrics)
    )

Similarity type: unweighted
HR=0.0544, MRR=0.0168, COV=0.168

Similarity type: weighted
HR=0.0606, MRR=0.0161, COV=0.24



## Tuning (2 pts)
- Try to find a neighborhood size that gives you better results.
- Perform a simple grid-search experiment and report your findings.
- Optional: try improving results with a different similarity measure.

In [56]:
get_report(build_uknn_model, uknn_model_scoring, training, testset, get_description("train"), get_description("test"), modes[0])


Changing the sparsity structure of a csr_matrix is expensive. lil_matrix is more efficient.



Neighbours: 1
 Similarity type: unweighted
HR=0.0321, MRR=0.00698, COV=0.409
 RAM Utilization: 8974336 bytes, CPU Utilization: 0.015625 s, Execution Time: 0.5579402446746826 s
Neighbours: 2
 Similarity type: unweighted
HR=0.047, MRR=0.0181, COV=0.323
 RAM Utilization: 2510848 bytes, CPU Utilization: 0.03125 s, Execution Time: 0.5084319114685059 s
Neighbours: 4
 Similarity type: unweighted
HR=0.047, MRR=0.0144, COV=0.264
 RAM Utilization: 11067392 bytes, CPU Utilization: 0.03125 s, Execution Time: 0.5037326812744141 s
Neighbours: 10
 Similarity type: unweighted
HR=0.0482, MRR=0.0172, COV=0.201
 RAM Utilization: 643072 bytes, CPU Utilization: 0.015625 s, Execution Time: 0.5261490345001221 s
Neighbours: 20
 Similarity type: unweighted
HR=0.0544, MRR=0.0168, COV=0.168
 RAM Utilization: 4001792 bytes, CPU Utilization: 0.03125 s, Execution Time: 0.5246984958648682 s
Neighbours: 30
 Similarity type: unweighted
HR=0.0556, MRR=0.0203, COV=0.154
 RAM Utilization: 3616768 bytes, CPU Utilization: 

Best in HR's: 90.
 HR: 0.0630407911001236 MRR: 0.02368248082092334 COV: 0.1144578313253012
Best in MRR's: 110.
 HR: 0.06056860321384425 MRR: 0.024664488786862088 COV: 0.11062431544359255
Best in COV's: 1.
 HR: 0.032138442521631644 MRR: 0.006980006670983186 COV: 0.4085432639649507


In [57]:
get_report(build_uknn_model_weighted, uknn_model_scoring_weighted, training, testset, get_description("train"), get_description("test"), modes[1])

Neighbours: 1
 Similarity type: weighted
HR=0.0321, MRR=0.00698, COV=0.409
 RAM Utilization: 18710528 bytes, CPU Utilization: 0.03125 s, Execution Time: 0.44240403175354004 s
Neighbours: 2
 Similarity type: weighted
HR=0.0445, MRR=0.0177, COV=0.338
 RAM Utilization: 10612736 bytes, CPU Utilization: 0.03125 s, Execution Time: 0.5589449405670166 s
Neighbours: 4
 Similarity type: weighted
HR=0.0532, MRR=0.0157, COV=0.313
 RAM Utilization: 15437824 bytes, CPU Utilization: 0.015625 s, Execution Time: 0.476712703704834 s
Neighbours: 10
 Similarity type: weighted
HR=0.0532, MRR=0.0201, COV=0.27
 RAM Utilization: 3325952 bytes, CPU Utilization: 0.015625 s, Execution Time: 0.4951448440551758 s
Neighbours: 20
 Similarity type: weighted
HR=0.0606, MRR=0.0161, COV=0.24
 RAM Utilization: 10883072 bytes, CPU Utilization: 0.015625 s, Execution Time: 0.49791622161865234 s
Neighbours: 30
 Similarity type: weighted
HR=0.0507, MRR=0.0175, COV=0.232
 RAM Utilization: 4247552 bytes, CPU Utilization: 0.0156

Best in HR's: 140.
 HR: 0.0630407911001236 MRR: 0.02357358683069437 COV: 0.1741511500547645
Best in MRR's: 140.
 HR: 0.0630407911001236 MRR: 0.02357358683069437 COV: 0.1741511500547645
Best in COV's: 1.
 HR: 0.032138442521631644 MRR: 0.006980006670983186 COV: 0.4085432639649507


# Final analysis (3 pts)

1. Provide an analysis on which model performs the best and explain why.
2. Explain the difference in computational complexity of your models. Consider how the training and the recommendation generation differ for different models in terms of
    - the amount of RAM,
    - the amount of disk storage,
    - the load on CPU.
3. How else would you modify the model to improve either the quality of recommendations or computational performance? Describe at least one modification and its envisioned effect.


## a. Weak generalization:

    a.1. Unweighted KNN

    HR: 0.08612123219609143 MRR: 0.03082534161947875 COV: 0.21301295896328293
    
    a.2. Weighted KNN

    HR: 0.08959920503477972 MRR: 0.03143300104628359 COV: 0.2661987041036717
## b. Strong generalization:

    b.1. Unweighted KNN

    HR: 0.0630407911001236 MRR: 0.02368248082092334 COV: 0.1144578313253012

    b.2. Weighted KNN

    HR: 0.0630407911001236; MRR: 0.02357358683069437; COV: 0.1741511500547645

Weak generalization outperforms strong one in terms of each metric. The weighted KNN + weak generalization shows the best performance. It also shows longer computation in comparison to other models. Time complexity for weak generalization: $\mathcal{O}(N^2M)$, for strong: $\mathcal{O}(N_{test} * N_{train} * M)$. 

The best in terms of metrics: Weighted KNN + weak generalization

The best in terms of computation complexity: Unweighted KNN + strong generalization

The performance can be improved with regularization. Nearest neighbors can be sampled which can speed up the performance

# WMF

In [15]:
before, after = split_by_time(data, time_q=0.95)
testset_part_, holdout_ = leave_last_out(after, 'userid', 'timestamp')
verify_time_split(testset_part_, holdout_)

test_users = holdout_['userid'].values
training_ = before.query('userid not in @test_users')
testset_ = pd.concat(
    [before.query('userid in @test_users'), testset_part_],
    axis = 0,
    ignore_index=False
)

training, data_index = transform_indices(training_, 'userid', 'movieid')
holdout = reindex_data(holdout_, data_index, entities='items', filter_invalid=True)
testset = reindex_data(testset_, data_index, entities='items', filter_invalid=True)

testset, holdout = align_test_by_users(testset, holdout)

holdout_user_index = {uid: i + training['userid'].max() + 1 for i, uid in enumerate(test_users)} #append test users to the end of matrix P

holdout['userid'] = holdout['userid'].map(holdout_user_index)
testset['userid'] = testset['userid'].map(holdout_user_index)


testset = testset[testset['userid'] < 6039]
holdout = holdout[holdout['userid'] < 6039]

In [26]:
data_description = dict(
    users = data_index['users'].name,
    items = data_index['items'].name,
    feedback = 'rating',
    n_users = len(data_index['users']) + len(holdout_user_index),
    n_items = len(data_index['items']),
    test_users = holdout[data_index['users'].name].values,
    train_users = np.unique(training[data_index['users'].name].values)
)

In [27]:
from numba import njit, objmode, prange
from tqdm import tqdm

def negative_sampling(useridx, itemidx, data_description, rng):
    all_items = np.unique(data[data_description['items']].values)
    new_users, new_items, new_ratings = [], [], []
    for i in tqdm(range(len(useridx))):
        user = useridx[i]
        item = itemidx[i]
        sample_weights = np.ones(len(all_items))
        sample_weights[item] = 0
        sampled_item = rng.choice(all_items, p=sample_weights / sample_weights.sum())
        new_users.append(user)
        new_items.append(item)
        new_ratings.append(0)
    return np.asarray(new_users), np.asarray(new_items), np.asarray(new_ratings)

def mf_sgd_build(config, data, data_description):
    useridx = data[data_description['users']].values
    itemidx = data[data_description['items']].values
    ratings = data[data_description['feedback']].values
    rng = np.random.default_rng(config.get('seed', None))
    print('negative sampling:')
    sampled_users, sampled_items, sampled_feedback = negative_sampling(useridx, itemidx, data_description, rng)
    useridx = np.concatenate((useridx, sampled_users))
    itemidx = np.concatenate((itemidx, sampled_items))
    ratings = np.concatenate((ratings, sampled_feedback))

    learning_rate = config['learning_rate']
    regularization = config['regularization']
    n_epochs = config['n_epochs']
    rank = config['rank']
    n_users = data_description['n_users']
    n_items = data_description['n_items']

    P, Q, _ = sgd_epochs(
        useridx, itemidx, ratings,
        learning_rate, regularization, n_epochs,
        rank, n_users, n_items,
        rng
    )
    return P, Q, rng, config

# @njit(nopython=True)""
def sgd_epochs(
    useridx, itemidx, ratings,
    learning_rate, regularization, n_epochs,
    rank, n_users, n_items,
    rng
):
    P = rng.normal(0, 0.01, (n_users, rank))
    Q = rng.normal(0, 0.01, (n_items, rank))
    history = []
    for epoch in range(n_epochs):
        mse = sgd_step(P, Q, useridx, itemidx, ratings, learning_rate, regularization, rng)
        history.append(mse)
        if (epoch+1) % 2 == 0:
            with objmode():
                signal = evaluation_callback(epoch, P, Q)
                if signal == 0:
                    break
    return P, Q, history

def check_metric_growth(holdout, data_description):
    def update_target_metric(metrics):
        hr, mrr, cov = metrics
        eval_callback.target_metrics.append(hr)

    def eval_callback(epoch, P, Q):
        mf_params = P, Q, None
        sgd_scores = mf_sgd_scoring_for_eval(mf_params, data_description)
        #downvoting seen items doesn't make sense here, because these items are also used in training
        sgd_recs = topn_recommendations(sgd_scores, topn=10)
        metrics = model_evaluate(sgd_recs, holdout, data_description)
        update_target_metric(metrics)
        stopping_criteria = 1
        if len(eval_callback.target_metrics) >= 2:
            stopping_criteria = eval_callback.target_metrics[-1] > eval_callback.target_metrics[-2]
        return int(stopping_criteria)
    eval_callback.target_metrics = []
    return eval_callback

# @njit(nopython=True)
def sgd_step(P, Q, useridx, itemidx, ratings, learning_rate, regularization, rng, update_q=True):
    n_interactions = len(ratings)
    squared_err = 0.

    for idx in rng.permutation(n_interactions):
        userid = useridx[idx]
        itemid = itemidx[idx]
        rating = ratings[idx]

        pi = P[userid]
        qj = Q[itemid]

        error = rating - pi @ qj

        pi += learning_rate * (error*qj - regularization*pi)

        if update_q:
            qj += learning_rate * (error*pi - regularization*qj)

        squared_err += error*error

    mse = squared_err / n_interactions
    return mse

def mf_sgd_scoring_for_eval(params, data_description):
    train_users = data_description['train_users']
    P, Q, _ = params
    scores = P[train_users] @ Q.T
    return scores

def mf_sgd_scoring(params, data, data_description):
    P, Q, rng, config = params
    test_users = data_description['test_users']

    useridx = data[data_description['users']].values
    itemidx = data[data_description['items']].values
    ratings = data[data_description['feedback']].values

    sgd_step(P, Q, useridx, itemidx, ratings, config['learning_rate'], config['regularization'], rng, False)

    scores = P[test_users] @ Q.T
    return scores

In [21]:
config = {
    "rank": 25,
    "n_epochs": 20,
    "learning_rate": 1e-3,
    "regularization": 1,
    "seed": 0xDEAD
}

In [22]:
%%time
_, training_subset_for_stopping = leave_last_out(training, 'userid', 'timestamp')
evaluation_callback = check_metric_growth(training_subset_for_stopping, data_description)
sgd_params = mf_sgd_build(config, training, data_description)

negative sampling:


100%|██████████| 725370/725370 [00:36<00:00, 19835.59it/s]


CPU times: total: 1min 19s
Wall time: 1min 35s


In [28]:
%%time
mf_scores = mf_sgd_scoring(sgd_params, testset, data_description)

CPU times: total: 1.53 s
Wall time: 1.74 s


In [29]:
%%time

downvote_seen_items(mf_scores, testset, data_description)
mf_recs = topn_recommendations(mf_scores)

CPU times: total: 0 ns
Wall time: 47.1 ms


In [30]:
modes = ['mf']
uknn_recs = dict(zip(modes, [mf_recs]))


uknn_metrics = {}
for mode, recs in uknn_recs.items():
    if recs is None: continue
    uknn_metrics[mode] = metrics = model_evaluate(recs, holdout, data_description)
    print(
        f'{mode}\n'\
        'HR={:.3}, MRR={:.3}, COV={:.3}\n'.format(*metrics)
    )

mf
HR=0.0, MRR=0.0, COV=0.283



WMF seems to be worse than KNN in this case. 