# Boulder recommendation  
## Simple exploratory notebook to understand how the similarity calculation works  
Boulders are compared through the cosine similarity of their ascents (which users climbed them).

## SQLAlchemy session creation

In [None]:
import numpy as np
import pandas as pd

from sqlalchemy.orm import Session
from sqlalchemy import create_engine

# SQLite database file, given as a relative path.
DB_URL = "sqlite:///../../bleau_info.db"

# A single engine/session pair serves the queries below;
# echo=False keeps the emitted SQL out of the cell output.
engine = create_engine(DB_URL, echo=False)
session = Session(bind=engine)

In [None]:
import sys

# Make the project's packages (e.g. `models`) importable from this notebook.
# The membership guard keeps the cell idempotent: re-running it does not pile
# up duplicate entries in sys.path.
PROJECT_ROOT = "../../"
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

## Cosine similarity matrix computation **based on shared repeaters**

### Database Query

In [2]:
from sqlalchemy import select
from models.repetition import Repetition


# Pull every logged ascent as a (user_id, boulder_id) row.
ascents = session.execute(select(Repetition.user_id, Repetition.boulder_id)).all()

# The boulder key is relabelled "id" here; the pivot table below indexes on that name.
ascents_df = pd.DataFrame(ascents, columns=["user_id", "id"])

### boulder_user matrix (Pivot table)

In [3]:
# Rows are boulders ("id"), columns are users; each cell counts the rows of
# ascents_df for that (boulder, user) pair, with 0 where the pair never occurs.
boulder_user_matrix = pd.pivot_table(
    ascents_df,
    index="id",
    columns="user_id",
    aggfunc="size",
    fill_value=0,
    dropna=True,
)

# Debug aid: uncomment to work on a small subset of boulders.
# boulder_user_matrix = boulder_user_matrix[boulder_user_matrix.index < 20]

# Keep the row labels around: the sparse conversion further down discards them.
boulder_ids = boulder_user_matrix.index

In [4]:
# Preview the dense boulder x user count matrix (the rendered table is truncated).
display(boulder_user_matrix)

user_id,1,2,3,4,5,6,7,8,9,10,...,7361,7362,7363,7364,7365,7366,7367,7368,7369,7371
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40231,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
40232,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
40233,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
40234,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Conversion to sparse matrix

In [5]:
from scipy.sparse import csr_matrix

# Re-bind the name to a SciPy CSR (compressed sparse row) version of the pivot table.
# NOTE: from here on the name no longer refers to a DataFrame, so the row/column
# labels are gone; boulder_ids (saved earlier) still holds the row labels.
boulder_user_matrix = csr_matrix(boulder_user_matrix)

### Ascent similarity computation

In [6]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows (boulders) of the boulder x user
# matrix: two boulders score high when largely the same users logged them.
similarity_ascents = cosine_similarity(boulder_user_matrix)

In [7]:
# Re-attach the boulder ids on both axes so similarities can be looked up by id.
similarity_ascents_df = pd.DataFrame(
    data=similarity_ascents,
    index=boulder_ids,
    columns=boulder_ids,
)
display(similarity_ascents_df)

id,2,3,5,7,10,11,13,14,15,16,...,40226,40227,40228,40229,40230,40231,40232,40233,40234,40235
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,1.0,0.0,0.0,0.0,0.0,1.0,0.188982,0.301511,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.0,1.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.096225,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,0.0,0.0,1.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
7,0.0,0.0,0.0,1.0,0.0,0.0,0.308607,0.123091,0.157135,0.154303,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
10,0.0,0.0,0.0,0.0,1.0,0.0,0.000000,0.174078,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40231,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.068041,0.000000,...,0.707107,0.500000,1.000000,1.000000,0.816497,1.000000,0.707107,1.000000,0.707107,1.000000
40232,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.353553,0.707107,0.707107,0.577350,0.707107,1.000000,0.707107,1.000000,0.707107
40233,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.068041,0.000000,...,0.707107,0.500000,1.000000,1.000000,0.816497,1.000000,0.707107,1.000000,0.707107,1.000000
40234,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.353553,0.707107,0.707107,0.577350,0.707107,1.000000,0.707107,1.000000,0.707107


## Similarity aggregation

In [None]:
def recommend_boulders(input_boulders, top_n=5, similarity=None):
    """Rank boulders by their summed similarity to a set of seed boulders.

    Parameters
    ----------
    input_boulders : iterable
        Ids of the seed boulders; each must be a column label of the
        similarity matrix.
    top_n : int, default 5
        Maximum number of recommendations to return.
    similarity : pandas.DataFrame, optional
        Boulder x boulder similarity matrix labelled by boulder id on both
        axes. Defaults to the notebook-level ``similarity_ascents_df``.

    Returns
    -------
    pandas.Series
        Up to ``top_n`` summed similarity scores in descending order, indexed
        by boulder id. The seed boulders themselves are never part of the
        result. Empty when ``input_boulders`` is empty.

    Raises
    ------
    KeyError
        If any seed boulder is missing from the similarity matrix.
    """
    if similarity is None:
        # Fall back to the matrix computed earlier in the notebook.
        similarity = similarity_ascents_df

    # Materialise as a list so tuples, sets, arrays, ... all select columns
    # (a bare tuple would otherwise be interpreted as a single column key).
    seeds = list(input_boulders)

    missing = [boulder for boulder in seeds if boulder not in similarity.columns]
    if missing:
        raise KeyError(f"No similarity data for boulder id(s): {missing}")

    # One column per seed; summing across columns aggregates the similarity of
    # every candidate boulder to the whole seed set.
    sim_scores = similarity[seeds].sum(axis=1)

    if not seeds:
        # Without seeds every score is 0, so any ranking would be arbitrary.
        return sim_scores.iloc[:0]

    # Seeds are already known to the user: exclude them (on a copy, not in place).
    return sim_scores.drop(labels=seeds).nlargest(top_n)


# Example: the 10 boulders most similar to boulder 60, used as the only seed.
recommendations = recommend_boulders([60], top_n=10)
print(recommendations)

id
89      0.516883
52      0.514150
23      0.499753
9580    0.471773
124     0.463270
1836    0.452219
1837    0.446142
15      0.443704
33      0.432099
4641    0.425723
dtype: float64
