In [38]:
import pandas as pd
import numpy as np
import datetime as dt

from pathlib import Path

import lenskit

from src import paths
from src.datasets import daocensus_text

## Parameters

In [24]:
# Organization whose votes/proposals are loaded, and the platform they come from.
ORG_NAME = 'Decentraland'
ORG_PLATFORM: str = 'snapshot'
USE_ORG_NAMES: bool = True

# Evaluation
K_RECOMMENDATIONS: list[int] = [1,3,5,10]
SPLITS_FREQ: str = 'W-THU' # split weekly
SPLITS_NORMALIZE = True # Whether or not to move everything to 00:00
LAST_SPLITS = 10 # Use just last 10 splits
CUTOFF_DATE_STR: str = '2023-07-29'

# Optional path of the metrics dataframe file. The next cell reads MDF_FILE
# before assigning it, so the name must exist on a fresh kernel; None means
# "derive the path from the parameters above".
MDF_FILE = None

In [27]:
# Turn the ISO-formatted cutoff string into a datetime; a falsy string means
# "no cutoff" and leaves CUTOFF_DATE as None.
CUTOFF_DATE = None
if CUTOFF_DATE_STR:
    CUTOFF_DATE = dt.datetime.fromisoformat(CUTOFF_DATE_STR)

print('CUTOFF_DATE', CUTOFF_DATE, type(CUTOFF_DATE))

# Metrics Dataframe
# An explicitly provided MDF_FILE wins; otherwise the path is built from the
# organization name, split settings and cutoff date.
if MDF_FILE:
    MDF_FILE = Path(MDF_FILE)
else:
    MDF_FILE = paths.pln_mdf(
        ORG_NAME,
        SPLITS_FREQ,
        SPLITS_NORMALIZE,
        cutoff_date=CUTOFF_DATE,
    )
print(MDF_FILE)

CUTOFF_DATE 2023-07-29 00:00:00 <class 'datetime.datetime'>
data/pln/mdf_Decentraland_W-THU_normalize-cutoff_date=2023-07-29 00:00:00.pkl


## Getting the dataset

In [39]:
# Echo the parameters that decide which votes and proposals get loaded.
print(ORG_NAME, ORG_PLATFORM, USE_ORG_NAMES, CUTOFF_DATE)
dfv, dfp = daocensus_text.get("./data/daos-census-text", ORG_NAME, ORG_PLATFORM, use_org_names=USE_ORG_NAMES, cutoff_date=CUTOFF_DATE)

# Store the identifier columns as plain strings, then index proposals by id.
dfv[['voter', 'proposal']] = dfv[['voter', 'proposal']].astype(str)
dfp[['id']] = dfp[['id']].astype(str)
dfp = dfp.set_index('id')

# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line after each report.
dfv.info()
dfp.info()

Decentraland snapshot True 2023-07-29 00:00:00
<class 'pandas.core.frame.DataFrame'>
Int64Index: 116560 entries, 0 to 116559
Data columns (total 10 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   platform       116560 non-null  object        
 1   name           116560 non-null  object        
 2   id             116560 non-null  object        
 3   proposal       116560 non-null  object        
 4   deployment     116560 non-null  object        
 5   platform_vote  116560 non-null  object        
 6   voter          116560 non-null  object        
 7   date           116560 non-null  datetime64[ns]
 8   choice         116560 non-null  object        
 9   weight         116560 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(8)
memory usage: 9.8+ MB
None
<class 'pandas.core.frame.DataFrame'>
Index: 1942 entries, d788be3c-9195-5dc3-b4c9-ad841273f18e to 332e27a3-29a2-5044-9b1f-16b2f0130022
Data c

In [41]:
def to_lenskit(dfv):
    """Build a user/item/timestamp/rating frame from a votes dataframe.

    Args:
        dfv: DataFrame with at least the columns ``voter``, ``proposal``
            and ``date``. It is not modified.

    Returns:
        A new DataFrame with columns ``user`` (from ``voter``), ``item``
        (from ``proposal``) and ``timestamp`` (from ``date``), where
        ``user`` and ``item`` are cast to ``str``, plus a constant
        ``rating`` column equal to 1 for every row.
    """
    column_map = {
        'voter': 'user',
        'proposal': 'item',
        'date': 'timestamp',
    }
    # Selecting with the mapping's keys keeps the voter/proposal/date order.
    interactions = dfv[list(column_map)].rename(columns=column_map)

    # assign() keeps existing columns in place and appends `rating` last.
    return interactions.assign(
        user=interactions['user'].astype('str'),
        item=interactions['item'].astype('str'),
        rating=1,
    )

## Making the model

In [53]:
from lenskit.algorithms import item_knn, user_knn, Recommender

In [43]:
# TODO: Set the sim_options
# TODO: Set the bsl_options
# Item-item k-NN model fitted on every vote in dfv.
#   nnbrs:    the maximum number of neighbors for scoring each item (None for unlimited)
#   feedback: 'implicit' is VERY IMPORTANT here -- to_lenskit() gives every
#             vote the same rating of 1, so only the presence of a vote
#             carries information.
algo = item_knn.ItemItem(nnbrs=3, feedback='implicit')
algo.fit(to_lenskit(dfv))
algo

<lenskit.algorithms.item_knn.ItemItem at 0x7fee2fc0eac0>

In [50]:
# Voter address used for the spot checks in this section.
user = '0xffd92144cafd599a5a93e4805ca4d8f0e666d623'
# dfvu keeps every vote that was NOT cast by `user`.
dfvu = dfv[~(dfv['voter'] == user)]
dfvu

Unnamed: 0,platform,name,id,proposal,deployment,platform_vote,voter,date,choice,weight
0,snapshot,Decentraland,60d57e3c-772a-55c6-b5bf-dc248cfc21f4,b86aa059-3d31-5d41-a472-70962816f779,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,0xd39e5cc83a1c5cf8590badbda6ec50fc35e9c8f34c2e...,0xe7af1c70f8f089c4c3bd71999692c6c5a15d9e2a,2021-12-17 12:28:01,"[0, 0, 0, 193.18689615308116, 0, 0]",193.0
1,snapshot,Decentraland,bd60a83f-2a23-5035-802b-7d3ecff9d4e2,b86aa059-3d31-5d41-a472-70962816f779,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,0x414089deabab4681c3c3b679b8f231182ed8d8a1b7c8...,0xc54a6c3778016b06cbd126ccc3b5bc06c5f666fb,2021-12-17 02:16:23,"[0, 0, 0, 188.71532546843463, 0, 0]",188.0
2,snapshot,Decentraland,0f868cb1-b4b7-58a0-bf74-26573db45465,b86aa059-3d31-5d41-a472-70962816f779,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,QmRqNordvsSWx7zCWENSEhiZV8J89DNuTijGr2rQ6n85Vj,0xd82d005e8f8d5385db40ba23884a5c967bb1e8af,2021-12-17 00:38:22,"[0, 0, 4000, 399.825, 300, 0]",4699.0
3,snapshot,Decentraland,f50f9e59-f5cc-5a82-9cac-36f7cedce08c,b86aa059-3d31-5d41-a472-70962816f779,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,QmWvJrnW98XkSCpvkmathKn5bDjfrGtS3LWTQgztgUqK5L,0xf4c64db66ffb301985f5ecd85c8f3f9c02f2659d,2021-12-16 18:47:08,"[0, 2000, 0, 0, 100, 0]",2100.0
4,snapshot,Decentraland,feb2353c-9314-5373-9d07-0ff2dc0de59d,b86aa059-3d31-5d41-a472-70962816f779,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,QmRGjFe5621Zb3i59c1sbLBnaxfkdp7nbHbvJowA23SunG,0xd5e9ef1cedad0d135d543d286a2c190b16cbb89e,2021-12-16 18:32:15,"[0, 0, 48000, 344.575, 300, 152688.025]",201332.0
...,...,...,...,...,...,...,...,...,...,...
116555,snapshot,Decentraland,1234ccfe-7691-53d8-a783-f45bbfb58e9d,d083109e-4819-54b9-a01c-67bd5a770f65,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,bafkreidxyivqx42pvaxoqtwqpifroeem5x3ftr33yudp3...,0x1156bf625b37623a86d004e66e0a01ec4b17e051,2022-09-06 18:47:54,"[0, 2000, 12000, 11398.326305277713, 700, 0]",26098.0
116556,snapshot,Decentraland,391cd10b-3bca-5d98-a063-c00f00457315,d083109e-4819-54b9-a01c-67bd5a770f65,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,bafkreicqzstsslmg4uhnhhe2qwjbve5p3l7m2wj4wylbo...,0xbd03add5da0e173c67c9c1073ffba017147c42d4,2022-09-06 18:44:04,"[0, 0, 0, 0, 100, 0]",100.0
116557,snapshot,Decentraland,926887f7-57b8-558c-873e-d945a45fe1e0,d083109e-4819-54b9-a01c-67bd5a770f65,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,bafkreid7ieokukccz37ujdkvc33pt62wfj3svcxce5kym...,0x4da03f669dd9609dc49ca6f3451ba22d3b792395,2022-09-06 18:21:14,"[0, 0, 0, 61.1561783615148, 100, 0]",161.0
116558,snapshot,Decentraland,95f06927-8a0f-5c26-b689-7deb11ddc7a6,d083109e-4819-54b9-a01c-67bd5a770f65,41fd8de5-f8e2-5023-86a3-825c49e9ad7f,bafkreidp7ftbonpcdjjew3i62q54yfczs3jw6hjxmdpfn...,0xd6e62a97a55537cd04847bb73e22208bd20106aa,2022-09-06 18:14:10,"[0, 0, 0, 0, 400, 406.78]",806.0


In [51]:
# Score two proposals for the voter selected in the previous cell. Reusing
# `user` instead of repeating the address literal keeps both cells pointing
# at the same voter.
# NOTE(review): "positive"/"negative" presumably mark a proposal the voter
# did / did not vote on -- confirm against dfv.
algo.predict_for_user(user, [
    'ec8f1ce2-1c95-5206-bf75-7bc141ba0a08', # positive
    'b86aa059-3d31-5d41-a472-70962816f779', # negative
])

item
ec8f1ce2-1c95-5206-bf75-7bc141ba0a08    2.056486
b86aa059-3d31-5d41-a472-70962816f779    0.221711
dtype: float64