In [2]:
import pickle
import pandas as pd

In [4]:
# Load the games table from a local pickle (the file name suggests it has already been cleaned).
# NOTE: unpickling can execute arbitrary code — only load pickle files from a trusted source.
df = pd.read_pickle("datasets/clean_games.pkl")

In [7]:
# Display the loaded frame; pandas elides the middle rows and columns of a frame this large.
df

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,...,Well-Written,Werewolves,Western,Wholesome,Word Game,World War I,World War II,Wrestling,Zombies,eSports
0,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,...,0,0,0,0,0,0,0,0,0,0
1,22364,BRINK: Agents of Change,2011-08-03,True,False,False,Positive,85,21,2.99,...,0,0,0,0,0,0,0,0,0,0
2,113020,Monaco: What's Yours Is Mine,2013-04-24,True,True,True,Very Positive,92,3722,14.99,...,0,0,0,0,0,0,0,0,0,0
3,226560,Escape Dead Island,2014-11-18,True,False,False,Mixed,61,873,14.99,...,0,0,0,0,0,0,0,0,1,0
4,249050,Dungeon of the ENDLESS™,2014-10-27,True,True,False,Very Positive,88,8784,11.99,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50867,2296380,I Expect You To Die 3: Cog in the Machine,2023-09-28,True,False,False,Very Positive,96,101,22.00,...,0,0,0,0,0,0,0,0,0,0
50868,1272080,PAYDAY 3,2023-09-21,True,False,False,Mostly Negative,38,29458,40.00,...,0,0,0,0,0,0,0,0,0,0
50869,1402110,Eternights,2023-09-11,True,False,False,Very Positive,89,1128,30.00,...,0,0,0,0,0,0,0,0,0,0
50870,2272250,Forgive Me Father 2,2023-10-19,True,False,False,Very Positive,95,82,17.00,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# Keep only the columns from position 13 onward. Per the dtypes output these are 441 int64
# columns ("1980s" ... "eSports"), presumably one 0/1 indicator column per tag — TODO confirm.
# NOTE(review): 13 is a positional magic number; it silently selects the wrong columns if the
# leading (non-tag) columns of `df` are ever added, removed or reordered.
df_original = df.iloc[:, 13:]
df_original.dtypes

1980s           int64
1990's          int64
2.5D            int64
2D              int64
2D Fighter      int64
                ...  
World War I     int64
World War II    int64
Wrestling       int64
Zombies         int64
eSports         int64
Length: 441, dtype: object

In [17]:
from sklearn.decomposition import TruncatedSVD

# Reduce the selected columns to 2 components so that every row can be drawn as a 2-D point.
# random_state is fixed so that repeated runs give the same projection.
svd = TruncatedSVD(n_components=2, random_state=1)
res = svd.fit_transform(df_original)

# 2-column array, one row per input row. The [0, 0] rows visible in the output are presumably
# rows whose selected columns are all zero — TODO confirm.
res

array([[1.95964853, 0.53767417],
       [0.30279474, 0.61569183],
       [2.5355177 , 0.42725785],
       ...,
       [0.        , 0.        ],
       [1.97292879, 1.41341342],
       [0.        , 0.        ]])

In [22]:
import plotly.express as px

# Wrap the 2-D SVD output in a frame; the two component columns keep their default integer
# labels 0 and 1.
df_decomposed = pd.DataFrame(res)
# Attach the titles by row position. `res` follows the row order of `df`, whereas assigning the
# Series directly aligns on index labels and would silently produce NaN or misplaced titles if
# `df` did not carry a default 0..n-1 index.
df_decomposed['title'] = df['title'].to_numpy()
# NOTE: rendering the figure inline requires the `nbformat` package (>=4.2.0) to be installed
# in the kernel environment; without it plotly raises a ValueError when the figure is displayed.
px.scatter(df_decomposed, x=0, y=1, text='title', width=800, height=800)

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [12]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
import torch

class KnnCBF:
    """Content-based recommender built on a k-nearest-neighbour lookup.

    Every candidate item is scored from the ratings of the rated items that lie
    closest to it in feature space; the best-scoring candidates are returned.
    """

    def __init__(self, items,
                 item_col='app_id',
                 score_col='is_recommended',
                 nearest_k=3,
                 metric="manhattan"):
        """
        Args:
            items:     (DataFrame) item table holding `item_col` plus the feature
                       columns. Every column other than `item_col` is used as a
                       feature, so they are expected to be numeric.
            item_col:  (String) name of the item-id column
            score_col: (String) name of the rating column in the interactions frame
            nearest_k: (Integer) number of nearest rated items used to score a candidate
            metric:    (String) distance metric handed to KNeighborsClassifier
        """

        self.item_col = item_col
        self.score_col = score_col
        self.nearest_k = nearest_k
        self.metric = metric
        self.items = items

    def fit(self, feature_vector, y, test_data):
        """Fit a neighbour index on the rated items and query it with the candidates.

        Only the neighbour lookup is used; no class prediction is made.

        Args:
            feature_vector: features of the rated items, one row per item
            y:              rating of each rated item (required by the classifier API)
            test_data:      features of the candidate items, same columns as
                            `feature_vector`

        Returns:
            (distances, indices) as returned by `kneighbors`: for every candidate
            row, the distances to and the row positions (within `feature_vector`)
            of its nearest rated items.
        """
        # Never ask for more neighbours than there are rated items to choose from.
        n_neighbors = min(self.nearest_k, len(feature_vector))
        classifier = KNeighborsClassifier(
            n_neighbors=n_neighbors,
            metric=self.metric
        )
        classifier.fit(feature_vector, y)

        return classifier.kneighbors(test_data)

    def fit_predict(self, df_pred, filter_index, k=10):
        """Score the not-yet-rated items and return the `k` best ones.

        Args:
            df_pred:      (DataFrame) interactions holding `item_col` and `score_col`.
                          It is not modified.
            filter_index: boolean mask aligned with the rows of `self.items`; only
                          rows where it is True are candidates
            k:            (Integer) maximum number of items to return

        Returns:
            DataFrame with the columns `item_col` and 'predicted_score', ordered from
            the highest to the lowest score. It is empty when there is no rated item,
            no candidate, or `k` is not positive.
        """
        item_col, score_col = self.item_col, self.score_col

        select_row = self.items[item_col].isin(df_pred[item_col])
        # Merge only the rating column so unrelated interaction columns cannot leak
        # into (or clash with) the feature columns.
        df_preferences = self.items[select_row].merge(
            df_pred[[item_col, score_col]], on=[item_col]
        )
        df_test = self.items[~select_row & filter_index]

        if df_preferences.empty or df_test.empty or k <= 0:
            return pd.DataFrame({item_col: [], 'predicted_score': []})

        # Train and query matrices must share exactly the same feature columns.
        feature_cols = [col for col in self.items.columns if col != item_col]
        label = df_preferences[score_col]
        X = df_preferences[feature_cols]
        test = df_test[feature_cols]

        neighbor_distances, neighbor_indices = self.fit(X, label, test)

        # `neighbor_indices` are row positions within X, so index positionally.
        rating = label.to_numpy(dtype=float)[neighbor_indices]
        # Turn distances into similarities so that closer rated items weigh more;
        # weighting by the raw distance would rank the most dissimilar items first.
        similarity = 1.0 / (1.0 + neighbor_distances)
        result = (rating * similarity).mean(axis=1)

        # Highest score first; the stable sort keeps ties in candidate order.
        top_positions = np.argsort(-result, kind="stable")[:min(k, len(result))]

        pred_result = pd.DataFrame({
            item_col: df_test[item_col].to_numpy()[top_positions],
            'predicted_score': result[top_positions]
        })

        return pred_result