In [1]:
import pandas as pd
import math

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter(action='ignore', category=FutureWarning)

import sys

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error

Import Datasets

In [25]:
rating_df = pd.read_csv("data/final_ratings_v3.csv") #final\data\final_ratings_v3.csv
games_df = pd.read_csv("data/game_learn_df_v3.csv")

game_info = pd.read_csv("data/bgref.csv")

In [6]:
rating_df.describe()

Unnamed: 0,bgg_id,Rating
count,4479191.0,4479191.0
mean,120729.1,6.638515
std,99896.81,1.529655
min,1.0,9.999999999999999e-31
25%,21523.0,6.0
50%,122515.0,7.0
75%,198740.0,7.75
max,374173.0,10.0


In [7]:
rating_df.groupby('Username')[['bgg_id']].count().sort_values('bgg_id',ascending=False).loc[rating_df.groupby('Username')[['bgg_id']].count().sort_values('bgg_id',ascending=False)['bgg_id']<1000000]

Unnamed: 0_level_0,bgg_id
Username,Unnamed: 1_level_1
oldgoat3769967,7619
leffe dubbel,4404
be4tngu,4001
tomvasel,3934
warta,3840
...,...
czot,501
Graybillion,501
r0land1199,501
harlaquinne,501


### Predefined Prep Functions

In [12]:
red_game = games_df.copy()

#### Scaling Columns

In [13]:
def ScaleMeUpScotty(wf = red_game):
    wf.set_index('bgg_id',inplace=True)
    scaler = MinMaxScaler()

    for col in wf.columns:
        colly = wf.loc[:, col].values.reshape(-1, 1)
        wf.loc[:, col] = scaler.fit_transform(colly)

    # Print the scaled DataFrame
    # print("Scaled Dataset Using MinMaxScaler")
    wf['avg_rating'] = wf['avg_rating']#*2
    wf['complexity']  =   wf['complexity']# *2
    wf.reset_index(inplace=True)

    return(wf)

In [14]:
def weight_game(user='ecoboardgeek123', wd=rating_df):
    wd = wd.loc[wd['Username']==user]
    if (((wd['Rating'].max())-(wd['Rating'].min())) <=2):
        substractor = 5
    else:
        substractor = wd['Rating'].mean()
    wd['Rating']=wd['Rating']-substractor
    result_dict = wd.set_index('bgg_id')['Rating'].to_dict()
    # print(f"Spannweite = {wd['Rating'].max()-wd['Rating'].min()} , Substractor = {substractor}")
    return(result_dict)

In [15]:
def col_dropper(wd=red_game, bgg_id_list=[]):
    wd_red = wd.loc[wd['bgg_id'].isin(bgg_id_list)]
    wd_red = wd_red.sum()
    wd_red = list(wd_red.loc[wd_red==0].reset_index()['index'])
    answer = wd.drop(columns=(wd_red))
    return(answer)
    

In [16]:
def similar_games(games_df=red_game, alt=10, bgg_ids_with_weights={}):
    # Get the attributes of the specified games in the input list
    games_df = col_dropper(wd=games_df, bgg_id_list=list(bgg_ids_with_weights.keys()))
    selected_games_attributes = games_df[games_df['bgg_id'].isin(bgg_ids_with_weights.keys())].iloc[:, 2:]

    if selected_games_attributes.empty:
        print("No games found for the specified bgg_ids.")
        return pd.DataFrame()

    # Calculate the similarity between all games and the specified games using cosine similarity
    similarity_scores = (cosine_similarity(selected_games_attributes, games_df.iloc[:, 2:]))

    # Apply the weights to the similarity scores
    for bgg_id, weight in bgg_ids_with_weights.items():
        indices = games_df[games_df['bgg_id'] == bgg_id].index
        if len(indices) > 0:
            index = indices[0]
            similarity_scores[:, index] *= weight

    # Sum the similarity scores across the rows
    total_similarity_scores = (similarity_scores.mean(axis=0)
)
    # Add similarity scores as a new column to the DataFrame
    games_with_similarity = games_df.assign(similarity=total_similarity_scores)

    #Get rid of Games in rated Frame
    # print('bla',games_with_similarity.loc[~games_with_similarity['bgg_id'].isin(list(bgg_ids_with_weights.keys()))])#.loc[games_with_similarity['bgg_id'].isin(list[bgg_ids_with_weights.keys()])], 'test')
    games_with_similarity=games_with_similarity.loc[~games_with_similarity['bgg_id'].isin(list(bgg_ids_with_weights.keys()))]
    
    # Sort the games based on similarity scores in descending order
    top_similar_games = games_with_similarity.sort_values('similarity', ascending=False).head(alt)

    return list(top_similar_games['bgg_id'])

#### Second Stage

In [17]:
def minbo_reduction(wf, user):
    minbo = math.floor((wf.loc[wf['Username']==user,'count']))
    print(f'start minbo: {minbo}')
    correction = 1
    while (correction== 1):
        wt=wf.loc[wf['count']>=minbo]
        if len(wt)<=150:
            minbo = math.floor(minbo*.99)
        elif minbo <= 1:
            sys.exit("not enough Ratings")
        else:
            print(f'Minbo set to {minbo}')
            correction=0
            wf = wt
    return(wf)

In [18]:
def similar_taste(wf = rating_df, include_games = [], alt = 10, u_id= 'kainolagoni'):
    

    ## Filter location visited min minbo times
    filtered_df = wf[wf['Username'] == u_id]
    games = filtered_df['bgg_id'].unique()
    filtered_df = wf[wf['bgg_id'].isin(games)]
    count_table = filtered_df['Username'].value_counts().reset_index()
    count_table.columns = ['Username', 'count']

    count_table = minbo_reduction(wf = count_table, user = u_id)

    user_filter = wf[wf['bgg_id'].isin(include_games)]['Username'].unique()

    count_table=count_table.loc[(count_table['Username'].isin(user_filter)) | (count_table['Username'] == u_id),'Username']

    keeper = wf.copy()

    wf=wf.loc[wf['Username'].isin(count_table)]

    only_known = wf.copy()
    only_known.loc[only_known['Username']==u_id,'bgg_id']
    only_known.loc[only_known['bgg_id'].isin(only_known.loc[only_known['Username']==u_id,'bgg_id'])]

    users_items = pd.pivot_table(
        data=wf.loc[wf['bgg_id'].isin(include_games)],
        values='Rating',
        index='Username',
        columns='bgg_id'
    )

    known_items = pd.pivot_table(
        data=only_known,
        values='Rating',
        index='Username',
        columns='bgg_id'
    )

    users_items.fillna(0, inplace=True)
    known_items.fillna(0, inplace=True)

    user_similarities = pd.DataFrame(
        cosine_similarity(known_items),
        columns=known_items.index,
        index=known_items.index
    )

    weights = (
        user_similarities.query("Username!=@u_id")[u_id] / sum(user_similarities.query("Username!=@u_id")[u_id])
    )
        
    # select restaurants that the inputed user has not visited
    not_visited_restaurants = users_items

    # dot product between the not-visited-restaurants and the weights
    weighted_averages = pd.DataFrame(not_visited_restaurants.T.dot(weights), columns=["predicted_rating"])

    result = weighted_averages.sort_values("predicted_rating", ascending=False).head(alt)

    result.reset_index(inplace=True)
    
    return(result)

#### All in One

In [19]:
def gib_spiele_digga(f_alt = 10000, s_alt = 10, user = 'kainolagoni', rat_df = rating_df, game_frame=games_df):
    answer = (
        similar_taste(
            wf = rat_df
            , include_games = similar_games(
                games_df=ScaleMeUpScotty(
                    wf = game_frame
                )
                , alt=f_alt
                , bgg_ids_with_weights=weight_game(
                    user
                    , wd=rat_df
                )
            )
            , alt = s_alt
            , u_id= user
        )
    )
    return(answer)

In [27]:
test_gatherer = gib_spiele_digga(user='r0land1199')
test_gatherer

start minbo: 501
Minbo set to 297


Unnamed: 0,bgg_id,predicted_rating
0,155426,6.96795
1,182874,6.854465
2,50,6.704954
3,40692,6.70295
4,170042,6.678844
5,194655,6.669666
6,54043,6.644901
7,244522,6.585884
8,102680,6.571137
9,146886,6.521367


In [36]:
X= test_gatherer
Y= game_info
def get_feature(result_file, feature_file):
    return(Y.loc[feature_file['bgg_id'].isin(result_file['bgg_id'])])

In [37]:
get_feature(test_gatherer, game_info)

Unnamed: 0,bgg_id,name,image,video
45,50,Lost Cities,https://cf.geekdo-images.com/AL5D-dXabY-Lk3PqI...,http://www.youtube.com/watch?v=TjMmxEWc42o
23207,40692,Small World,https://cf.geekdo-images.com/aoPM07XzoceB-RydL...,http://www.youtube.com/watch?v=iFaO8fvdD7k
24574,54043,Jaipur,https://cf.geekdo-images.com/_LTujSe_o16nvjDC-...,http://www.youtube.com/watch?v=dTcA8SnBUpE
29442,102680,Trajan,https://cf.geekdo-images.com/djdeCr__iNV5rMnGn...,http://www.youtube.com/watch?v=9InyCaUvKgc
34969,146886,La Granja,https://cf.geekdo-images.com/ufUK_7N_13rYhsIur...,http://www.youtube.com/watch?v=4QaezIw8QPk
36303,155426,Castles of Mad King Ludwig,https://cf.geekdo-images.com/jVAMlNeDMVZakxJPy...,http://www.youtube.com/watch?v=NwA4pKmtfeg
38518,170042,Raiders of the North Sea,https://cf.geekdo-images.com/hXdfAhgZ8OeyWLunt...,http://www.youtube.com/watch?v=JPWHi2z6sd4
40692,182874,Grand Austria Hotel,https://cf.geekdo-images.com/GlCmHjdLwv3SqSjv5...,http://www.youtube.com/watch?v=9u3kocuG_Go
42283,194655,Santorini,https://cf.geekdo-images.com/665Ply7Ho1WVf1v1i...,http://www.youtube.com/watch?v=tSgBvcdtCVA
48220,244522,That's Pretty Clever!,https://cf.geekdo-images.com/3-unuy5JW1cC8C6EU...,http://www.youtube.com/watch?v=N6vUSIiuvvQ


In [28]:
game_info

Unnamed: 0,bgg_id,name,image,video
0,1,Die Macher,https://cf.geekdo-images.com/rpwCZAjYLD940NWwP...,http://www.youtube.com/watch?v=ymjkl1bUD4w
1,2,Dragonmaster,https://cf.geekdo-images.com/oQYhaJx5Lg3KcGis2...,http://www.youtube.com/watch?v=zPrn3rF9oPg
2,3,Samurai,https://cf.geekdo-images.com/o9-sNXmFS_TLAb7Zl...,http://www.youtube.com/watch?v=1fPePa7Gs-8
3,4,Tal der Könige,https://cf.geekdo-images.com/nYiYhUlatT2DpyXaJ...,
4,5,Acquire,https://cf.geekdo-images.com/FfguJeknahk88vKT7...,http://www.youtube.com/watch?v=OmqAkJnqmjE
...,...,...,...,...
67477,391128,A game about auctioning cubes after winning an...,https://cf.geekdo-images.com/OECKk9FsOFbR5JIv1...,
67478,391161,Medang,https://cf.geekdo-images.com/TOTnQidtHg9h2z9YH...,
67479,391191,The Smurfs: Hidden Village,https://cf.geekdo-images.com/477p-RSmx9-Oc7n8D...,
67480,391288,Firefly: The Game – 10th Anniversary Collector...,https://cf.geekdo-images.com/FtTleN6TrwDz378_T...,http://www.youtube.com/watch?v=J-acOorCFYk
