In [1]:
from typing import List, Union
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse

from pydantic import BaseModel
from starlette.middleware.cors import CORSMiddleware
import certifi
import pandas as pd
from surprise import Reader, Dataset, SVD
from runSVD import get_unplayed_surprise, recomm_game_by_surprise
from surprise.dataset import DatasetAutoFolds
from surprise.dataset import Reader
from surprise import SVD

In [2]:
reader = Reader(line_format = 'user item rating', sep=',', rating_scale=(0.5,10))
data_folds = DatasetAutoFolds(ratings_file='./ratings949.csv', reader=reader)
trainset = data_folds.build_full_trainset()
algo = SVD(n_factors=50, n_epochs=20, random_state=42)
algo.fit(trainset)
games = pd.read_csv('./games_detailed_info.csv', encoding='UTF-8', usecols=[2,5])
ratings = pd.read_csv("./bgg-19m-reviews.csv", encoding='UTF-8')
total_games = games['id'].tolist().sort()

In [4]:
import joblib
joblib.dump(algo, './SVDalgo_last.pkl')

['./SVDalgo_last.pkl']

In [5]:
def recomm_game_by_surprise_sortbyiid(algo, userID, unplayed_games,games, top_n=10):
    predictions = [algo.predict(str(userID), str(gameId)) for gameId in unplayed_games]

    def sortkey_est(pred):
        return pred.iid
    predictions.sort(key=sortkey_est, reverse=True)
#     top_predictions = predictions
#     print(predictions)
#     top_game_ids = [int(pred.iid) for pred in top_predictions]
    top_game_ratings = [pred.est for pred in predictions]
#     top_game_titles = games[games.id.isin(top_game_ids)]['primary']
#     top_game_preds = [(ids, ratings, title) for ids, ratings, title in zip(top_game_ids, top_game_ratings, top_game_titles)]
#     top_game_preds = [{f"{ids}" : f"{ratings}"} for ids, ratings in zip(top_game_ids, top_game_ratings)]
    return top_game_ratings

In [6]:
def recommend_comb(idlist):
    df = pd.DataFrame(columns=total_games)
    for id in idlist:
        result = recomm_game_by_surprise_sortbyiid(algo, id, total_games, games, top_n=10)
#         print(result)
        df = df.append(result,ignore_index=True)
    return(df)

In [7]:
meandf = recommend_comb(['wilrh','Parsat']).mean()

  df = df.append(result,ignore_index=True)


In [8]:
print(meandf)

30549          NaN
822            NaN
13             NaN
68448          NaN
36218          NaN
            ...   
217378         NaN
18063          NaN
10052          NaN
165946         NaN
0         7.166771
Length: 21632, dtype: float64


In [9]:
recomm_game_by_surprise_sortbyiid(algo, 'wilrh', total_games, games, top_n=10)

[5.234076657545901,
 7.109131832972411,
 7.073736295450231,
 6.305969240959559,
 7.870280530875453,
 3.319284437772504,
 6.71978565309478,
 6.901685037927438,
 6.768619279657903,
 6.779865725717135,
 7.843689390619999,
 6.927954106982002,
 7.595165315654436,
 7.04074248777804,
 7.07988783513477,
 6.885900131248817,
 7.253888516585686,
 7.14116779702587,
 6.739397933925818,
 6.723249951105739,
 7.321269165955499,
 8.792283188228842,
 6.894673118564145,
 6.284583050810166,
 7.0909225107764735,
 7.923023615703295,
 7.765747140278027,
 6.480549545990908,
 7.5498197423163695,
 6.3337573159644425,
 8.5905068418318,
 7.290714423609939,
 5.874916679689367,
 7.758520124722221,
 7.468480544984169,
 7.993895419367185,
 5.463328199966672,
 8.939976690956396,
 7.26158183633058,
 8.09062641565559,
 7.571580156027666,
 7.867570773926932,
 6.464806034901034,
 7.49206899965769,
 6.727514606160579,
 7.474841144137265,
 6.335004740535508,
 7.260694929841765,
 6.984618224161742,
 5.6477648848155715,
 6.63

In [10]:
recomm_game_by_surprise_sortbyiid(algo, 'Parsat', total_games, games, top_n=10)

[4.759919985665996,
 6.695710054323028,
 7.85573549708775,
 6.106447739496126,
 7.536488258073443,
 2.8033146132657203,
 5.928797043632873,
 6.875345842263163,
 6.572133718221253,
 6.799334463524432,
 7.888793231032896,
 7.097551194652508,
 6.7403862303625734,
 7.049207463520617,
 6.4551738093075075,
 7.398155538662097,
 7.1349443985936585,
 6.585612488033105,
 6.714199710939859,
 6.088703058921756,
 7.026129889534392,
 8.166952065228553,
 6.811829685987767,
 6.258492884026875,
 7.0089825511537915,
 7.514634637356764,
 7.8851164483588905,
 6.942911181772682,
 7.407457373407263,
 6.138416675082486,
 8.567850135583633,
 7.461015597292309,
 5.557043211016664,
 7.127848068935627,
 6.961882693030612,
 8.556351526817181,
 5.187125752739989,
 8.48273096359409,
 7.050548897443682,
 8.125033465367473,
 7.644292740410712,
 7.259443297238297,
 6.297488975349424,
 7.359644667538377,
 6.502795652054629,
 6.941615956175851,
 5.757721773885862,
 7.238269727025707,
 6.590575495651345,
 4.8383287849936

In [72]:
total_games = sorted(games['id'].tolist())

In [92]:
df = pd.DataFrame(index=total_games)

In [83]:
df1 = pd.DataFrame(recomm_game_by_surprise_sortbyiid(algo, 'Parsat', total_games, games, top_n=10))

In [84]:
df2 = pd.DataFrame(recomm_game_by_surprise_sortbyiid(algo, 'wilrh', total_games, games, top_n=10))

In [95]:
res_df = pd.concat([df1,df2],axis=1,ignore_index=True)

In [91]:
res_df.to_csv('./combi.csv')

In [30]:
meandf = res_df.mean(axis='columns')

In [96]:
print(res_df)

              0         1
0      8.164557  7.453029
1      7.397916  7.740396
2      7.567211  7.587645
3      7.637065  7.549040
4      7.720915  7.886830
...         ...       ...
21626  5.910191  6.322198
21627  7.494511  7.578896
21628  7.765459  7.715931
21629  8.183499  8.496467
21630  8.851930  8.830711

[21631 rows x 2 columns]


In [32]:
meandf_sorted = meandf.sort_values(by=meandf.columns[0])

AttributeError: 'Series' object has no attribute 'columns'

In [33]:
meandf.sort(key=meandf[1])

AttributeError: 'Series' object has no attribute 'sort'

In [34]:
print(sorted(meandf[1]))

TypeError: 'numpy.float64' object is not iterable

In [35]:
df = pd.DataFrame(columns=total_games)

In [36]:
print(df)

Empty DataFrame
Columns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 82, 83, 84, 85, 87, 88, 89, 90, 91, 93, 94, 95, 96, 97, 98, 99, 101, 102, 103, 104, 105, 106, 108, 109, 110, ...]
Index: []

[0 rows x 21631 columns]


In [57]:
def recomm_game_by_surprise_sortbyiid(algo, userID, unplayed_games,games, top_n=10):
    predictions = [algo.predict(str(userID), str(gameId)) for gameId in unplayed_games]

    def sortkey_est(pred):
        return int(pred.iid)
    predictions.sort(key=sortkey_est, reverse=False)
#     top_predictions = predictions
#     print(predictions)
    top_game_ids = [int(pred.iid) for pred in predictions]
    top_game_ratings = [pred.est for pred in predictions]
#     top_game_titles = games[games.id.isin(top_game_ids)]['primary']
#     top_game_preds = [(ids, ratings, title) for ids, ratings, title in zip(top_game_ids, top_game_ratings, top_game_titles)]
#     top_game_preds = [{f"{ids}" : f"{ratings}"} for ids, ratings in zip(top_game_ids, top_game_ratings)]
#     print(top_game_preds)
    return top_game_ratings

In [56]:
df1 = pd.DataFrame.from_dict(recomm_game_by_surprise_sortbyiid(algo, 'Parsat', total_games, games, top_n=10), orient='index')

[{'1': '8.164557014470411'}, {'2': '7.397916091354096'}, {'3': '7.5672108673415535'}, {'4': '7.637064619895102'}, {'5': '7.720915100656052'}, {'6': '7.490051114461795'}, {'7': '6.928957444758298'}, {'8': '7.16821872487133'}, {'9': '7.039214440476867'}, {'10': '7.120982463786526'}, {'11': '6.21616996330751'}, {'12': '7.295630559150147'}, {'13': '5.982736589164141'}, {'14': '7.060754048548912'}, {'15': '8.983609883380087'}, {'16': '7.270834037266594'}, {'17': '7.713528897322899'}, {'18': '6.936594733643531'}, {'19': '7.348311378538934'}, {'20': '8.135236593083986'}, {'21': '6.847951989566983'}, {'22': '6.81572467263777'}, {'23': '8.553409254058574'}, {'24': '7.6065786092337015'}, {'25': '7.305533277178942'}, {'26': '7.63025040915607'}, {'27': '7.185150769945016'}, {'28': '7.55834341553222'}, {'29': '7.675549150171413'}, {'30': '7.682033684453881'}, {'31': '5.590904898674559'}, {'32': '6.84506104729608'}, {'34': '7.143123691454922'}, {'36': '7.0086421094782505'}, {'37': '6.677766374213658

AttributeError: 'list' object has no attribute 'values'

In [50]:
df1 = recomm_game_by_surprise_sortbyiid(algo, 'Parsat', total_games, games, top_n=10)

In [44]:
df = pd.DataFrame(index=total_games)

In [45]:
print(df)

Empty DataFrame
Columns: []
Index: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 82, 83, 84, 85, 87, 88, 89, 90, 91, 93, 94, 95, 96, 97, 98, 99, 101, 102, 103, 104, 105, 106, 108, 109, 110, ...]

[21631 rows x 0 columns]


In [51]:
df.append(df1, ignore_index = True)

  df.append(df1, ignore_index = True)


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,347900,348065,348406,348463,348955,349129,349131,349805,350736,350992
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43257,,,,,,,,,,,...,,,,,,5.910191355490424,,,,
43258,,,,,,,,,,,...,,,,,,,7.494511022136237,,,
43259,,,,,,,,,,,...,,,,,,,,7.765458840488366,,
43260,,,,,,,,,,,...,,,,,,,,,8.183498679400003,


In [52]:
print(df)

Empty DataFrame
Columns: []
Index: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63, 64, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 82, 83, 84, 85, 87, 88, 89, 90, 91, 93, 94, 95, 96, 97, 98, 99, 101, 102, 103, 104, 105, 106, 108, 109, 110, ...]

[21631 rows x 0 columns]


In [97]:
idx = pd.DataFrame(total_games)

In [98]:
res_df = pd.concat([df1,df2,idx],axis=1,ignore_index=True)

In [99]:
print(res_df)

              0         1       2
0      8.164557  7.453029       1
1      7.397916  7.740396       2
2      7.567211  7.587645       3
3      7.637065  7.549040       4
4      7.720915  7.886830       5
...         ...       ...     ...
21626  5.910191  6.322198  349129
21627  7.494511  7.578896  349131
21628  7.765459  7.715931  349805
21629  8.183499  8.496467  350736
21630  8.851930  8.830711  350992

[21631 rows x 3 columns]


In [102]:
res_df = res_df.set_index(res_df.columns[2])

In [103]:
meandf = res_df.mean(axis='columns')

In [107]:
print(list(meandf))

[7.808792817814024, 7.569155965202486, 7.577428081812487, 7.593052150550699, 7.803872382068229, 7.631740504973096, 7.170174980815691, 7.212184764114653, 7.143171397458836, 7.002178383685147, 6.316417232086022, 7.360016726214349, 5.703210568612483, 7.187193529206532, 8.926997016399518, 7.480998335737272, 7.704057401805407, 7.528759317151785, 7.133116109902991, 8.361194281222062, 6.92363490684446, 7.387088133882176, 8.339415490112707, 7.738659432301753, 6.950445950567213, 7.4997547212894204, 7.021393204455495, 7.267492793876155, 7.680133121120093, 7.850004816083336, 5.792985606447235, 6.9592618459430815, 7.136821776678442, 7.162193741290434, 6.648320814026533, 7.055725845035969, 6.761367561576544, 7.594162291301941, 7.100456920406439, 6.940797250751338, 7.274074045326707, 7.396940880191248, 7.364395628901086, 7.402857153097217, 7.772526508769317, 6.936935492053932, 7.1018101263940965, 6.654775888717343, 7.355031023941716, 6.866081611461787, 7.497586892626, 7.393335410663994, 5.7522648427

In [105]:
print(sorted(meandf[1]))

TypeError: 'numpy.float64' object is not iterable

In [106]:
meandf.argsort()

2
1         19683
2         17904
3         12375
4          1048
5         16017
          ...  
349129    19583
349131    18997
349805    18573
350736    21416
350992    21613
Length: 21631, dtype: int64

In [108]:
result = np.sort(meandf,axis=1 )

NameError: name 'np' is not defined

In [109]:
import numpy as np


In [110]:
result = pd.DataFrame(meandf)

In [111]:
print(result)

               0
2               
1       7.808793
2       7.569156
3       7.577428
4       7.593052
5       7.803872
...          ...
349129  6.116195
349131  7.536704
349805  7.740695
350736  8.339983
350992  8.841320

[21631 rows x 1 columns]


In [114]:
result = result.sort_values(by=result.columns[0],ascending=False)

In [115]:
print(result)

               0
2               
345976  9.731237
331212  9.574243
254127  9.533561
260234  9.499706
273814  9.468981
...          ...
203739  2.807955
1406    2.729848
144110  2.239240
240624  2.108536
276022  1.521036

[21631 rows x 1 columns]


In [134]:
li=result.iloc[0:10]

In [135]:
li.index.name = 'index'

In [130]:
print(li)

[[9.731237158056992], [9.57424269894161], [9.533561252939592], [9.499706017665874], [9.468980940221076], [9.455976592987193], [9.442178616612276], [9.404970802988217], [9.401252586245107], [9.38126736372459]]


In [136]:
li.columns

RangeIndex(start=0, stop=1, step=1)

In [137]:
li.index

Int64Index([345976, 331212, 254127, 260234, 273814, 259435, 262201, 281515,
            322354, 148213],
           dtype='int64', name='index')

In [138]:
list(li.index)

[345976,
 331212,
 254127,
 260234,
 273814,
 259435,
 262201,
 281515,
 322354,
 148213]

[{'gameId': '12896', 'rating': '9.731237158056992'}, {'gameId': '15542', 'rating': '9.57424269894161'}, {'gameId': '84913', 'rating': '9.533561252939592'}, {'gameId': '65559', 'rating': '9.499706017665874'}, {'gameId': '39914', 'rating': '9.468980940221076'}, {'gameId': '66537', 'rating': '9.455976592987193'}, {'gameId': '60872', 'rating': '9.442178616612276'}, {'gameId': '35376', 'rating': '9.404970802988217'}, {'gameId': '17995', 'rating': '9.401252586245107'}, {'gameId': '246855', 'rating': '9.38126736372459'}, {'gameId': '157026', 'rating': '9.369823750606312'}, {'gameId': '30261', 'rating': '9.353590989795983'}, {'gameId': '94002', 'rating': '9.344582178243506'}, {'gameId': '13356', 'rating': '9.322114051808725'}, {'gameId': '183883', 'rating': '9.319257411106067'}, {'gameId': '39345', 'rating': '9.300648226655102'}, {'gameId': '34377', 'rating': '9.287372998295872'}, {'gameId': '16933', 'rating': '9.28551716919536'}, {'gameId': '58421', 'rating': '9.269977671455411'}, {'gameId': 

In [159]:
def get_unplayed_surprise(ratings, total_games, userID):
    played_games = ratings[ratings['user']==userID]['ID'].tolist()
    unplayed_games = [game for game in total_games if game not in played_games]
    print(f'특정{userID} 유저가 플레이한 게임 수: {len(played_games)}\n 추천한 게임 개수: {len(unplayed_games)}\n 전체 게임수:{len(total_games)}')
    return unplayed_games

def recomm_game_by_surprise(algo, userID, unplayed_games,games, top_n=10):
    predictions = [algo.predict(str(userID), str(gameId)) for gameId in unplayed_games]

    def sortkey_est(pred):
        return pred.est
    predictions.sort(key=sortkey_est, reverse=True)
    top_predictions = predictions[:top_n]
    top_game_ids = [int(pred.iid) for pred in top_predictions]
    top_game_ratings = [pred.est for pred in top_predictions]
    top_game_titles = games[games.id.isin(top_game_ids)]['primary']
    top_game_preds = [(ids, ratings, title) for ids, ratings, title in zip(top_game_ids, top_game_ratings, top_game_titles)]
    for top_game in top_game_preds:
        print('추천게임이름',top_game[2])
        print('예측평점:',top_game[1])
        print()     
    return top_game_preds

In [160]:
unplayedlist =  get_unplayed_surprise(ratings, total_games,'Parsat')

특정Parsat 유저가 플레이한 게임 수: 178
 추천한 게임 개수: 21453
 전체 게임수:21631


In [161]:
recomm_game_by_surprise(algo, 'Parsat', unplayedlist,games, top_n=10)

추천게임이름 Tainted Grail: The Fall of Avalon
예측평점: 10

추천게임이름 Descent: Legends of the Dark
예측평점: 10

추천게임이름 Company of Heroes
예측평점: 9.780225013851242

추천게임이름 Brazil: Imperial
예측평점: 9.693497882629988

추천게임이름 World At War 85: Storming the Gap
예측평점: 9.653038078439607

추천게임이름 Deliverance
예측평점: 9.632032425513733

추천게임이름 Europa Universalis: The Price of Power
예측평점: 9.630853418240328

추천게임이름 Middle-earth Strategy Battle Game: The Lord of the Rings – Battle of Pelennor Fields
예측평점: 9.625957757394742

추천게임이름 Ostroleka 26 maja 1831
예측평점: 9.577753563587885

추천게임이름 Aeon's End: Legacy of Gravehold
예측평점: 9.541236718309982



[(264220, 10, 'Tainted Grail: The Fall of Avalon'),
 (273814, 10, 'Descent: Legends of the Dark'),
 (254127, 9.780225013851242, 'Company of Heroes'),
 (249277, 9.693497882629988, 'Brazil: Imperial'),
 (259435, 9.653038078439607, 'World At War 85: Storming the Gap'),
 (331212, 9.632032425513733, 'Deliverance'),
 (322708, 9.630853418240328, 'Europa Universalis: The Price of Power'),
 (209877,
  9.625957757394742,
  'Middle-earth Strategy Battle Game: The Lord of the Rings – Battle of Pelennor Fields'),
 (188124, 9.577753563587885, 'Ostroleka 26 maja 1831'),
 (281515, 9.541236718309982, "Aeon's End: Legacy of Gravehold")]