In [26]:
from sklearn.decomposition import TruncatedSVD
from scipy.sparse.linalg import svds
import pickle

import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

import motor.motor_asyncio
# 데이터 불러오기
def load_data():
    # result = get_player_prev_solo_rank()
    # score_data = pd.json_normalize(result)
    score_data = pd.read_csv('data/score_data.csv')
    champ_data = pd.read_csv('data/champ_data.csv')
    return score_data, champ_data

# 데이터 전처리
def preprocess_data(score_data, champ_data):
    # 피벗테이블 형태로 만들기
    user_champ_score = score_data.pivot(
        index='user_id',
        columns='champion',
        values='score'
    ).fillna(0)

    # champ_user_score = user_champ_score.T.to_numpy()

    matrix = user_champ_score.to_numpy()

    print(matrix)

    # 정규화
    # 유저의 평균 Score(영화의 별점)을 각 유저의 score에서 감산
    user_scores_mean = np.mean(matrix, axis = 1)

    matrix_user_mean = matrix - user_scores_mean.reshape(-1, 1)

    # svd(특이값 분해) 모델링 부분
    U, sigma, Vt = svds(matrix_user_mean, k = 12)
    # sigma는 0이 아닌 특이값의 나열(1차원 행렬)으로(sigma.shape (12,)), 0이 포함된 대칭행렬을 사용하기 위해 np.diag 적용(12,12)
    sigma = np.diag(sigma)

    # U, Sigma, Vt의 내적을 수행하면, 다시 원본 행렬로 복원이 된다. 
    # 거기에 + 사용자 평균 rating을 적용한다. 
    svd_user_predicted_scores = np.dot(np.dot(U, sigma), Vt) + user_scores_mean.reshape(-1, 1)
    # df_svd_preds = pd.DataFrame(svd_user_predicted_scores, columns = user_champ_score.columns)
    return pd.DataFrame(svd_user_predicted_scores, columns = user_champ_score.columns)

# 추천 함수
def recommend_champs(df_svd_preds, user_id, ori_champs_df, ori_scores_df, num_recommendations=5):
    
    # index와 user_id를 맞추는 부분. 현재는 index와 user_id 둘 다 0부터 시작하므로 변화 x.
    user_row_number = user_id 
    
    # 최종적으로 만든 pred_df에서 사용자 index에 따라 챔피언 데이터 정렬 -> 챔피언 평점이 높은 순으로 정렬 됨
    sorted_user_predictions = df_svd_preds.iloc[user_row_number].sort_values(ascending=False)

    # 원본 평점 데이터에서 user id에 해당하는 데이터를 뽑아낸다. 
    user_data = ori_scores_df[ori_scores_df.user_id == user_id]
    
    # 위에서 뽑은 user_data와 원본 챔피언 데이터를 합친다. 
    user_history = pd.merge(user_data, ori_champs_df, left_on = 'champion', right_on='name', how='inner').sort_values(['score'], ascending=False)
    print(user_history)
    # 원본 챔피언 데이터에서 사용자가 본 챔피언 데이터를 제외한 데이터를 추출
    recommendations = ori_champs_df[~ori_champs_df['champion'].isin(user_history['id'])]

    # 사용자의 챔피언 평점이 높은 순으로 정렬된 데이터와 위 recommendations을 합친다. 
    recommendations = recommendations.merge( pd.DataFrame(sorted_user_predictions).reset_index(), on = 'id')
    # 컬럼 이름 바꾸고 정렬해서 return
    recommendations = recommendations.rename(columns = {user_row_number: 'Predictions'}).sort_values('Predictions', ascending = False).iloc[:num_recommendations, :]
                      

    return user_history, recommendations


if __name__ == "__main__":
    score_data, champ_data = load_data()
    df_svd_preds = preprocess_data(score_data, champ_data)
    # 데이터를 pickle 파일로 저장
    file_path = 'models/df_svd_preds.pkl'
    with open(file_path, 'wb') as f:
        pickle.dump(df_svd_preds, f)

    
    


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [17]:
score_data, champ_data = load_data()

In [18]:
print(score_data)

     user_id champion  score
0          1    Shaco  25.84
1          1    Brand  10.01
2          1   Xerath   2.97
3          4    Yuumi  57.72
4          4    Milio  31.80
..       ...      ...    ...
435      100     Ashe  10.07
436      100   Ezreal   4.00
437      100    Sivir   9.94
438      100   Samira   7.98
439      100   Draven   3.96

[440 rows x 3 columns]


In [12]:
print(champ_data)

     champion_key           id       name  difficulty
0               1        Annie         애니           6
1               2         Olaf        올라프           3
2               3        Galio        갈리오           5
3               4  TwistedFate  트위스티드 페이트           9
4               5      XinZhao       신 짜오           2
..            ...          ...        ...         ...
162           897       KSante        크산테           9
163           901      Smolder        스몰더           6
164           902        Milio        밀리오           5
165           910         Hwei        흐웨이           9
166           950      Naafiri        나피리           2

[167 rows x 4 columns]


In [24]:
df_svd_preds = preprocess_data(score_data, champ_data)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [20]:
df_svd_preds

champion,Aatrox,Ahri,Akali,Akshan,Alistar,Annie,Aphelios,Ashe,Azir,Bard,...,Xerath,Yasuo,Yone,Yuumi,Zac,Zed,Zeri,Ziggs,Zilean,Zoe
0,0.007161,0.232412,0.235164,0.466847,0.530469,0.437757,0.350769,0.293066,0.376631,0.449825,...,0.458043,0.154856,-0.239825,-0.219316,0.575781,0.410640,0.425673,0.447788,0.431028,0.365333
1,1.266825,1.261756,0.374909,0.389702,-1.011243,0.708771,5.694430,2.902907,0.663070,0.862704,...,0.594218,1.300141,1.822952,51.941962,-0.802698,0.470521,0.816973,0.750842,0.694386,0.404396
2,0.052984,1.446903,0.389752,0.115046,1.637840,0.113271,-3.133722,1.111124,0.216915,0.115930,...,0.098519,-0.389345,-0.049846,0.735575,-0.384763,0.094949,0.168552,0.245068,0.112639,0.372101
3,-0.190672,2.083033,0.206822,0.044947,2.623565,-0.007155,-0.023474,2.432382,0.140507,-0.003955,...,0.018375,-0.719426,-0.751453,-0.616457,0.002853,-0.087276,0.374853,0.195324,-0.007596,0.366705
4,3.844660,2.449615,4.973320,-0.136615,-1.625811,-0.076206,-1.913206,0.601108,1.656600,-0.086996,...,-0.164787,6.026938,13.767637,-0.613229,-1.248045,1.058230,0.104557,-0.095587,-0.076945,2.165652
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,-0.792375,0.058079,-0.143957,1.024990,0.701568,0.964315,3.605069,0.608596,0.616283,1.010316,...,1.020978,0.101552,-1.521727,0.526020,1.521982,0.807676,0.995878,0.967149,0.939222,0.481771
79,0.612592,0.980621,-0.034091,0.607801,0.456672,0.629063,5.513772,1.503853,0.459410,0.651925,...,0.633435,2.123270,2.588825,-0.740881,1.588680,0.759887,0.913730,0.643613,0.609796,0.263194
80,2.643780,1.140123,0.975856,1.426647,2.249487,1.321603,0.682080,1.685266,1.299468,1.390542,...,1.452732,0.413876,-0.775089,-1.139773,1.604960,1.233663,1.421972,1.431002,1.288180,1.243858
81,-0.231677,1.014067,-0.065589,0.062301,1.280946,0.042937,1.290359,1.396839,0.053783,0.045336,...,0.057871,-0.393141,-0.671376,-0.354348,0.136485,-0.039011,0.281712,0.140215,0.041784,0.133461


In [21]:
recommend_champs(df_svd_preds, 4, champ_data, score_data, 3)

Empty DataFrame
Columns: [user_id, champion, score, champion_key, id, name, difficulty]
Index: []


KeyError: 'champion'