In [12]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import seaborn as sns
import time
 
critics = {
    
    
    '조용필': {
        '택시운전사': 2.5,
        '겨울왕국': 3.5,
        '리빙라스베가스': 3.0,
        '넘버3': 3.5,
        '사랑과전쟁': 2.5,
        '세계대전': 3.0,
    },
    'BTS': {
        '택시운전사': 1.0,
        '겨울왕국': 4.5,
        '리빙라스베가스': 0.5,
        '넘버3': 1.5,
        '사랑과전쟁': 4.5,
        '세계대전': 5.0,
    },
    '강감찬': {
        '택시운전사': 3.0,
        '겨울왕국': 3.5,
        '리빙라스베가스': 1.5,
        '넘버3': 5.0,
        '세계대전': 3.0,
        '사랑과전쟁': 3.5,
    },
    '을지문덕': {
        '택시운전사': 2.5,
        '겨울왕국': 3.0,
        '넘버3': 3.5,
        '세계대전': 4.0,
    },
    '김유신': {
        '겨울왕국': 3.5,
        '리빙라스베가스': 3.0,
        '세계대전': 4.5,
        '넘버3': 4.0,
        '사랑과전쟁': 2.5,
    },
    '유성룡': {
        '택시운전사': 3.0,
        '겨울왕국': 4.0,
        '리빙라스베가스': 2.0,
        '넘버3': 3.0,
        '세계대전': 3.5,
        '사랑과전쟁': 2.0,
    },
    '이황': {
        '택시운전사': 3.0,
        '겨울왕국': 4.0,
        '세계대전': 3.0,
        '넘버3': 5.0,
        '사랑과전쟁': 3.5,
    },
    '이이': {'겨울왕국': 4.5, '사랑과전쟁': 1.0,
             '넘버3': 4.0},
}
 
 

In [13]:
def sim_viewer(movie_df, names):
    
    names = [ names for names in movie_df.keys() ]
    mat   = np.zeros((len(names) , len(names)))
 
    for i in range(len(names)):
        for k in range(len(names)):
            common_movies  = set(movie_df[names[i]].keys()) & (movie_df[names[k]].keys())
            list_movieDist =  np.array([ movie_df[names[i]][co_movie] - movie_df[names[k]][co_movie]  for co_movie in common_movies ])        
            mat[i][k] = 1 / (np.sqrt(list_movieDist.T @ list_movieDist) + 1)
    
    return mat
 


In [14]:
def get_NotViewed(movie_df,person1):
    movieList = []
    for dic in movie_df.values():
        movieList += list(dic.keys())
        
    movieList = list(set(movieList) - movie_df[person1].keys())
    return movieList



In [16]:
def get_allMovies(movie_df, movieList):
    movieList = set()
    for movie in critics.values():
        movieList |= movie.keys()
    return movieList
 

In [18]:
def recommend(movie_df,sim_mat, person):
    notViedMovies = get_NotViewed(movie_df, person)
 
    names       = np.array([ name  for name in critics.keys() ])
    person_idx  = np.where(names == person)[0][0]
    sim_arr     = sim_mat[person_idx]
    
    print(person, "이(가) 보지 않은 영화 : " , notViedMovies)
    
    rating_mat = np.zeros((len(names), len(notViedMovies)))
    
    ## 이 사람이 보지 않은 영화에 대해서 다른 사람이 준 평점.
    ## 행 names 열 movies
    for i in range(len(names)):
         for k in range(len(notViedMovies)):
            rating_mat[i][k] = movie_df[names[i]].get(notViedMovies[k])
            
    ## 각 영화 평점 * 그 사람과의 유사도의 합
    movie_mat = np.nansum(rating_mat * sim_arr.reshape(8,1), axis = 0)
    
    ## 각 영화를 본 사람만 추려서 그사람들의 유사도합 구하기
    sumOfSim = np.zeros((len(notViedMovies), 1))
 
    for i in range(len(rating_mat[0])):
        watched_idx = np.where( np.isnan(rating_mat[:,i]) != True  )[0]
        for k in watched_idx:
            sumOfSim[i] += sim_arr[k]
        
    ## 각 영화별 계산 결과
    res_rating = movie_mat / sumOfSim.T
    return notViedMovies[np.argmax(res_rating)]
 


In [20]:
names = np.array([ name  for name in critics.keys() ])
names


array(['조용필', 'BTS', '강감찬', '을지문덕', '김유신', '유성룡', '이황', '이이'], dtype='<U4')

In [21]:
movieList = get_allMovies(critics)
movieList


TypeError: get_allMovies() missing 1 required positional argument: 'movieList'

In [22]:
sim_mat = sim_viewer(critics, names)
sim_mat


array([[1.        , 0.17740533, 0.29429806, 0.47213595, 0.38742589,
        0.4       , 0.34054243, 0.34833148],
       [0.17740533, 1.        , 0.17176743, 0.24496553, 0.19182537,
        0.19404997, 0.17740533, 0.18863786],
       [0.29429806, 0.17176743, 1.        , 0.34054243, 0.28172905,
        0.27429189, 0.66666667, 0.2582457 ],
       [0.47213595, 0.24496553, 0.34054243, 1.        , 0.53589838,
        0.43050087, 0.32037724, 0.38742589],
       [0.38742589, 0.19182537, 0.28172905, 0.53589838, 1.        ,
        0.34833148, 0.32037724, 0.35678917],
       [0.4       , 0.19404997, 0.27429189, 0.43050087, 0.34833148,
        1.        , 0.28172905, 0.4       ],
       [0.34054243, 0.17740533, 0.66666667, 0.32037724, 0.32037724,
        0.28172905, 1.        , 0.26747889],
       [0.34833148, 0.18863786, 0.2582457 , 0.38742589, 0.35678917,
        0.4       , 0.26747889, 1.        ]])

In [23]:
res = recommend(critics, sim_mat, '이이')
"추천 영화 : {}".format(res)

이이 이(가) 보지 않은 영화 :  ['택시운전사', '리빙라스베가스', '세계대전']


'추천 영화 : 세계대전'