# TV 프로그램 추천 시스템
## Demographic Filtering (인구통계학적 필터링)

In [1]:
import pandas as pd
import numpy as np

# Read the TV-show records from the JSON file (path is relative to the working directory).
df = pd.read_json(path_or_buf='all_tv_shows.json')

In [2]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,adult,backdrop_path,genre_ids,id,origin_country,original_language,original_name,overview,popularity,poster_path,first_air_date,name,vote_average,vote_count
0,False,/qFfWFwfaEHzDLWLuttWiYq7Poy2.jpg,[10767],2261,[US],en,The Tonight Show Starring Johnny Carson,,706.5629,/uSvET5YUvHNDIeoCpErrbSmasFb.jpg,1962-10-01,The Tonight Show Starring Johnny Carson,7.463,81
1,False,/ottT2Yt0OfHiHp3PHJTLNVV8JPE.jpg,"[18, 10766]",13945,[DE],de,"Gute Zeiten, schlechte Zeiten",,682.2134,/qujVFLAlBnPU9mZElV4NZgL8iXT.jpg,1992-05-11,"Gute Zeiten, schlechte Zeiten",5.7,38
2,False,/m0bV3qBiJBBlpFaaKjwHo13MVjm.jpg,"[35, 10767]",14981,[US],en,The Late Late Show with Craig Ferguson,,624.1593,/gGC7zSDgG0FY0MbM1pjfhTCWQBI.jpg,2005-01-03,The Late Late Show with Craig Ferguson,6.832,101
3,False,/kA50bkSC6bw5ZkutYrN9sLD9CZ9.jpg,"[10767, 35]",22980,[US],en,Watch What Happens Live with Andy Cohen,,615.9783,/onSD9UXfJwrMXWhq7UY7hGF2S1h.jpg,2009-07-16,Watch What Happens Live with Andy Cohen,5.103,68
4,False,/h0y3OzHzG4yNvn8u3Za6ByH8lrQ.jpg,"[18, 10766]",45789,[DE],de,Sturm der Liebe,,544.5528,/jfFNydakwvbeACEwSd2Gh8UWtba.jpg,2005-09-26,Sturm der Liebe,6.066,38


In [3]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(10000, 14)

In [4]:
# Column labels available in the loaded frame.
df.columns

Index(['adult', 'backdrop_path', 'genre_ids', 'id', 'origin_country',
       'original_language', 'original_name', 'overview', 'popularity',
       'poster_path', 'first_air_date', 'name', 'vote_average', 'vote_count'],
      dtype='object')

In [5]:
# c: mean of vote_average over every row of df; weighted_rating uses it as
# the baseline rating that low-vote entries are pulled towards.
c = df.loc[:, 'vote_average'].mean()
c

5.5497494

In [6]:
# m: 90th-percentile vote_count, used as the minimum number of votes
# a show needs in order to be ranked.
m = df['vote_count'].quantile(q=0.9)
m

369.0

In [7]:
# Keep only the rows whose vote_count reaches the cutoff m.
# Filter first, then copy: this yields an independent frame (safe to add
# columns to later) without first duplicating every row of df.
q_dramas = df.loc[df['vote_count'] >= m].copy()
q_dramas.shape

(1002, 14)

In [8]:
# Sanity check: vote counts of the qualifying rows in ascending order,
# so the smallest value can be compared against the cutoff m.
q_dramas['vote_count'].sort_values(ascending=True)

7538      369
7527      369
1817      369
9424      370
670       371
        ...  
733     15785
260     16952
801     18294
2941    18894
68      24985
Name: vote_count, Length: 1002, dtype: int64

In [9]:
def weighted_rating(x, m=m, c=c):
    """Blend an entry's own rating with the overall mean, weighted by votes.

    Computes ``v / (v + m) * R + m / (m + v) * c`` where ``v`` is
    ``x['vote_count']`` and ``R`` is ``x['vote_average']``. The more votes an
    entry has relative to ``m``, the closer the result is to its own rating;
    with few votes it is pulled towards ``c``.

    Args:
        x: Any object indexable by ``'vote_count'`` and ``'vote_average'``
            (e.g. a DataFrame row).
        m: Vote-count weight given to the baseline rating. The default is the
            value of the module-level ``m`` at the time this function was
            defined, not at call time.
        c: Baseline rating. The default is likewise captured from the
            module-level ``c`` at definition time.

    Returns:
        The weighted rating.
    """
    votes = x['vote_count']
    rating = x['vote_average']
    # Weight of the entry's own rating, and of the baseline, respectively.
    own_weight = votes / (votes + m)
    baseline_weight = m / (m + votes)
    return (own_weight * rating) + (baseline_weight * c)

In [10]:
# weighted_rating only performs arithmetic on x['vote_count'] and
# x['vote_average'], so it can take the whole frame and compute every score
# in one vectorized pass instead of a Python-level call per row
# (apply(..., axis=1)). The resulting values are the same.
q_dramas['score'] = weighted_rating(q_dramas)
q_dramas.head(3)

Unnamed: 0,adult,backdrop_path,genre_ids,id,origin_country,original_language,original_name,overview,popularity,poster_path,first_air_date,name,vote_average,vote_count,score
5,False,/oRdc2nn7jLOYy4fBdvmFKPsKzZE.jpg,"[80, 18, 9648]",2734,[US],en,Law & Order: Special Victims Unit,추악한 성범죄를 수사하는 뉴욕 경찰 성범죄전담수사반의 활약을 그린 드라마.,515.5464,/abWOCrIo7bbAORxcQyOFNJdnnmR.jpg,1999-09-20,로 앤 오더: 성범죄전담반,7.9,3936,7.69855
12,False,/1axH9IYz5fCHVUDNXLzivKEQObK.jpg,"[10763, 35]",2224,[US],en,The Daily Show,,411.1208,/ixcfyK7it6FjRM36Te4OdblAq4X.jpg,1996-07-22,The Daily Show,6.4,570,6.065876
28,False,/lY2DhbA7Hy44fAKddr06UrXWWaQ.jpg,[18],100088,[US],en,The Last of Us,"2003년, 기생 곰팡이가 세상을 휩쓸고 감염자가 생겨난다. 20년 후, 조엘은 격...",294.3984,/lXQh6FJGbo1xe4vE9WpyQDEUGU2.jpg,2023-01-15,더 라스트 오브 어스,8.569,5946,8.392578


In [11]:
# Rank the qualifying shows from highest to lowest weighted score,
# then preview the ten best with the columns relevant to the ranking.
q_dramas = q_dramas.sort_values(by='score', ascending=False)
q_dramas.loc[:, ['name', 'genre_ids', 'id', 'vote_count', 'vote_average', 'score']].head(10)

Unnamed: 0,name,genre_ids,id,vote_count,vote_average,score
246,브레이킹 배드,"[18, 80]",1396,15533,8.926,8.847655
223,릭 앤 모티,"[16, 35, 10765, 10759]",60625,10141,8.688,8.577818
2946,아케인,"[16, 10765, 18, 10759]",94605,5123,8.776,8.559233
3395,진격의 거인,"[16, 10765, 10759]",1429,6752,8.7,8.536759
801,기묘한 이야기,"[18, 10765, 9648]",66732,18294,8.595,8.53479
3465,체르노빌,[18],87108,6826,8.683,8.522309
719,베터 콜 사울,"[80, 18]",60059,5631,8.7,8.50626
3161,아바타 아앙의 전설,"[16, 10759, 10765]",246,4338,8.746,8.495434
5750,귀멸의 칼날,"[16, 10759, 10765]",85937,6690,8.649,8.486991
997,원피스,"[10759, 35, 16]",37854,4870,8.7,8.478117


In [12]:
# Rename only the 'name' column to 'title'. A targeted rename does not depend
# on the number or order of the other columns (overwriting .columns with a
# full hard-coded list would silently mislabel data if either changed), and
# re-running this cell is a harmless no-op.
q_dramas = q_dramas.rename(columns={'name': 'title'})
q_dramas[['title', 'genre_ids', 'id', 'vote_count', 'vote_average', 'score']].head(10)

Unnamed: 0,title,genre_ids,id,vote_count,vote_average,score
246,브레이킹 배드,"[18, 80]",1396,15533,8.926,8.847655
223,릭 앤 모티,"[16, 35, 10765, 10759]",60625,10141,8.688,8.577818
2946,아케인,"[16, 10765, 18, 10759]",94605,5123,8.776,8.559233
3395,진격의 거인,"[16, 10765, 10759]",1429,6752,8.7,8.536759
801,기묘한 이야기,"[18, 10765, 9648]",66732,18294,8.595,8.53479
3465,체르노빌,[18],87108,6826,8.683,8.522309
719,베터 콜 사울,"[80, 18]",60059,5631,8.7,8.50626
3161,아바타 아앙의 전설,"[16, 10759, 10765]",246,4338,8.746,8.495434
5750,귀멸의 칼날,"[16, 10759, 10765]",85937,6690,8.649,8.486991
997,원피스,"[10759, 35, 16]",37854,4870,8.7,8.478117


In [14]:
# Export the ten highest-scoring shows (q_dramas is already sorted by score,
# descending). Without .head(10) every qualifying row would be written,
# which contradicts the "top10" file name.
# force_ascii=False keeps the Korean titles readable in the output file.
(
    q_dramas[['title', 'genre_ids', 'id', 'poster_path', 'vote_count', 'vote_average', 'score']]
    .head(10)
    .to_json("dramas_top10.json", orient='records', force_ascii=False, indent=2)
)