# 영화 평점 분석 실습

In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

## 1. 영화 평점 데이터 적재 및 전처리

In [2]:
# Load the user table. Fields in users.dat are separated by '::'; a
# multi-character separator is handled by the Python parsing engine.
# The last column is read as text: its values look like postal codes, and
# numeric parsing would drop leading zeros (a code such as '02460' would
# show up as 2460).
users = pd.read_csv('data/movielens/users.dat', sep='::', engine='python',
                    names=['사용자아이디', '성별', '연령', '직업', '지역'],
                    dtype={'지역': str})
users.head()

Unnamed: 0,사용자아이디,성별,연령,직업,지역
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117
3,4,M,45,7,2460
4,5,M,25,20,55455


In [3]:
# Load the ratings table from its '::'-separated file and preview it.
ratings = pd.read_csv(
    'data/movielens/ratings.dat',
    sep='::',
    engine='python',
    names=['사용자아이디', '영화아이디', '평점', '타임스탬프'],
)
ratings.head()

Unnamed: 0,사용자아이디,영화아이디,평점,타임스탬프
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [4]:
# Load the movie table from its '::'-separated file (decoded as latin-1)
# and preview it.
movies = pd.read_csv(
    'data/movielens/movies.dat',
    sep='::',
    engine='python',
    names=['영화아이디', '영화제목', '장르'],
    encoding='latin-1',
)
movies.head()

Unnamed: 0,영화아이디,영화제목,장르
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
users.shape

(6040, 5)

In [6]:
ratings.shape

(1000209, 4)

In [7]:
movies.shape

(3883, 3)

In [8]:
# Report the number of rows in each of the three loaded tables.
for label, table in (('사용자 수:', users), ('평점 수:', ratings), ('영화 수:', movies)):
    print(label, len(table))

사용자 수: 6040
평점 수: 1000209
영화 수: 3883


In [9]:
# Is there any user who never rated a movie? -> No: the number of distinct
# user ids in ratings equals the number of rows in users.
ratings['사용자아이디'].nunique()

6040

In [10]:
# Are there movies without any rating? -> Yes: 177 movies in the movie
# table never appear in ratings.
ratings['영화아이디'].nunique()

3706

In [11]:
# Check whether any movie title is duplicated.
# Movie ids are unique, but different ids could still share a title;
# the per-column distinct counts show whether that happens.
movies.nunique()

영화아이디    3883
영화제목     3883
장르        301
dtype: int64

In [12]:
# Combine the three tables into a single frame.
# The join keys are spelled out instead of relying on pd.merge's default of
# joining on every column name the two frames happen to share, so adding a
# same-named column to one table cannot silently change the join.
data = pd.merge(users, ratings, on='사용자아이디', how='inner')
data = pd.merge(data, movies, on='영화아이디', how='inner')
data.head()

Unnamed: 0,사용자아이디,성별,연령,직업,지역,영화아이디,평점,타임스탬프,영화제목,장르
0,1,F,1,10,48067,1193,5,978300760,One Flew Over the Cuckoo's Nest (1975),Drama
1,2,M,56,16,70072,1193,5,978298413,One Flew Over the Cuckoo's Nest (1975),Drama
2,12,M,25,12,32793,1193,4,978220179,One Flew Over the Cuckoo's Nest (1975),Drama
3,15,M,25,7,22903,1193,4,978199279,One Flew Over the Cuckoo's Nest (1975),Drama
4,17,M,50,1,95350,1193,5,978158471,One Flew Over the Cuckoo's Nest (1975),Drama


## 2. 보고 싶은 영화 찾기
영화들의 평점 평균을 구하여, 사람들에게 인정받는 (평점이 높은) 영화 찾기

In [13]:
# Mean rating per movie, keeping only the ten highest means.
# (Sorting the whole table with sort_values('평점', ascending=False) would
# give the full ranking instead of just the top ten.)
(
    data
    .pivot_table(index=['영화아이디', '영화제목'], values='평점', aggfunc='mean')
    .nlargest(10, '평점')
)

Unnamed: 0_level_0,Unnamed: 1_level_0,평점
영화아이디,영화제목,Unnamed: 2_level_1
787,"Gate of Heavenly Peace, The (1995)",5.0
989,Schlafes Bruder (Brother of Sleep) (1995),5.0
1830,Follow the Bitch (1998),5.0
3172,Ulysses (Ulisse) (1954),5.0
3233,Smashing Time (1967),5.0
3280,"Baby, The (1973)",5.0
3382,Song of Freedom (1936),5.0
3607,One Little Indian (1973),5.0
3656,Lured (1947),5.0
3881,Bittersweet Motel (2000),5.0


평균 평점이 만점인 영화들이 최상위에 위치함. 
일반적으로 평점이 만점인 경우는 대부분 평점의 개수가 매우 적은 경우이므로, 이를 확인하기 위해 평점의 개수도 함께 구해본다. 

In [14]:
# Per-movie mean rating together with the number of ratings behind it.
data.pivot_table(
    index=['영화아이디', '영화제목'],
    values='평점',
    aggfunc=['mean', 'count'],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,count
Unnamed: 0_level_1,Unnamed: 1_level_1,평점,평점
영화아이디,영화제목,Unnamed: 2_level_2,Unnamed: 3_level_2
1,Toy Story (1995),4.146846,2077
2,Jumanji (1995),3.201141,701
3,Grumpier Old Men (1995),3.016736,478
4,Waiting to Exhale (1995),2.729412,170
5,Father of the Bride Part II (1995),3.006757,296
...,...,...,...
3948,Meet the Parents (2000),3.635731,862
3949,Requiem for a Dream (2000),4.115132,304
3950,Tigerland (2000),3.666667,54
3951,Two Family House (2000),3.900000,40


In [15]:
# Ten highest mean ratings, shown with the rating count alongside.
# The sort key is the ('mean', '평점') entry of the two-level column index.
(
    data
    .pivot_table(
        index=['영화아이디', '영화제목'],
        values='평점',
        aggfunc=['mean', 'count'],
    )
    .nlargest(10, ('mean', '평점'))
)

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,count
Unnamed: 0_level_1,Unnamed: 1_level_1,평점,평점
영화아이디,영화제목,Unnamed: 2_level_2,Unnamed: 3_level_2
787,"Gate of Heavenly Peace, The (1995)",5.0,3
989,Schlafes Bruder (Brother of Sleep) (1995),5.0,1
1830,Follow the Bitch (1998),5.0,1
3172,Ulysses (Ulisse) (1954),5.0,1
3233,Smashing Time (1967),5.0,2
3280,"Baby, The (1973)",5.0,1
3382,Song of Freedom (1936),5.0,1
3607,One Little Indian (1973),5.0,1
3656,Lured (1947),5.0,1
3881,Bittersweet Motel (2000),5.0,1


In [16]:
# Goal: select movies whose mean rating is at least 4.5 and that have at
# least 100 ratings. Start from the per-movie mean/count table.
ex = data.pivot_table(
    index=['영화아이디', '영화제목'],
    values='평점',
    aggfunc=['mean', 'count'],
)

# Replace the two-level column labels with two plain names.
ex.columns = ['평균', '개수']
ex

Unnamed: 0_level_0,Unnamed: 1_level_0,평균,개수
영화아이디,영화제목,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Toy Story (1995),4.146846,2077
2,Jumanji (1995),3.201141,701
3,Grumpier Old Men (1995),3.016736,478
4,Waiting to Exhale (1995),2.729412,170
5,Father of the Bride Part II (1995),3.006757,296
...,...,...,...
3948,Meet the Parents (2000),3.635731,862
3949,Requiem for a Dream (2000),4.115132,304
3950,Tigerland (2000),3.666667,54
3951,Two Family House (2000),3.900000,40


In [17]:
# Keep movies with a mean rating of at least 4.5 and strictly more than
# 1000 ratings.
# NOTE(review): the comment in the cell that builds `ex` states a minimum of
# 100 ratings, while this filter requires more than 1000 - confirm which
# threshold is intended.
ex[ex['평균'].ge(4.5) & ex['개수'].gt(1000)]

Unnamed: 0_level_0,Unnamed: 1_level_0,평균,개수
영화아이디,영화제목,Unnamed: 2_level_1,Unnamed: 3_level_1
50,"Usual Suspects, The (1995)",4.517106,1783
318,"Shawshank Redemption, The (1994)",4.554558,2227
527,Schindler's List (1993),4.510417,2304
858,"Godfather, The (1972)",4.524966,2223


## [실습 #1] 여자들이 좋아하는 영화 찾기 
### - 여성 평점이 4.0 이상이고 여성 평점의 개수가 500개 이상인 영화

In [18]:
# Mean rating and rating count per movie title, split by gender.
# (Filtering data to 성별 == 'F' before pivoting on the title alone is an
# alternative way to get the female-only figures.)
ex1 = data.pivot_table(
    index='영화제목',
    columns='성별',
    values='평점',
    aggfunc=['mean', 'count'],
)

In [19]:
# Take the female ('F') cross-section of the gender level of the column
# index, which leaves plain 'mean' and 'count' columns.
ex1_F = ex1.xs('F', axis=1, level='성별')
ex1_F

Unnamed: 0_level_0,mean,count
영화제목,Unnamed: 1_level_1,Unnamed: 2_level_1
"$1,000,000 Duck (1971)",3.375000,16.0
'Night Mother (1986),3.388889,36.0
'Til There Was You (1997),2.675676,37.0
"'burbs, The (1989)",2.793478,92.0
...And Justice for All (1979),3.828571,35.0
...,...,...
"Zed & Two Noughts, A (1985)",3.500000,8.0
Zero Effect (1998),3.864407,59.0
Zero Kelvin (Kjærlighetens kjøtere) (1995),,
Zeus and Roxanne (1997),2.777778,9.0


In [20]:
# Movies popular among women: female mean rating of at least 4.0 backed by
# at least 500 female ratings.
여성인기영화 = ex1_F[ex1_F['mean'].ge(4.0) & ex1_F['count'].ge(500)]

In [21]:
여성인기영화

Unnamed: 0_level_0,mean,count
영화제목,Unnamed: 1_level_1,Unnamed: 2_level_1
American Beauty (1999),4.238901,946.0
Being John Malkovich (1999),4.15993,569.0
Braveheart (1995),4.016484,546.0
Casablanca (1942),4.30099,505.0
E.T. the Extra-Terrestrial (1982),4.08985,601.0
Fargo (1996),4.217656,657.0
Forrest Gump (1994),4.045031,644.0
L.A. Confidential (1997),4.106007,566.0
"Matrix, The (1999)",4.128405,514.0
"Princess Bride, The (1987)",4.342767,636.0


## [실습 #2] 실습 #1에서 구한 영화(여성인기영화)의 장르를 분석해 보자.
여성인기영화의 장르 통계 구하기

예를 들어, 여성인기영화 중 Drama 장르의 영화는 10개, Action 영화는 3개, ...

In [22]:
movies

Unnamed: 0,영화아이디,영화제목,장르
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
3878,3948,Meet the Parents (2000),Comedy
3879,3949,Requiem for a Dream (2000),Drama
3880,3950,Tigerland (2000),Drama
3881,3951,Two Family House (2000),Drama


In [23]:
여성인기영화.index

Index(['American Beauty (1999)', 'Being John Malkovich (1999)',
       'Braveheart (1995)', 'Casablanca (1942)',
       'E.T. the Extra-Terrestrial (1982)', 'Fargo (1996)',
       'Forrest Gump (1994)', 'L.A. Confidential (1997)', 'Matrix, The (1999)',
       'Princess Bride, The (1987)', 'Pulp Fiction (1994)',
       'Raiders of the Lost Ark (1981)', 'Saving Private Ryan (1998)',
       'Schindler's List (1993)', 'Shakespeare in Love (1998)',
       'Shawshank Redemption, The (1994)', 'Silence of the Lambs, The (1991)',
       'Sixth Sense, The (1999)', 'Star Wars: Episode IV - A New Hope (1977)',
       'Star Wars: Episode V - The Empire Strikes Back (1980)',
       'Toy Story (1995)', 'Wizard of Oz, The (1939)'],
      dtype='object', name='영화제목')

In [24]:
# Approach using isin(): keep the rows of movies whose title appears in the
# index of 여성인기영화.
movies[movies['영화제목'].isin(여성인기영화.index)]

Unnamed: 0,영화아이디,영화제목,장르
0,1,Toy Story (1995),Animation|Children's|Comedy
108,110,Braveheart (1995),Action|Drama|War
257,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Fantasy|Sci-Fi
293,296,Pulp Fiction (1994),Crime|Drama
315,318,"Shawshank Redemption, The (1994)",Drama
352,356,Forrest Gump (1994),Comedy|Romance|War
523,527,Schindler's List (1993),Drama|War
589,593,"Silence of the Lambs, The (1991)",Drama|Thriller
604,608,Fargo (1996),Crime|Drama|Thriller
900,912,Casablanca (1942),Drama|Romance|War


In [25]:
# Step 2: combine 여성인기영화 with the movies table.
# Approach using merge():
# reset_index() moves the row index (영화제목) into a regular column.
여성인기영화.reset_index()

Unnamed: 0,영화제목,mean,count
0,American Beauty (1999),4.238901,946.0
1,Being John Malkovich (1999),4.15993,569.0
2,Braveheart (1995),4.016484,546.0
3,Casablanca (1942),4.30099,505.0
4,E.T. the Extra-Terrestrial (1982),4.08985,601.0
5,Fargo (1996),4.217656,657.0
6,Forrest Gump (1994),4.045031,644.0
7,L.A. Confidential (1997),4.106007,566.0
8,"Matrix, The (1999)",4.128405,514.0
9,"Princess Bride, The (1987)",4.342767,636.0


In [26]:
# Attach id and genre to each popular movie by joining on the title.
# The key is named explicitly rather than left to pd.merge's default of
# joining on every column name the two frames share.
pd.merge(여성인기영화.reset_index(), movies, on='영화제목')

Unnamed: 0,영화제목,mean,count,영화아이디,장르
0,American Beauty (1999),4.238901,946.0,2858,Comedy|Drama
1,Being John Malkovich (1999),4.15993,569.0,2997,Comedy
2,Braveheart (1995),4.016484,546.0,110,Action|Drama|War
3,Casablanca (1942),4.30099,505.0,912,Drama|Romance|War
4,E.T. the Extra-Terrestrial (1982),4.08985,601.0,1097,Children's|Drama|Fantasy|Sci-Fi
5,Fargo (1996),4.217656,657.0,608,Crime|Drama|Thriller
6,Forrest Gump (1994),4.045031,644.0,356,Comedy|Romance|War
7,L.A. Confidential (1997),4.106007,566.0,1617,Crime|Film-Noir|Mystery|Thriller
8,"Matrix, The (1999)",4.128405,514.0,2571,Action|Sci-Fi|Thriller
9,"Princess Bride, The (1987)",4.342767,636.0,1197,Action|Adventure|Comedy|Romance


In [27]:
# Approach using concat():
# set_index() turns the given column (영화제목) into the row index.
movies.set_index('영화제목')

Unnamed: 0_level_0,영화아이디,장르
영화제목,Unnamed: 1_level_1,Unnamed: 2_level_1
Toy Story (1995),1,Animation|Children's|Comedy
Jumanji (1995),2,Adventure|Children's|Fantasy
Grumpier Old Men (1995),3,Comedy|Romance
Waiting to Exhale (1995),4,Comedy|Drama
Father of the Bride Part II (1995),5,Comedy
...,...,...
Meet the Parents (2000),3948,Comedy
Requiem for a Dream (2000),3949,Drama
Tigerland (2000),3950,Drama
Two Family House (2000),3951,Drama


In [28]:
# Column-wise concat aligned on the title index; join='inner' keeps only the
# titles present in both frames, i.e. the movies listed in 여성인기영화.
pd.concat(
    [여성인기영화, movies.set_index('영화제목')],
    axis=1,
    join='inner',
)

Unnamed: 0_level_0,mean,count,영화아이디,장르
영화제목,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
American Beauty (1999),4.238901,946.0,2858,Comedy|Drama
Being John Malkovich (1999),4.15993,569.0,2997,Comedy
Braveheart (1995),4.016484,546.0,110,Action|Drama|War
Casablanca (1942),4.30099,505.0,912,Drama|Romance|War
E.T. the Extra-Terrestrial (1982),4.08985,601.0,1097,Children's|Drama|Fantasy|Sci-Fi
Fargo (1996),4.217656,657.0,608,Crime|Drama|Thriller
Forrest Gump (1994),4.045031,644.0,356,Comedy|Romance|War
L.A. Confidential (1997),4.106007,566.0,1617,Crime|Film-Noir|Mystery|Thriller
"Matrix, The (1999)",4.128405,514.0,2571,Action|Sci-Fi|Thriller
"Princess Bride, The (1987)",4.342767,636.0,1197,Action|Adventure|Comedy|Romance


In [29]:
# Genre string of every popular movie, indexed by title.
ex2 = pd.concat(
    [여성인기영화, movies.set_index('영화제목')],
    axis=1,
    join='inner',
)['장르']

In [30]:
ex2.value_counts()

Action|Drama|War                      2
Children's|Drama|Fantasy|Sci-Fi       1
Adventure|Children's|Drama|Musical    1
Thriller                              1
Crime|Drama|Thriller                  1
Action|Adventure|Comedy|Romance       1
Comedy|Romance|War                    1
Comedy                                1
Animation|Children's|Comedy           1
Drama|War                             1
Action|Adventure                      1
Action|Sci-Fi|Thriller                1
Drama|Thriller                        1
Action|Adventure|Fantasy|Sci-Fi       1
Crime|Drama                           1
Action|Adventure|Drama|Sci-Fi|War     1
Comedy|Drama                          1
Drama|Romance|War                     1
Comedy|Romance                        1
Crime|Film-Noir|Mystery|Thriller      1
Drama                                 1
Name: 장르, dtype: int64

In [31]:
# Split each '|'-separated genre string (e.g. Action|Drama|War) into its
# individual genres.
# expand=True (the default is False) spreads the pieces over separate
# columns instead of returning one list per row.
ex2_expand = ex2.str.split('|', expand=True)

In [32]:
ex2_expand

Unnamed: 0_level_0,0,1,2,3,4
영화제목,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
American Beauty (1999),Comedy,Drama,,,
Being John Malkovich (1999),Comedy,,,,
Braveheart (1995),Action,Drama,War,,
Casablanca (1942),Drama,Romance,War,,
E.T. the Extra-Terrestrial (1982),Children's,Drama,Fantasy,Sci-Fi,
Fargo (1996),Crime,Drama,Thriller,,
Forrest Gump (1994),Comedy,Romance,War,,
L.A. Confidential (1997),Crime,Film-Noir,Mystery,Thriller,
"Matrix, The (1999)",Action,Sci-Fi,Thriller,,
"Princess Bride, The (1987)",Action,Adventure,Comedy,Romance,


In [33]:
# Count how often each genre occurs in each of the five split columns (0-4).
장르1, 장르2, 장르3, 장르4, 장르5 = (
    ex2_expand[position].value_counts() for position in range(5)
)

In [34]:
장르1

Action        7
Comedy        4
Drama         4
Crime         3
Children's    1
Animation     1
Adventure     1
Thriller      1
Name: 0, dtype: int64

In [35]:
# Arithmetic between two Series aligns on the index, and a label present in
# only one of them produces NaN. With fill_value=0 the missing side is
# treated as 0, so the counts are still added up.

장르1.add(장르5, fill_value =0)

Action        7.0
Adventure     1.0
Animation     1.0
Children's    1.0
Comedy        4.0
Crime         3.0
Drama         4.0
Thriller      1.0
War           1.0
dtype: float64

In [36]:
# Total genre counts over all split columns: start from an empty float
# Series and add each column's value counts, treating genres missing on
# either side as 0.
여성인기장르 = Series(dtype='float64')
for position in ex2_expand.columns:
    여성인기장르 = 여성인기장르.add(ex2_expand[position].value_counts(), fill_value=0)

In [37]:
여성인기장르

Action         7.0
Adventure      5.0
Animation      1.0
Children's     3.0
Comedy         6.0
Crime          3.0
Drama         12.0
Fantasy        2.0
Film-Noir      1.0
Musical        1.0
Mystery        1.0
Romance        4.0
Sci-Fi         4.0
Thriller       5.0
War            6.0
dtype: float64

In [38]:
여성인기장르.sort_values(ascending=False)

Drama         12.0
Action         7.0
Comedy         6.0
War            6.0
Adventure      5.0
Thriller       5.0
Romance        4.0
Sci-Fi         4.0
Children's     3.0
Crime          3.0
Fantasy        2.0
Animation      1.0
Film-Noir      1.0
Musical        1.0
Mystery        1.0
dtype: float64

## [실습 #3] 남자와 여자의 호불호가 크게 갈리는 영화 10개 찾기
전체 평점의 개수가 500개 이상인 영화만 대상으로 함.

In [39]:
# Mean rating and rating count per movie title, split by gender.
ex3 = data.pivot_table(
    index='영화제목',
    columns='성별',
    values='평점',
    aggfunc=['mean', 'count'],
)

In [40]:
# 1. Keep only movies with at least 500 ratings in total.
# The two gender counts are combined with add(..., fill_value=0) because a
# movie rated by only one gender has NaN in the other count column; a plain
# `+` would make the total NaN and drop the movie regardless of its size.
# .copy() makes the result an independent frame, so adding columns to it
# afterwards does not raise SettingWithCopyWarning.
ex3 = ex3[
    ex3[('count', 'F')].add(ex3[('count', 'M')], fill_value=0) >= 500
].copy()
ex3

Unnamed: 0_level_0,mean,mean,count,count
성별,F,M,F,M
영화제목,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
10 Things I Hate About You (1999),3.646552,3.311966,232.0,468.0
101 Dalmatians (1961),3.791444,3.500000,187.0,378.0
12 Angry Men (1957),4.184397,4.328421,141.0,475.0
"13th Warrior, The (1999)",3.112000,3.168000,125.0,625.0
"20,000 Leagues Under the Sea (1954)",3.670103,3.709205,97.0,478.0
...,...,...,...,...
"X-Files: Fight the Future, The (1998)",3.489474,3.493797,190.0,806.0
X-Men (2000),3.682310,3.851702,277.0,1234.0
You've Got Mail (1998),3.542424,3.275591,330.0,508.0
Young Frankenstein (1974),4.289963,4.239177,269.0,924.0


In [41]:
# 2. Absolute gap between the female and the male mean rating of each movie.
ex3['diff'] = (ex3[('mean', 'F')] - ex3[('mean', 'M')]).abs()

In [42]:
ex3.nlargest(10,'diff')

Unnamed: 0_level_0,mean,mean,count,count,diff
성별,F,M,F,M,Unnamed: 5_level_1
영화제목,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Dirty Dancing (1987),3.790378,2.959596,291.0,396.0,0.830782
"Good, The Bad and The Ugly, The (1966)",3.494949,4.2213,99.0,723.0,0.726351
Dumb & Dumber (1994),2.697987,3.336595,149.0,511.0,0.638608
Evil Dead II (Dead By Dawn) (1987),3.297297,3.909283,74.0,474.0,0.611985
Grease (1978),3.975265,3.367041,283.0,534.0,0.608224
Caddyshack (1980),3.396135,3.969737,207.0,760.0,0.573602
Animal House (1978),3.628906,4.167192,256.0,951.0,0.538286
"Exorcist, The (1973)",3.537634,4.067239,186.0,699.0,0.529605
"Rocky Horror Picture Show, The (1975)",3.673016,3.160131,315.0,918.0,0.512885
Big Trouble in Little China (1986),2.987952,3.48503,83.0,501.0,0.497078


## [실습 #4] 연령대 별로 영화 평점 분석하기
연령대(10대 미만, 10대, 20대, ...50대) 컬럼을 추가한 후, 영화별 연령대별 영화평점 구하기

In [43]:
def calculate_ages(x):
    """Return the age-bracket label for a numeric age value.

    The brackets are ten wide: values below 10 map to '10대 미만', values
    from 10 up to (but excluding) 20 map to '10대', and so on up to '40대'.
    Every value that is not below 50 maps to '50대'.

    Args:
        x: Age value that can be compared with integers using ``<``.

    Returns:
        One of '10대 미만', '10대', '20대', '30대', '40대', '50대'.
    """
    # Exclusive upper bound of each bracket, paired with that bracket's label.
    brackets = (
        (10, '10대 미만'),
        (20, '10대'),
        (30, '20대'),
        (40, '30대'),
        (50, '40대'),
    )
    for upper_bound, label in brackets:
        if x < upper_bound:
            return label
    # Nothing matched: the value is not below 50.
    return '50대'
    

In [44]:
calculate_ages(30)

'30대'

In [45]:
# Label every row's age value with its age bracket; apply() calls
# calculate_ages once per element of the column.
연령대 = data['연령'].apply(calculate_ages)

In [46]:
# Attach the age-bracket labels to data as a new column, then display it.
data['연령대'] = 연령대
data

Unnamed: 0,사용자아이디,성별,연령,직업,지역,영화아이디,평점,타임스탬프,영화제목,장르,연령대
0,1,F,1,10,48067,1193,5,978300760,One Flew Over the Cuckoo's Nest (1975),Drama,10대 미만
1,2,M,56,16,70072,1193,5,978298413,One Flew Over the Cuckoo's Nest (1975),Drama,50대
2,12,M,25,12,32793,1193,4,978220179,One Flew Over the Cuckoo's Nest (1975),Drama,20대
3,15,M,25,7,22903,1193,4,978199279,One Flew Over the Cuckoo's Nest (1975),Drama,20대
4,17,M,50,1,95350,1193,5,978158471,One Flew Over the Cuckoo's Nest (1975),Drama,50대
...,...,...,...,...,...,...,...,...,...,...,...
1000204,5949,M,18,17,47901,2198,5,958846401,Modulations (1998),Documentary,10대
1000205,5675,M,35,14,30030,2703,3,976029116,Broken Vessels (1998),Drama,30대
1000206,5780,M,18,17,92886,2845,1,958153068,White Boys (1999),Drama,10대
1000207,5851,F,18,20,55410,3607,5,957756608,One Little Indian (1973),Comedy|Drama|Western,10대


In [47]:
# Mean rating and rating count per movie title, split by age bracket.
ex4 = data.pivot_table(
    index='영화제목',
    columns='연령대',
    values='평점',
    aggfunc=['mean', 'count'],
)

In [48]:
ex4

Unnamed: 0_level_0,mean,mean,mean,mean,mean,mean,count,count,count,count,count,count
연령대,10대,10대 미만,20대,30대,40대,50대,10대,10대 미만,20대,30대,40대,50대
영화제목,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
"$1,000,000 Duck (1971)",3.000000,,3.090909,3.133333,2.000000,2.750000,6.0,,11.0,15.0,1.0,4.0
'Night Mother (1986),4.666667,2.000000,3.423077,2.904762,3.833333,3.750000,3.0,2.0,26.0,21.0,6.0,12.0
'Til There Was You (1997),2.500000,3.500000,2.666667,2.900000,2.333333,2.600000,8.0,2.0,24.0,10.0,3.0,5.0
"'burbs, The (1989)",3.244444,4.500000,2.652174,2.818182,2.545455,3.100000,90.0,2.0,115.0,55.0,11.0,30.0
...And Justice for All (1979),3.428571,3.000000,3.724138,3.657143,4.100000,3.674419,7.0,1.0,58.0,70.0,20.0,43.0
...,...,...,...,...,...,...,...,...,...,...,...,...
"Zed & Two Noughts, A (1985)",3.000000,1.000000,3.375000,3.777778,4.000000,3.000000,1.0,1.0,16.0,9.0,1.0,1.0
Zero Effect (1998),3.883333,4.125000,3.715278,3.608696,3.764706,3.769231,60.0,8.0,144.0,46.0,17.0,26.0
Zero Kelvin (Kjærlighetens kjøtere) (1995),,,,3.500000,,,,,,2.0,,
Zeus and Roxanne (1997),2.500000,1.500000,2.833333,3.500000,1.000000,,4.0,6.0,6.0,6.0,1.0,
