# 데이터 읽어오기

In [1]:
import pandas as pd
import numpy as np
movies = pd.read_csv("./dataset/tmdb_5000_movies.csv")         # tmdb_5000_movies.csv dataframe으로 읽어오기
movies = movies[["id","genres","vote_average","vote_count","popularity","original_title", "keywords","overview"]]
movies.drop(["genres","vote_average","vote_count","popularity", "keywords","overview"],axis=1,inplace=True)

print(movies.shape)
movies.head()

(4803, 2)


Unnamed: 0,id,original_title
0,19995,Avatar
1,285,Pirates of the Caribbean: At World's End
2,206647,Spectre
3,49026,The Dark Knight Rises
4,49529,John Carter


In [2]:
# 컬럼명 변경
movies.columns = ['tmdbId', 'original_title']
movies.head()

Unnamed: 0,tmdbId,original_title
0,19995,Avatar
1,285,Pirates of the Caribbean: At World's End
2,206647,Spectre
3,49026,The Dark Knight Rises
4,49529,John Carter


In [3]:
# links.csv dataframe으로 읽어오기
links = pd.read_csv("./dataset/links_small.csv")
links = links[["movieId","imdbId","tmdbId"]]
links.drop(['imdbId'],axis=1,inplace=True)

print(links.shape)
links.head()

(9125, 2)


Unnamed: 0,movieId,tmdbId
0,1,862.0
1,2,8844.0
2,3,15602.0
3,4,31357.0
4,5,11862.0


In [4]:
# ratings.csv dataframe으로 읽어오기
ratings = pd.read_csv("./dataset/ratings_small.csv")   
ratings = ratings[["userId","movieId","rating","timestamp"]]
ratings.drop(["timestamp"],axis=1,inplace=True)

print(ratings.shape)
ratings.head()

(100004, 3)


Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


## 결측값 확인


In [5]:
# null 값 개수 확인
movies.isnull().sum()

tmdbId            0
original_title    0
dtype: int64

In [6]:
# null 값 개수 확인
links.isnull().sum()

movieId     0
tmdbId     13
dtype: int64

In [7]:
# 결측값 있는 로우 제거
links = links.dropna(axis=0)

# 타입변환
links["tmdbId"] = links["tmdbId"].astype("int")

links.head()

Unnamed: 0,movieId,tmdbId
0,1,862
1,2,8844
2,3,15602
3,4,31357
4,5,11862


In [8]:
# null 값 개수 확인
ratings.isnull().sum()

userId     0
movieId    0
rating     0
dtype: int64

## 데이터 프레임끼리 inner join

In [9]:
join_movie = pd.merge(ratings, links, left_on='movieId', right_on='movieId', how='inner')
print(join_movie)

       userId  movieId  rating  tmdbId
0           1       31     2.5    9909
1           7       31     3.0    9909
2          31       31     4.0    9909
3          32       31     4.0    9909
4          36       31     3.0    9909
...       ...      ...     ...     ...
99928     664    64997     2.5   34812
99929     664    72380     3.5   22825
99930     665      129     3.0  110972
99931     665     4736     1.0   26602
99932     668     6425     1.0   36807

[99933 rows x 4 columns]


In [10]:
join_movie = pd.merge(join_movie, movies, left_on='tmdbId', right_on='tmdbId', how='inner')
print(join_movie)

       userId  movieId  rating  tmdbId  original_title
0           1     1061     3.0     819        Sleepers
1          19     1061     3.0     819        Sleepers
2          23     1061     3.5     819        Sleepers
3          30     1061     3.0     819        Sleepers
4          70     1061     5.0     819        Sleepers
...       ...      ...     ...     ...             ...
66942     663   134528     3.5  222936           Aloha
66943     663   137595     3.0  264999  Magic Mike XXL
66944     664    60832     3.0   12192       Pathology
66945     664    72380     3.5   22825         The Box
66946     665     4736     1.0   26602    Summer Catch

[66947 rows x 5 columns]


In [11]:
join_movie["original_title"].value_counts()

Forrest Gump                341
Pulp Fiction                324
The Shawshank Redemption    311
The Silence of the Lambs    304
Star Wars                   291
                           ... 
Penguins of Madagascar        1
Anomalisa                     1
Fireproof                     1
What Just Happened            1
Black Mass                    1
Name: original_title, Length: 3393, dtype: int64

In [12]:
# # movies에 있던 모든 영화가 join이 잘 됐는지 확인 -> 여긴 발표할 때는 빼도 될 듯..
# after = join_movie["tmdbId"].unique()
# print(len(after))
# print(movies.shape)

# latent 알고리즘

### 피벗테이블 생성

In [13]:
user_movie_rating = join_movie.pivot_table('rating', index = 'userId', columns='tmdbId').fillna(0)
print(user_movie_rating.shape)
user_movie_rating.head()

(671, 3394)


tmdbId,5,11,12,13,14,16,18,19,20,22,24,25,28,33,35,38,55,58,59,62,65,66,68,69,70,71,73,74,75,76,77,78,79,80,83,85,87,89,90,95,...,291270,291805,293660,293863,294086,294254,295964,296096,296098,296099,297596,297761,298312,301748,302699,303858,307081,308531,308639,310131,312221,313922,314365,316727,318846,321258,321697,321741,324668,325133,325173,328111,328425,329833,332411,332567,333371,334074,342521,347969
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,3.0,5.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,5.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,5.0,5.0,5.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,4.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### 원본 행렬에서 값이 0이 아닌 부분 인덱스 찾기

In [14]:
data = user_movie_rating.values

nonZeroX=[]
nonZeroY=[]

for i in range (0, data.shape[0]):
  for j in range(0, data.shape[1]):
    # print(mat[i][j])
    if (data[i][j] != 0):
      nonZeroX.append(i)
      nonZeroY.append(j)

### 원본 행렬에서 값이 0이 아닌 부분 값들 찾기
* 이후에 오차 계산할 때 사용

In [15]:
nonZeroData = []
for i,j in zip (nonZeroX, nonZeroY):
  nonZeroData.append(data[i][j])

In [16]:
# 개수 확인
print(len(nonZeroData))

66947


## 학습

* SGD (Stochastic Gradient Descent)
 - http://ntucsu.csie.ntu.edu.tw/~cjlin/papers/libmf/libmf.pdf
 - https://velog.io/@vvakki_/Matrix-Factorization-2
 - https://soobarkbar.tistory.com/105
 - 구현이 쉽고, 비교적 빠른 실행 시간

* RMSE (Root Mean Square Error)
  - 오차: 실제 값과 예측 값의 차이
  - 오차들의 제곱을 모두 더한 뒤 평균내고 루트 씌움

In [17]:
from sklearn.metrics import mean_squared_error

# 분해할 행렬의 shape
num_users, num_items = user_movie_rating.shape
K = 150

# 랜덤 값으로 채움
np.random.seed(1)
P = np.random.normal(scale=1.0/K, size=(num_users, K))
Q = np.random.normal(scale=1.0/K, size=(num_items, K))

# 과적합 되지 않게 보정
r_lambda = 0.03

# 학습률
lr = 0.02

# 반복 횟수
step = 100



for epoch in range(1, step+1):
  for i,j in zip (nonZeroX, nonZeroY):
    # 결과 행렬의 i, j 항은 P행렬의 i행과 Q행렬의 j행렬의 행렬곱 값이다.
    # 에러 구함
    error = data[i][j] - np.dot(P[i, :], Q[j, :].T)
    
    # 구한 에러를 바탕으로 SGD를 이용해 P, Q 행렬 보정
    P[i, :] = P[i, :] + lr*(error * Q[j, :] - r_lambda*P[i, :])
    Q[j, :] = Q[j, :] + lr*(error * P[i, :] - r_lambda*Q[j, :])


  # 보정한 행렬을 바탕으로 예측
  prediction = np.dot(P, Q.T)
  errorList = []
  # 원본 행렬에서 0이 아닌 값이 있는 위치에 있는 값들을 가져옴
  for i,j in zip (nonZeroX, nonZeroY):
    errorList.append(prediction[i][j])

  if (epoch) % 10 == 0:
      # 오차 계산
      rmse = mean_squared_error(nonZeroData, errorList, squared=False)
      print("epoch: {}, rmse: {:.6f}" .format(epoch, rmse))


epoch: 10, rmse: 0.741029
epoch: 20, rmse: 0.440402
epoch: 30, rmse: 0.302979
epoch: 40, rmse: 0.256121
epoch: 50, rmse: 0.238608
epoch: 60, rmse: 0.230862
epoch: 70, rmse: 0.226846
epoch: 80, rmse: 0.224479
epoch: 90, rmse: 0.222942
epoch: 100, rmse: 0.221869


## 예측한 행렬 확인

In [18]:
# 예측한 행렬 보이기
pred_matrix = np.dot(P, Q.T)
pred_matrix_df = pd.DataFrame(pred_matrix, columns = user_movie_rating.columns, index = user_movie_rating.index)
print(pred_matrix_df.shape)
pred_matrix_df.head()

(671, 3394)


tmdbId,5,11,12,13,14,16,18,19,20,22,24,25,28,33,35,38,55,58,59,62,65,66,68,69,70,71,73,74,75,76,77,78,79,80,83,85,87,89,90,95,...,291270,291805,293660,293863,294086,294254,295964,296096,296098,296099,297596,297761,298312,301748,302699,303858,307081,308531,308639,310131,312221,313922,314365,316727,318846,321258,321697,321741,324668,325133,325173,328111,328425,329833,332411,332567,333371,334074,342521,347969
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,2.018751,2.027506,2.435144,2.871769,2.817492,1.930649,2.823132,2.634303,2.821766,2.666339,2.634461,2.532899,2.568984,1.771797,2.510193,3.179689,2.852497,2.264981,2.165702,2.586458,2.822235,1.066868,2.403794,2.746004,2.575535,2.521613,2.993731,1.641606,2.525566,2.015121,2.731844,2.468621,2.589698,2.318749,2.604418,2.74335,2.612891,3.088609,2.718369,1.917375,...,0.910961,1.588141,2.078146,1.373102,2.851364,1.757706,1.117645,2.246058,2.306275,1.811463,0.835749,1.684519,1.260325,0.832774,1.148962,0.695593,2.349331,0.906204,2.102246,2.849096,2.694763,1.89141,2.633078,0.92198,2.144226,1.267005,1.69952,0.448228,2.332248,2.456592,1.678784,2.044131,1.886671,1.571274,0.835638,1.617979,2.811076,1.742602,0.44204,1.273466
2,2.427161,3.817115,3.434385,3.138006,4.241183,3.600734,3.015552,3.931935,3.837951,3.102039,2.804551,3.362443,4.052468,3.132763,3.651009,3.488509,3.666145,2.972174,3.183374,2.790746,3.242761,2.400431,3.578665,3.632657,3.496876,3.958071,3.940751,3.044208,2.733309,3.183643,4.167515,3.589553,3.400626,3.225443,3.05366,3.44244,2.915091,3.331553,3.227795,2.788102,...,1.098724,2.15999,2.983313,1.586043,3.853547,2.789182,1.44041,3.377829,3.453019,2.790843,1.061084,2.195993,1.770272,1.093786,1.343487,0.813593,3.239972,1.06929,2.699711,3.227768,3.49268,2.601517,3.57194,1.082851,3.376057,1.772019,2.630231,0.53159,3.208327,3.009409,2.171388,3.197801,2.751044,2.007929,1.082741,1.914578,3.754777,2.405215,0.5311,1.936493
3,4.023255,3.817229,3.136991,4.789111,3.940762,3.634627,3.656024,2.934828,3.645171,3.174454,3.321543,3.668277,3.790515,3.545611,3.349498,3.25154,4.293434,3.130587,3.597218,3.66528,3.281025,2.079181,3.984719,3.847803,4.22789,3.448875,4.15306,2.388801,3.228924,3.271027,3.86399,3.700109,3.878203,3.550742,3.010362,3.373351,3.17671,3.502675,3.630917,3.374811,...,1.005467,2.445635,2.661484,1.456493,3.682971,2.820148,1.499687,3.348917,3.446412,2.566685,1.129594,2.277213,2.021497,1.160687,1.234385,0.739645,3.234161,1.000136,2.902811,3.285623,3.456475,2.976042,3.797062,0.962695,3.563787,2.025661,2.742737,0.503826,3.40676,3.005873,2.281968,3.247178,2.450493,2.149517,1.13941,1.768384,4.150747,2.459349,0.476006,1.980283
4,3.864301,5.227648,4.769728,4.83727,4.691342,4.66492,4.738351,4.802329,4.629582,4.70306,4.604002,4.225131,4.935638,4.33164,4.494026,4.632196,5.207301,4.001825,4.567845,4.490932,4.153328,3.131922,4.936264,4.566084,4.182184,4.395161,4.407573,3.427158,3.355405,4.514886,5.142739,4.875192,4.677044,4.612391,4.136023,5.082602,4.902794,4.971589,4.48469,3.681897,...,1.452187,2.784791,4.16217,2.202172,4.632768,2.897064,2.052598,3.841639,3.880274,2.903438,1.537919,3.078964,2.086227,1.555216,1.811535,1.102746,4.001492,1.460564,3.595921,4.634398,4.545687,3.618991,4.734478,1.485341,3.594257,2.060442,2.987802,0.724034,4.068131,4.112765,3.0806,3.54918,3.211044,2.582616,1.532466,2.590606,4.941381,2.994904,0.737824,2.199557
5,3.409294,3.917637,4.113348,4.193443,3.681283,3.373367,3.571902,3.090779,3.836153,3.808318,3.309189,3.8235,3.438675,3.54369,3.64682,3.751535,4.266472,3.954401,3.347693,2.635992,3.636165,3.293841,3.604791,4.465417,4.378725,3.255107,4.18521,3.052548,2.517825,3.667324,4.142628,3.145109,3.791333,3.777213,3.365705,4.206274,3.675117,4.355264,3.958406,3.278238,...,1.040381,2.413214,4.237942,1.555525,3.861235,3.328374,1.594497,3.8542,3.645076,2.93207,1.203804,2.42455,1.961096,1.203099,1.330319,0.79153,3.449697,1.043456,2.999047,3.312344,3.552696,2.986536,4.202085,1.039779,3.127477,1.963145,3.077018,0.54247,3.594184,3.238746,2.407582,3.757537,2.770761,2.363732,1.200014,1.88403,4.277915,2.610122,0.493224,2.216691


#### 0.5점 단위로 끊어서 확인

In [19]:
# 예측 행렬을 원본 점수처럼 0.5점 단위로 끊어서 표시하기

thres = [0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]


for i in range(0, pred_matrix.shape[0]):
  for j in range(0, pred_matrix.shape[1]):
    for k in thres:
      if pred_matrix[i][j] < 0:
        pred_matrix[i][j] = 0.0
        break
      elif pred_matrix[i][j] > 5.0:
        pred_matrix[i][j] = 5.0
        break      
      elif (pred_matrix[i][j]>= k-0.25) and (pred_matrix[i][j]< k+0.25):
        pred_matrix[i][j] = k
        break


pred_matrix_df_05 = pd.DataFrame(pred_matrix, columns = user_movie_rating.columns, index = user_movie_rating.index)
print(pred_matrix_df_05.shape)
pred_matrix_df_05.head()       


(671, 3394)


tmdbId,5,11,12,13,14,16,18,19,20,22,24,25,28,33,35,38,55,58,59,62,65,66,68,69,70,71,73,74,75,76,77,78,79,80,83,85,87,89,90,95,...,291270,291805,293660,293863,294086,294254,295964,296096,296098,296099,297596,297761,298312,301748,302699,303858,307081,308531,308639,310131,312221,313922,314365,316727,318846,321258,321697,321741,324668,325133,325173,328111,328425,329833,332411,332567,333371,334074,342521,347969
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,2.0,2.0,2.5,3.0,3.0,2.0,3.0,2.5,3.0,2.5,2.5,2.5,2.5,2.0,2.5,3.0,3.0,2.5,2.0,2.5,3.0,1.0,2.5,2.5,2.5,2.5,3.0,1.5,2.5,2.0,2.5,2.5,2.5,2.5,2.5,2.5,2.5,3.0,2.5,2.0,...,1.0,1.5,2.0,1.5,3.0,2.0,1.0,2.0,2.5,2.0,1.0,1.5,1.5,1.0,1.0,0.5,2.5,1.0,2.0,3.0,2.5,2.0,2.5,1.0,2.0,1.5,1.5,0.5,2.5,2.5,1.5,2.0,2.0,1.5,1.0,1.5,3.0,1.5,0.5,1.5
2,2.5,4.0,3.5,3.0,4.0,3.5,3.0,4.0,4.0,3.0,3.0,3.5,4.0,3.0,3.5,3.5,3.5,3.0,3.0,3.0,3.0,2.5,3.5,3.5,3.5,4.0,4.0,3.0,2.5,3.0,4.0,3.5,3.5,3.0,3.0,3.5,3.0,3.5,3.0,3.0,...,1.0,2.0,3.0,1.5,4.0,3.0,1.5,3.5,3.5,3.0,1.0,2.0,2.0,1.0,1.5,1.0,3.0,1.0,2.5,3.0,3.5,2.5,3.5,1.0,3.5,2.0,2.5,0.5,3.0,3.0,2.0,3.0,3.0,2.0,1.0,2.0,4.0,2.5,0.5,2.0
3,4.0,4.0,3.0,5.0,4.0,3.5,3.5,3.0,3.5,3.0,3.5,3.5,4.0,3.5,3.5,3.5,4.5,3.0,3.5,3.5,3.5,2.0,4.0,4.0,4.0,3.5,4.0,2.5,3.0,3.5,4.0,3.5,4.0,3.5,3.0,3.5,3.0,3.5,3.5,3.5,...,1.0,2.5,2.5,1.5,3.5,3.0,1.5,3.5,3.5,2.5,1.0,2.5,2.0,1.0,1.0,0.5,3.0,1.0,3.0,3.5,3.5,3.0,4.0,1.0,3.5,2.0,2.5,0.5,3.5,3.0,2.5,3.0,2.5,2.0,1.0,2.0,4.0,2.5,0.5,2.0
4,4.0,5.0,5.0,5.0,4.5,4.5,4.5,5.0,4.5,4.5,4.5,4.0,5.0,4.5,4.5,4.5,5.0,4.0,4.5,4.5,4.0,3.0,5.0,4.5,4.0,4.5,4.5,3.5,3.5,4.5,5.0,5.0,4.5,4.5,4.0,5.0,5.0,5.0,4.5,3.5,...,1.5,3.0,4.0,2.0,4.5,3.0,2.0,4.0,4.0,3.0,1.5,3.0,2.0,1.5,2.0,1.0,4.0,1.5,3.5,4.5,4.5,3.5,4.5,1.5,3.5,2.0,3.0,0.5,4.0,4.0,3.0,3.5,3.0,2.5,1.5,2.5,5.0,3.0,0.5,2.0
5,3.5,4.0,4.0,4.0,3.5,3.5,3.5,3.0,4.0,4.0,3.5,4.0,3.5,3.5,3.5,4.0,4.5,4.0,3.5,2.5,3.5,3.5,3.5,4.5,4.5,3.5,4.0,3.0,2.5,3.5,4.0,3.0,4.0,4.0,3.5,4.0,3.5,4.5,4.0,3.5,...,1.0,2.5,4.0,1.5,4.0,3.5,1.5,4.0,3.5,3.0,1.0,2.5,2.0,1.0,1.5,1.0,3.5,1.0,3.0,3.5,3.5,3.0,4.0,1.0,3.0,2.0,3.0,0.5,3.5,3.0,2.5,4.0,3.0,2.5,1.0,2.0,4.5,2.5,0.5,2.0
