# 모델 간 비교(Comparative Analysis)

- 두 모델이 예측한 유저별 아이템 집합을 비교하여(유저별로 겹치는 아이템 수 계산) 두 모델의 예측이 얼마나 유사한지 확인하고, 이를 통해 대략적인 모델 성능을 유추해 볼 수 있습니다.

In [1]:
import pandas as pd

In [83]:
# Load the two CSV files whose per-user item lists are compared.
first_path, second_path = './54.csv', './30.csv'
df_1 = pd.read_csv(first_path)
df_2 = pd.read_csv(second_path)

In [84]:
# Build, for each data frame, the set of items belonging to every user.
def _items_per_user(frame):
    """Return a Series indexed by 'user' whose values are sets of 'item'."""
    return frame.groupby('user')['item'].apply(set)


# The pos_/neg_ prefixes only distinguish the two inputs: the first holds the
# grouping of df_1, the second the grouping of df_2.
pos_items_by_user = _items_per_user(df_1)
neg_items_by_user = _items_per_user(df_2)

In [85]:
# Find the users present in both groupings, then pair each of them with the
# items that appear in both of that user's sets.
overlapping_users = pos_items_by_user.index.intersection(neg_items_by_user.index)
first_sets = pos_items_by_user.loc[overlapping_users]
second_sets = neg_items_by_user.loc[overlapping_users]
overlapping_items = [
    (user, first & second)
    for user, first, second in zip(overlapping_users, first_sets, second_sets)
]

In [86]:
# Print, for every entry in overlapping_items, the size of the shared item set
# followed by the set itself. Nothing is filtered here, so an entry whose set
# is empty is printed with a count of 0.
for user, items in overlapping_items:
    print(f"User {user}: {len(items)} overlapping items", items, sep="\n")

User 11: 10 overlapping items
{8961, 2, 32587, 7373, 7438, 40815, 47, 4370, 4886, 2174}
User 14: 8 overlapping items
{1028, 1223, 1035, 588, 1198, 1907, 919, 2011}
User 18: 9 overlapping items
{2692, 296, 1193, 8873, 4235, 5995, 50, 46578, 2324}
User 25: 8 overlapping items
{608, 1, 1923, 2762, 1259, 47, 1073, 1270}
User 31: 6 overlapping items
{2628, 68358, 8360, 6377, 68954, 79132}
User 35: 10 overlapping items
{5952, 33794, 4963, 48516, 2762, 2959, 4306, 6711, 3996, 44191}
User 43: 9 overlapping items
{4963, 4995, 2858, 1196, 4973, 1197, 17, 4886, 1210}
User 50: 10 overlapping items
{4993, 4226, 6377, 778, 32587, 1258, 4878, 527, 110, 750}
User 58: 8 overlapping items
{1090, 4995, 5669, 2571, 150, 1079, 2268, 1246}
User 60: 8 overlapping items
{4226, 356, 1223, 6539, 2571, 4973, 1198, 58559}
User 61: 7 overlapping items
{44195, 2918, 32587, 2959, 56367, 4979, 6711}
User 65: 9 overlapping items
{6016, 745, 2858, 2571, 4878, 527, 68954, 7323, 58559}
User 72: 9 overlapping items
{608, 

In [87]:
# Sum the per-user overlap sizes to get the total number of overlapping items
# (the original note gives 313,600 as the total number of rows).
overlap_sizes = [len(shared) for _, shared in overlapping_items]
total = sum(overlap_sizes)
print(total)

259711


---

## Test Set Fitting

In [78]:
import pandas as pd

# Load the reference file and the prediction file that is checked against it.
# NOTE(review): the recorded preview of `target` shows an 'Unnamed: 0' column,
# which suggests 30.csv was written together with its index; passing
# index_col=0 may be intended here - confirm before changing.
answer_path, target_path = "answer.csv", "30.csv"
answer = pd.read_csv(answer_path)
target = pd.read_csv(target_path)

In [79]:
# Preview the frame that was read from 30.csv.
target

Unnamed: 0,user,item
0,11,4370
1,11,4886
2,11,40815
3,11,8961
4,11,32587
...,...,...
313595,138493,110
313596,138493,32587
313597,138493,48394
313598,138493,293


In [80]:
# Inner-join on (user, item): only the pairs found in both frames are kept.
join_keys = ['user', 'item']
com = answer.merge(target, how='inner', on=join_keys)

In [81]:
# Share of the rows in `target` whose (user, item) pair was matched into `com`
# (assuming the pairs are unique in both merged frames).
# The denominator is read from the data instead of the hard-coded 313600 so
# the ratio stays correct if the prediction file changes size.
len(com) / len(target)

0.23509885204081632

In [82]:
# Difference between the match ratio and a fixed reference value.
# The denominator is read from the data instead of the hard-coded 313600.
# NOTE(review): 0.07826594387755101 is presumably a previously measured ratio
# used as a baseline - confirm where it comes from.
len(com) / len(target) - 0.07826594387755101

0.1568329081632653