# 협업 필터링 (Collaborative filtering)

In [26]:
# arena_util.py
# -*- coding: utf-8 -*-

import io
import os
import json
import torch
import distutils.dir_util
from collections import Counter

import numpy as np



def write_json(data, fname):
    def _conv(o):
        if isinstance(o, np.int64) or isinstance(o, np.int32):
            return int(o)
        raise TypeError

    parent = os.path.dirname(fname)
    distutils.dir_util.mkpath("./output/" + parent)
    with io.open("./output/" + fname, "w", encoding="utf8") as f:
        json_str = json.dumps(data, ensure_ascii=False, default=_conv)
        f.write(json_str)


def load_json(fname):
    with open(fname, encoding='utf8') as f:
        json_obj = json.load(f)

    return json_obj


def debug_json(r):
    print(json.dumps(r, ensure_ascii=False, indent=4))



Custom evaluating (weak)

In [3]:
# evaluate.py
# -*- coding: utf-8 -*-
# import fire
import numpy as np

# from arena_util import load_json


class CustomEvaluator:
    def _idcg(self, l):
        return sum((1.0 / np.log(i + 2) for i in range(l)))

    def __init__(self):
        self._idcgs = [self._idcg(i) for i in range(101)]

    def _ndcg(self, gt, rec):
        dcg = 0.0
        for i, r in enumerate(rec):
            if r in gt:
                dcg += 1.0 / np.log(i + 2)

        return dcg / self._idcgs[len(gt)]

    def _eval(self, gt_fname, rec_fname):
        gt_playlists = load_json(gt_fname)
        gt_dict = {g["id"]: g for g in gt_playlists}
        rec_playlists = load_json(rec_fname)
        
        music_ndcg = 0.0
        tag_ndcg = 0.0

        for rec in rec_playlists:
            gt = gt_dict[rec["id"]]
            music_ndcg += self._ndcg(gt["songs"], rec["songs"][:100])
            tag_ndcg += self._ndcg(gt["tags"], rec["tags"][:10])

        music_ndcg = music_ndcg / len(rec_playlists)
        tag_ndcg = tag_ndcg / len(rec_playlists)
        score = music_ndcg * 0.85 + tag_ndcg * 0.15

        return music_ndcg, tag_ndcg, score

    def evaluate(self, gt_fname, rec_fname):
        # try:
            music_ndcg, tag_ndcg, score = self._eval(gt_fname, rec_fname)
            print(f"Music nDCG: {music_ndcg:.6}")
            print(f"Tag nDCG: {tag_ndcg:.6}")
            print(f"Score: {score:.6}")
        # except Exception as e:
        #     print(e)


# if __name__ == "__main__":
#     fire.Fire(ArenaEvaluator)


In [4]:
from collections import Counter

import numpy as np
import pandas as pd

import scipy.sparse as spr
import pickle

In [5]:
song_meta = pd.read_json("arena_data/song_meta.json")
train = pd.read_json("arena_data/train.json")
test = pd.read_json("arena_data/val.json")

In [6]:
song_meta

Unnamed: 0,song_gn_dtl_gnr_basket,issue_date,album_name,album_id,artist_id_basket,song_name,song_gn_gnr_basket,artist_name_basket,id
0,[GN0901],20140512,불후의 명곡 - 7080 추억의 얄개시대 팝송베스트,2255639,[2727],Feelings,[GN0900],[Various Artists],0
1,"[GN1601, GN1606]",20080421,"Bach : Partitas Nos. 2, 3 & 4",376431,[29966],"Bach : Partita No. 4 In D Major, BWV 828 - II....",[GN1600],[Murray Perahia],1
2,[GN0901],20180518,Hit,4698747,[3361],Solsbury Hill (Remastered 2002),[GN0900],[Peter Gabriel],2
3,"[GN1102, GN1101]",20151016,Feeling Right (Everything Is Nice) (Feat. Popc...,2644882,[838543],Feeling Right (Everything Is Nice) (Feat. Popc...,[GN1100],[Matoma],3
4,"[GN1802, GN1801]",20110824,그남자 그여자,2008470,[560160],그남자 그여자,[GN1800],[Jude Law],4
...,...,...,...,...,...,...,...,...,...
707984,[GN2001],19991219,The Best Best Of The Black President,65254,[166499],Coffin For Head Of State,[GN2000],[Fela Kuti],707984
707985,[GN0901],19860000,True Colors,44141,[11837],Change Of Heart,[GN0900],[Cyndi Lauper],707985
707986,"[GN0105, GN0101]",20160120,행보 2015 윤종신 / 작사가 윤종신 Live Part.1,2662866,[437],스치듯 안녕,[GN0100],[윤종신],707986
707987,"[GN1807, GN1801]",20131217,명상의 시간을 위한 뉴에이지 음악,2221722,[729868],숲의 빛,[GN1800],[Nature Piano],707987


In [7]:
train

Unnamed: 0,tags,id,plylst_title,songs,like_cnt,updt_date
0,[락],61281,여행같은 음악,"[525514, 129701, 383374, 562083, 297861, 13954...",71,2013-12-19 18:36:19.000
1,"[추억, 회상]",10532,요즘 너 말야,"[432406, 675945, 497066, 120377, 389529, 24427...",1,2014-12-02 16:19:42.000
2,"[까페, 잔잔한]",76951,"편하게, 잔잔하게 들을 수 있는 곡.-","[83116, 276692, 166267, 186301, 354465, 256598...",17,2017-08-28 07:09:34.000
3,"[연말, 눈오는날, 캐럴, 분위기, 따듯한, 크리스마스캐럴, 겨울노래, 크리스마스,...",147456,크리스마스 분위기에 흠뻑 취하고 싶을때,"[394031, 195524, 540149, 287984, 440773, 10033...",33,2019-12-05 15:15:18.000
4,[댄스],27616,추억의 노래 ㅋ,"[159327, 553610, 5130, 645103, 294435, 100657,...",9,2011-10-25 13:54:56.000
...,...,...,...,...,...,...
115066,"[록메탈, 밴드사운드, 록, 락메탈, 메탈, 락, extreme]",120325,METAL E'SM #2,"[429629, 441511, 612106, 516359, 691768, 38714...",3,2020-04-17 04:31:11.000
115067,[일렉],106976,빠른 리스너를 위한 따끈따끈한 최신 인기 EDM 모음!,"[321330, 216057, 534472, 240306, 331098, 23288...",13,2015-12-24 17:23:19.000
115068,"[담시, 가족, 눈물, 그리움, 주인공, 나의_이야기, 사랑, 친구]",11343,#1. 눈물이 앞을 가리는 나의_이야기,"[50512, 249024, 250608, 371171, 229942, 694943...",4,2019-08-16 20:59:22.000
115069,"[잔잔한, 버스, 퇴근버스, Pop, 풍경, 퇴근길]",131982,퇴근 버스에서 편히 들으면서 하루를 마무리하기에 좋은 POP,"[533534, 608114, 343608, 417140, 609009, 30217...",4,2019-10-25 23:40:42.000


In [8]:
test

Unnamed: 0,tags,id,plylst_title,songs,like_cnt,updt_date
0,[],118598,,"[373313, 151080, 275346, 696876, 165237, 52593...",1675,2019-05-27 14:14:33.000
1,[],131447,앨리스테이블,[],1,2014-07-16 15:24:24.000
2,[],51464,,"[529437, 516103, 360067, 705713, 226062, 37089...",62,2008-06-21 23:26:22.000
3,[],45144,,"[589668, 21711, 570151, 320043, 13930, 599327,...",20,2017-10-30 18:15:43.000
4,[],79929,,"[672718, 121924, 102694, 683657, 201558, 38511...",20,2017-02-07 11:40:42.000
...,...,...,...,...,...,...
23010,[잔잔한],101722,,"[75842, 26083, 244183, 684715, 500593, 508608,...",17,2015-12-17 14:06:05.000
23011,"[어머니, 힘들때, 아빠, 가족, 위로받고싶을때]",122127,,"[450275, 487671, 561031, 663944, 628672, 59121...",10,2020-04-16 21:35:44.000
23012,[],77438,,"[625875, 464051, 11657, 236393, 358186, 213435...",0,2019-03-27 15:27:40.000
23013,[],36231,,"[161094, 665833, 688145, 432735, 439938, 12665...",31,2015-11-18 11:49:09.000


playlist, song, tag의 id(각각 nid, sid, tid)를 새로 생성하는 이유는, 새로 생성할 id를 matrix의 row, column index로 사용할 것이기 때문입니다.

- plylst_id_nid : playlist id -> nid
- plylst_nid_id : playlist nid -> id
- song_id_sid : song id -> sid
- song_sid_id : song sid -> id
- tag_id_tid : tag id -> tid
- tag_tid_id : tag tid -> id
- song_dict : song id -> count
- tag_dict : tag id -> count

In [9]:
train['istrain'] = 1
test['istrain'] = 0

n_train = len(train)
n_test = len(test)

# train + test
plylst = pd.concat([train, test], ignore_index=True)

# playlist id
plylst["nid"] = range(n_train + n_test)

# id <-> nid
plylst_id_nid = dict(zip(plylst["id"],plylst["nid"]))
plylst_nid_id = dict(zip(plylst["nid"],plylst["id"]))

In [10]:
plylst

Unnamed: 0,tags,id,plylst_title,songs,like_cnt,updt_date,istrain,nid
0,[락],61281,여행같은 음악,"[525514, 129701, 383374, 562083, 297861, 13954...",71,2013-12-19 18:36:19.000,1,0
1,"[추억, 회상]",10532,요즘 너 말야,"[432406, 675945, 497066, 120377, 389529, 24427...",1,2014-12-02 16:19:42.000,1,1
2,"[까페, 잔잔한]",76951,"편하게, 잔잔하게 들을 수 있는 곡.-","[83116, 276692, 166267, 186301, 354465, 256598...",17,2017-08-28 07:09:34.000,1,2
3,"[연말, 눈오는날, 캐럴, 분위기, 따듯한, 크리스마스캐럴, 겨울노래, 크리스마스,...",147456,크리스마스 분위기에 흠뻑 취하고 싶을때,"[394031, 195524, 540149, 287984, 440773, 10033...",33,2019-12-05 15:15:18.000,1,3
4,[댄스],27616,추억의 노래 ㅋ,"[159327, 553610, 5130, 645103, 294435, 100657,...",9,2011-10-25 13:54:56.000,1,4
...,...,...,...,...,...,...,...,...
138081,[잔잔한],101722,,"[75842, 26083, 244183, 684715, 500593, 508608,...",17,2015-12-17 14:06:05.000,0,138081
138082,"[어머니, 힘들때, 아빠, 가족, 위로받고싶을때]",122127,,"[450275, 487671, 561031, 663944, 628672, 59121...",10,2020-04-16 21:35:44.000,0,138082
138083,[],77438,,"[625875, 464051, 11657, 236393, 358186, 213435...",0,2019-03-27 15:27:40.000,0,138083
138084,[],36231,,"[161094, 665833, 688145, 432735, 439938, 12665...",31,2015-11-18 11:49:09.000,0,138084


In [11]:
plylst_tag = plylst['tags']
tag_counter = Counter([tg for tgs in plylst_tag for tg in tgs])
tag_dict = {x: tag_counter[x] for x in tag_counter}

tag_id_tid = dict()
tag_tid_id = dict()
for i, t in enumerate(tag_dict):
  tag_id_tid[t] = i
  tag_tid_id[i] = t

n_tags = len(tag_dict)

plylst_song = plylst['songs']
song_counter = Counter([sg for sgs in plylst_song for sg in sgs])
song_dict = {x: song_counter[x] for x in song_counter}

song_id_sid = dict()
song_sid_id = dict()
for i, t in enumerate(song_dict):
  song_id_sid[t] = i
  song_sid_id[i] = t

n_songs = len(song_dict)

plylst의 songs와 tags를 새로운 id로 변환하여 DataFrame에 추가합니다

In [12]:
plylst['songs_id'] = plylst['songs'].map(lambda x: [song_id_sid.get(s) for s in x if song_id_sid.get(s) != None])
plylst['tags_id'] = plylst['tags'].map(lambda x: [tag_id_tid.get(t) for t in x if tag_id_tid.get(t) != None])

In [13]:
plylst

Unnamed: 0,tags,id,plylst_title,songs,like_cnt,updt_date,istrain,nid,songs_id,tags_id
0,[락],61281,여행같은 음악,"[525514, 129701, 383374, 562083, 297861, 13954...",71,2013-12-19 18:36:19.000,1,0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",[0]
1,"[추억, 회상]",10532,요즘 너 말야,"[432406, 675945, 497066, 120377, 389529, 24427...",1,2014-12-02 16:19:42.000,1,1,"[19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 3...","[1, 2]"
2,"[까페, 잔잔한]",76951,"편하게, 잔잔하게 들을 수 있는 곡.-","[83116, 276692, 166267, 186301, 354465, 256598...",17,2017-08-28 07:09:34.000,1,2,"[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 7...","[3, 4]"
3,"[연말, 눈오는날, 캐럴, 분위기, 따듯한, 크리스마스캐럴, 겨울노래, 크리스마스,...",147456,크리스마스 분위기에 흠뻑 취하고 싶을때,"[394031, 195524, 540149, 287984, 440773, 10033...",33,2019-12-05 15:15:18.000,1,3,"[89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 1...","[5, 6, 7, 8, 9, 10, 11, 12, 13, 14]"
4,[댄스],27616,추억의 노래 ㅋ,"[159327, 553610, 5130, 645103, 294435, 100657,...",9,2011-10-25 13:54:56.000,1,4,"[127, 128, 129, 130, 131, 132, 133, 134, 135, ...",[15]
...,...,...,...,...,...,...,...,...,...,...
138081,[잔잔한],101722,,"[75842, 26083, 244183, 684715, 500593, 508608,...",17,2015-12-17 14:06:05.000,0,138081,"[5607, 1025, 9650, 543806, 1424, 7372, 2234, 2...",[4]
138082,"[어머니, 힘들때, 아빠, 가족, 위로받고싶을때]",122127,,"[450275, 487671, 561031, 663944, 628672, 59121...",10,2020-04-16 21:35:44.000,0,138082,"[638333, 244876, 108022, 420983, 20258, 595078...","[11913, 335, 3162, 455, 23086]"
138083,[],77438,,"[625875, 464051, 11657, 236393, 358186, 213435...",0,2019-03-27 15:27:40.000,0,138083,"[1435, 718, 2659, 2773, 1359, 8731, 696, 697, ...",[]
138084,[],36231,,"[161094, 665833, 688145, 432735, 439938, 12665...",31,2015-11-18 11:49:09.000,0,138084,"[3091, 308295, 428975, 80278, 35027, 234993, 8...",[]


In [14]:
plylst_use = plylst[['istrain','nid','updt_date','songs_id','tags_id']]
plylst_use.loc[:,'num_songs'] = plylst_use['songs_id'].map(len)
plylst_use.loc[:,'num_tags'] = plylst_use['tags_id'].map(len)
plylst_use = plylst_use.set_index('nid')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  plylst_use.loc[:,'num_songs'] = plylst_use['songs_id'].map(len)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  plylst_use.loc[:,'num_tags'] = plylst_use['tags_id'].map(len)


In [15]:
plylst_use

Unnamed: 0_level_0,istrain,updt_date,songs_id,tags_id,num_songs,num_tags
nid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1,2013-12-19 18:36:19.000,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",[0],19,1
1,1,2014-12-02 16:19:42.000,"[19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 3...","[1, 2]",42,2
2,1,2017-08-28 07:09:34.000,"[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 7...","[3, 4]",28,2
3,1,2019-12-05 15:15:18.000,"[89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 1...","[5, 6, 7, 8, 9, 10, 11, 12, 13, 14]",38,10
4,1,2011-10-25 13:54:56.000,"[127, 128, 129, 130, 131, 132, 133, 134, 135, ...",[15],53,1
...,...,...,...,...,...,...
138081,0,2015-12-17 14:06:05.000,"[5607, 1025, 9650, 543806, 1424, 7372, 2234, 2...",[4],48,1
138082,0,2020-04-16 21:35:44.000,"[638333, 244876, 108022, 420983, 20258, 595078...","[11913, 335, 3162, 455, 23086]",100,5
138083,0,2019-03-27 15:27:40.000,"[1435, 718, 2659, 2773, 1359, 8731, 696, 697, ...",[],12,0
138084,0,2015-11-18 11:49:09.000,"[3091, 308295, 428975, 80278, 35027, 234993, 8...",[],9,0


In [16]:
plylst_train = plylst_use.iloc[:n_train,:]
plylst_test = plylst_use.iloc[n_train:,:]

test set에서 샘플 300개만 뽑아 테스트해봅니다.

In [17]:
# sample test
# np.random.seed(33)
# n_sample = 300

# test = plylst_test.iloc[np.random.choice(range(n_test), n_sample, replace=False),:]

# real test
test = plylst_test
# print(len(test))

In [18]:
test

Unnamed: 0_level_0,istrain,updt_date,songs_id,tags_id,num_songs,num_tags
nid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
115071,0,2019-05-27 14:14:33.000,"[82770, 73350, 49850, 283466, 38811, 14654, 83...",[],27,0
115072,0,2014-07-16 15:24:24.000,[],[],0,0
115073,0,2008-06-21 23:26:22.000,"[42084, 86991, 615142, 615143, 66432, 191918, ...",[],14,0
115074,0,2017-10-30 18:15:43.000,"[19289, 156274, 92524, 5729, 9179, 4694, 3233,...",[],17,0
115075,0,2017-02-07 11:40:42.000,"[72186, 47442, 47461, 24939, 209259, 81164, 24...",[],8,0
...,...,...,...,...,...,...
138081,0,2015-12-17 14:06:05.000,"[5607, 1025, 9650, 543806, 1424, 7372, 2234, 2...",[4],48,1
138082,0,2020-04-16 21:35:44.000,"[638333, 244876, 108022, 420983, 20258, 595078...","[11913, 335, 3162, 455, 23086]",100,5
138083,0,2019-03-27 15:27:40.000,"[1435, 718, 2659, 2773, 1359, 8731, 696, 697, ...",[],12,0
138084,0,2015-11-18 11:49:09.000,"[3091, 308295, 428975, 80278, 35027, 234993, 8...",[],9,0


row가 playlist(nid)이고 column이 item(sid or tid)인 sparse matrix A를 만듭니다.

In [19]:
row = np.repeat(range(n_train), plylst_train['num_songs'])
col = [song for songs in plylst_train['songs_id'] for song in songs]
dat = np.repeat(1, plylst_train['num_songs'].sum())
train_songs_A = spr.csr_matrix((dat, (row, col)), shape=(n_train, n_songs)) # shape: (115071, 638336)

row = np.repeat(range(n_train), plylst_train['num_tags'])
col = [tag for tags in plylst_train['tags_id'] for tag in tags]
dat = np.repeat(1, plylst_train['num_tags'].sum())
train_tags_A = spr.csr_matrix((dat, (row, col)), shape=(n_train, n_tags))   # shape: (115071, 30197)

In [20]:
train_songs_A_T = train_songs_A.T.tocsr()
train_tags_A_T = train_tags_A.T.tocsr()

In [28]:
from tqdm import tqdm

def rec(pids):
  tt = 1

  res = []

  for pid in tqdm(pids):
    p = np.zeros((n_songs,1)) # shape: (638336, 1)
    p[test.loc[pid,'songs_id']] = 1 # 현재 pid의 songs_id에 있는 곡들

    val = train_songs_A.dot(p).reshape(-1)  # shape: (115071,)

    songs_already = test.loc[pid, "songs_id"]
    tags_already = test.loc[pid, "tags_id"]

    cand_song = train_songs_A_T.dot(val)  # shape: (638336,)
    cand_song_idx = cand_song.reshape(-1).argsort()[-300:][::-1]

    cand_song_idx = cand_song_idx[np.isin(cand_song_idx, songs_already) == False][:100] # pid가 들은 적 없는 곡들만 넣기
    rec_song_idx = [song_sid_id[i] for i in cand_song_idx]

    cand_tag = train_tags_A_T.dot(val)
    cand_tag_idx = cand_tag.reshape(-1).argsort()[-30:][::-1]

    cand_tag_idx = cand_tag_idx[np.isin(cand_tag_idx, tags_already) == False][:10]
    rec_tag_idx = [tag_tid_id[i] for i in cand_tag_idx]

    res.append({
                "id": plylst_nid_id[pid],
                "songs": rec_song_idx,
                "tags": rec_tag_idx
            })
    
    if tt % 1000 == 0:
      print(tt)

    tt += 1
  return res

In [29]:
answers = rec(test.index)

  4%|▍         | 1001/23015 [01:43<37:30,  9.78it/s]

1000


  9%|▊         | 2001/23015 [03:25<36:15,  9.66it/s]

2000


 13%|█▎        | 3000/23015 [05:06<33:57,  9.82it/s]

3000


 17%|█▋        | 4001/23015 [06:48<32:40,  9.70it/s]

4000


 22%|██▏       | 5001/23015 [08:31<30:39,  9.79it/s]

5000


 26%|██▌       | 6001/23015 [10:13<29:05,  9.75it/s]

6000


 30%|███       | 7001/23015 [11:56<27:40,  9.65it/s]

7000


 35%|███▍      | 8002/23015 [13:37<24:38, 10.16it/s]

8000


 39%|███▉      | 9002/23015 [15:15<23:13, 10.06it/s]

9000


 43%|████▎     | 10001/23015 [16:56<21:48,  9.94it/s]

10000


 48%|████▊     | 11001/23015 [18:37<20:20,  9.84it/s]

11000


 52%|█████▏    | 12001/23015 [20:16<18:09, 10.11it/s]

12000


 56%|█████▋    | 13001/23015 [21:56<16:33, 10.08it/s]

13000


 61%|██████    | 14001/23015 [23:36<15:11,  9.89it/s]

14000


 65%|██████▌   | 15002/23015 [25:16<13:17, 10.04it/s]

15000


 70%|██████▉   | 16001/23015 [26:56<11:48,  9.90it/s]

16000


 74%|███████▍  | 17001/23015 [28:37<10:16,  9.76it/s]

17000


 78%|███████▊  | 18002/23015 [30:18<08:24,  9.93it/s]

18000


 83%|████████▎ | 19001/23015 [31:58<06:48,  9.84it/s]

19000


 87%|████████▋ | 20000/23015 [33:39<05:05,  9.88it/s]

20000


 91%|█████████▏| 21002/23015 [35:20<03:22,  9.96it/s]

21000


 96%|█████████▌| 22001/23015 [37:00<01:46,  9.54it/s]

22000


100%|█████████▉| 23001/23015 [38:43<00:01, 10.14it/s]

23000


100%|██████████| 23015/23015 [38:45<00:00,  9.90it/s]


In [30]:
answers

[{'id': 118598,
  'songs': [207912,
   623047,
   422438,
   703323,
   638488,
   322215,
   140837,
   690767,
   157900,
   39436,
   516376,
   11657,
   439161,
   78983,
   569715,
   394031,
   413459,
   213435,
   328908,
   472374,
   217622,
   262430,
   58773,
   557956,
   66475,
   207558,
   209622,
   494552,
   236393,
   607459,
   551157,
   367826,
   411438,
   292859,
   654757,
   448547,
   385871,
   387859,
   76888,
   187531,
   358186,
   326204,
   205757,
   438857,
   695032,
   542127,
   693988,
   571016,
   447762,
   140867,
   625875,
   459256,
   654428,
   620311,
   272379,
   307938,
   616144,
   32120,
   412246,
   545816,
   646988,
   572238,
   671973,
   464051,
   333595,
   570479,
   365613,
   421833,
   249378,
   55791,
   455806,
   1133,
   219415,
   284913,
   376435,
   585728,
   255937,
   112399,
   680970,
   61771,
   613740,
   400781,
   554751,
   278184,
   493762,
   224921,
   640239,
   146989,
   467269,
   5049

In [31]:
write_json(answers, "results.json")

In [None]:
evaluator = CustomEvaluator()
evaluator.evaluate("arena_data/val.json", "arena_data/results.json")

In [None]:
sum((1.0 / np.log(i + 2) for i in range(1)))