In [1]:
import pickle as pkl
import pandas as pd
import numpy as np

In [2]:
# Load the pickled hitter / pitcher tables and the saved model weights.
# NOTE(review): unpickling can run arbitrary code - only load files from a
# trusted source.
with open('./pkl/hitter_info.pkl', mode='rb') as hitter_file:
    hitter_info = pkl.load(hitter_file)

with open('./pkl/pitcher_info.pkl', mode='rb') as pitcher_file:
    pitcher_info = pkl.load(pitcher_file)

with open('./model/weight.pkl', mode='rb') as weight_file:
    weight = pkl.load(weight_file)

In [3]:
# Pull the first-layer weights of the hitter and pitcher sub-networks out of
# the loaded weights and move them to the CPU.
# NOTE(review): presumably one row per player - confirm against the model.
player_vecs = {
    "batter": weight['hitter_layer.0.weight'].cpu(),
    "pitcher": weight['pitcher_layer.0.weight'].cpu(),
}
batter_vec = player_vecs["batter"]
pitcher_vec = player_vecs["pitcher"]

In [4]:
# Length of a single batter vector, used below to name the latent columns.
VEC_SIZE = len(batter_vec[0])

In [5]:
# NumPy copies of the weight matrices, keyed by player role.
latent_vecs = {
    role: np.array(vectors)
    for role, vectors in (('batter', batter_vec), ('pitcher', pitcher_vec))
}

In [6]:
# Display names per role, in the row order of the info tables.
# NOTE(review): names are taken verbatim, so any padding stored in the source
# column carries through to the printed results.
player_names = dict(
    batter=hitter_info.HITNAME.values.tolist(),
    pitcher=pitcher_info.PITNAME.values.tolist(),
)

In [7]:
# One row per batter: latent0..latent{VEC_SIZE-1}, then the id and name taken
# from hitter_info (assumed to be in the same row order as batter_vec).
bat_player_df = pd.DataFrame(
    batter_vec.tolist(),
    columns=["latent{0}".format(dim) for dim in range(VEC_SIZE)],
).assign(
    player_id=hitter_info.HITIDX.values.tolist(),
    name=hitter_info.HITNAME.values.tolist(),
)

In [8]:
# One row per pitcher: latent0..latent{VEC_SIZE-1}, then the id and name taken
# from pitcher_info (assumed to be in the same row order as pitcher_vec).
pit_player_df = pd.DataFrame(
    pitcher_vec.tolist(),
    columns=["latent{0}".format(dim) for dim in range(VEC_SIZE)],
).assign(
    player_id=pitcher_info.PITIDX.values.tolist(),
    name=pitcher_info.PITNAME.values.tolist(),
)

In [9]:
def get_nearest_neighbors(name, data, latent_vecs, player_names, k = 5):
    """Return the k nearest neighbors (in the latent space) of a given player.

    Closeness is the cosine distance (1 - cosine similarity) between the
    query player's row of ``latent_vecs`` and every other row.

    :param name: name of the query player. An exact match in ``data["name"]``
        is preferred; if there is none, names are compared again with
        surrounding whitespace removed. When several rows match, the first
        one is used and a warning is emitted.
    :param data: table with a ``"name"`` column (e.g. a DataFrame); row ``i``
        must describe the same player as ``latent_vecs[i]``.
    :param latent_vecs: 2-D array holding one latent vector per row.
    :param player_names: sequence of names aligned with the rows of
        ``latent_vecs``; used to label the results.
    :param k: maximum number of neighbors to return (values <= 0 give ``[]``).
    :return: list of at most ``k`` ``(player_name, cosine_distance)`` tuples
        ordered by increasing distance; the query player is never included.
    :raises ValueError: if no entry of ``data["name"]`` matches ``name``.
    """
    import warnings

    vectors = np.asarray(latent_vecs)
    known_names = list(data["name"])

    matches = [row for row, known in enumerate(known_names) if known == name]
    if not matches and isinstance(name, str):
        # Stored names may carry padding; retry ignoring surrounding whitespace.
        wanted = name.strip()
        matches = [
            row for row, known in enumerate(known_names)
            if isinstance(known, str) and known.strip() == wanted
        ]
    if not matches:
        raise ValueError("player {0!r} not found in data['name']".format(name))
    if len(matches) > 1:
        warnings.warn(
            "{0} players match {1!r}; using the first one (row {2})".format(
                len(matches), name, matches[0]
            )
        )
    player_index = matches[0]

    # Keep the query as a (1, dim) matrix so the arithmetic matches the
    # single-match case of the previous implementation exactly.
    player_latent = vectors[[player_index]]
    distances = 1 - np.dot(vectors, player_latent.T).flatten() / (
        np.linalg.norm(vectors, axis = 1) * np.linalg.norm(player_latent)
    )

    # Drop the query by row index rather than by sorted position: another
    # player with an identical vector could otherwise sort ahead of it and the
    # query would be reported as its own neighbor.
    order = np.argsort(distances, kind = "stable")
    order = order[order != player_index]

    return [(player_names[row], distances[row]) for row in order[:max(k, 0)]]

In [10]:
# Example queries. Each inner list is one group; a separator line is printed
# between consecutive groups.
query_groups = [
    [("batter", "박해민")],
    [("pitcher", "선동열")],
    [("batter", "이승엽"), ("batter", "이종범")],
    [("pitcher", "양현종")],
    [("batter", "이용규")],
]
player_tables = {"batter": bat_player_df, "pitcher": pit_player_df}

for group_number, group in enumerate(query_groups):
    if group_number > 0:
        print("================================================================================================")
    for role, query_name in group:
        print(query_name)
        print(get_nearest_neighbors(query_name, player_tables[role], latent_vecs[role], player_names[role]))
        print()

박해민
[('장민석', 0.1094017), ('장일현', 0.16128415), ('이상대', 0.1707263), ('이종욱', 0.1723364), ('이희성', 0.17276889)]

선동열
[('임창용', 0.08846742), ('조웅천', 0.15325212), ('이대진', 0.17722994), ('윤석민', 0.18363696), ('박준수', 0.1958093)]

이승엽
[('테임즈', 0.079945385), ('김재환', 0.08488554), ('박정권', 0.092071), ('가르시아', 0.09557867), ('클락', 0.10115534)]

이종범
[('홍세완', 0.12456423), ('이광은', 0.18308526), ('강기웅', 0.19776559), ('유한준', 0.1986863), ('이택근', 0.21009177)]

양현종
[('김광현', 0.11396772), ('차우찬', 0.120057106), ('장현식', 0.14668423), ('박정진         ', 0.15260518), ('이상열', 0.16589892)]

이용규
[('정수근', 0.07207322), ('박흥식', 0.14335763), ('전준호', 0.1574642), ('박재벌', 0.18299693), ('김실', 0.2125771)]

