In [1]:
import pandas as pd
from collections import defaultdict
from tqdm import tqdm
import os
import numpy as np

In [2]:
# Artist under analysis; this name is interpolated into the input and output
# paths used by the cells below.
artist = 'Taylor Swift'

### 特定のアーティストの曲を聴いたユーザのデータを取得

In [3]:
# Directory of the per-artist user analysis CSVs. The trailing slash is kept
# because file names are appended to this string directly.
saved_path = f'./user_analysis/{artist}/'

In [4]:
# Load the interaction table and drop the unnamed leading column
# ('Unnamed: 0'), which looks like a row index written out with the data.
target_user_interaction_df = (
    pd.read_csv(saved_path + 'target_user_interaction.csv')
    .drop(columns='Unnamed: 0')
)
target_user_interaction_df.head()

Unnamed: 0,user_id,item_id,org_id,freebase_id,track_name,artist_id,artist_name
0,30,5254,288441,m.0fs1q26,Kilojoules,19954.0,Freelance Whales
1,30,5708,746008,m.0fqdyt4,The Great Estates,19954.0,Freelance Whales
2,30,5248,746013,m.0fyhs9l,Location,19954.0,Freelance Whales
3,30,24476,252966,m.0g0yxk0,Broken Horse,19954.0,Freelance Whales
4,30,5238,347396,m.0fvqyk7,"Oh, Maker",15910.0,Janelle Monáe


In [5]:
# Load the per-user `ratio` table and drop the unnamed leading column
# ('Unnamed: 0'), which looks like a row index written out with the data.
user_id_ratio_df = (
    pd.read_csv(saved_path + 'user_id-ratio.csv')
    .drop(columns='Unnamed: 0')
)
user_id_ratio_df.head()

Unnamed: 0,user_id,ratio
0,30,0.024691
1,31,0.094203
2,67,0.001351
3,89,0.011577
4,109,0.04


In [6]:
# Distinct user ids present in the interaction table.
# `.tolist()` converts the NumPy scalars returned by `unique()` into native
# Python values, so the list prints as `[30, 31, ...]`. Wrapping the array in
# `list()` would keep NumPy scalars, which NumPy >= 2 prints as
# `np.int64(30)`; that text breaks any query string built by formatting
# this list.
target_user = target_user_interaction_df['user_id'].unique().tolist()

### 特定のアーティストのアイテムを取得

In [7]:
# Load the CSV of items for the selected artist and drop the unnamed leading
# column ('Unnamed: 0'), which looks like a row index written out with the data.
saved_path = f'./artist_item_data/{artist}.csv'
item_df = pd.read_csv(saved_path).drop(columns='Unnamed: 0')
item_df.head()

Unnamed: 0,item_id,track_name,artist_name,interaction
0,5255,Hey Stephen,Taylor Swift,22.0
1,5392,Mary's Song (Oh My My My),Taylor Swift,13.0
2,5361,Tied Together With a Smile,Taylor Swift,8.0
3,20463,Fifteen,Taylor Swift,16.0
4,25060,Dear John,Taylor Swift,25.0


In [8]:
# Distinct item ids of the selected artist.
# `.tolist()` converts the NumPy scalars returned by `unique()` into native
# Python values, so the list prints as `[5255, 5392, ...]`. Wrapping the array
# in `list()` would keep NumPy scalars, which NumPy >= 2 prints as
# `np.int64(5255)`; that text breaks any query string built by formatting
# this list.
target_item = item_df['item_id'].unique().tolist()

### embedding のデータを取得

In [9]:
# Name of the restored model run, and the directory under it that holds the
# 2-D embedding CSVs read by the following cells.
model = 'malel_last-fm_epoch=300'
saved_path = f'../RestoreModel/{model}/emb_2_dim/'

In [10]:
# Load the 2-D user embeddings (columns id, type, x, y) and drop the unnamed
# leading column ('Unnamed: 0'), which looks like a saved row index.
user_2dim_df = (
    pd.read_csv(saved_path + 'user_emb_2dim.csv')
    .drop(columns='Unnamed: 0')
)
user_2dim_df.head()

Unnamed: 0,id,type,x,y
0,0,user,-23.38933,16.841595
1,1,user,27.926821,19.098354
2,2,user,-21.620014,21.031384
3,3,user,8.001935,28.562372
4,4,user,39.398445,-10.006981


In [11]:
# Load the 2-D entity embeddings (columns id, type, x, y) and drop the unnamed
# leading column ('Unnamed: 0'), which looks like a saved row index.
entity_2dim_df = (
    pd.read_csv(saved_path + 'entity_emb_2dim.csv')
    .drop(columns='Unnamed: 0')
)
entity_2dim_df.head()

Unnamed: 0,id,type,x,y
0,0,entity,5.5196,5.664539
1,1,entity,-8.202256,30.739899
2,2,entity,-23.490604,16.579607
3,3,entity,0.374067,23.597845
4,4,entity,-23.443825,16.330036


### 特定のアーティストに関係のあるユーザ・アイテムの embedding を抽出

In [12]:
# Keep only the embedding rows whose id belongs to a target user.
# A boolean `isin` mask is used instead of formatting the id list into a query
# string: the string form depends on how each element prints (NumPy >= 2
# renders scalars as `np.int64(30)`, which the query parser cannot resolve)
# and makes pandas parse one very long expression.
target_user_2dim_df = (
    user_2dim_df[user_2dim_df['id'].isin(target_user)]
    .reset_index(drop=True)
)
target_user_2dim_df.head()

Unnamed: 0,id,type,x,y
0,30,user,39.243977,10.63642
1,31,user,-3.829041,25.632372
2,67,user,39.845177,-9.426232
3,89,user,24.086521,-17.296921
4,109,user,15.483473,-28.955711


In [13]:
# Attach each user's `ratio` by matching the embedding `id` to `user_id`.
# The left join keeps every embedding row; the duplicate key column
# `user_id` is dropped once the join is done.
target_user_2dim_df = target_user_2dim_df.merge(
    user_id_ratio_df,
    how='left',
    left_on='id',
    right_on='user_id',
).drop(columns='user_id')
target_user_2dim_df.head()

Unnamed: 0,id,type,x,y,ratio
0,30,user,39.243977,10.63642,0.024691
1,31,user,-3.829041,25.632372,0.094203
2,67,user,39.845177,-9.426232,0.001351
3,89,user,24.086521,-17.296921,0.011577
4,109,user,15.483473,-28.955711,0.04


In [14]:
# Presumably the entity id of the artist itself -- TODO confirm where 64848
# comes from. It is selected together with the artist's item ids.
artist_entity_id = 64848
# Guarded so that re-running this cell does not append the id a second time.
if artist_entity_id not in target_item:
    target_item.append(artist_entity_id)

# Keep only the entity embeddings whose id is in the target list.
# A boolean `isin` mask is used instead of formatting the id list into a query
# string, which depends on how each element prints (NumPy >= 2 renders scalars
# as `np.int64(5255)`, which the query parser cannot resolve).
target_entity_2dim_df = (
    entity_2dim_df[entity_2dim_df['id'].isin(target_item)]
    .reset_index(drop=True)
)
target_entity_2dim_df.head()

Unnamed: 0,id,type,x,y
0,5255,entity,35.704029,-7.71191
1,5269,entity,35.603611,-7.714102
2,5277,entity,35.588772,-7.764375
3,5313,entity,35.685799,-7.700068
4,5332,entity,35.319408,-7.799895


In [15]:
# Left-join the item metadata onto the entity embeddings, matching `id` to
# `item_id`. Entities without a matching item keep NaN in the item columns.
target_entity_2dim_df = target_entity_2dim_df.merge(
    item_df,
    left_on='id',
    right_on='item_id',
    how='left',
)

In [16]:
# Show the row for the artist entity. The id comes from `artist_entity_id`
# rather than a repeated literal, so this check cannot drift from the id
# that was actually added to the selection.
target_entity_2dim_df[target_entity_2dim_df['id'] == artist_entity_id]

Unnamed: 0,id,type,x,y,item_id,track_name,artist_name,interaction
23,64848,entity,-19.901268,-20.312155,,,,


### データを保存

In [17]:
# Output directory for this artist's exported data.
save_path = './user_analysis/{}/'.format(artist)

# `exist_ok=True` creates the directory (and any missing parents) only when
# needed, without a separate existence check that could race with another
# process creating it.
os.makedirs(save_path, exist_ok=True)

In [18]:
# Write the user and entity tables into the output directory. The DataFrame
# index is written too (default `to_csv` behavior), which yields an unnamed
# leading column in the resulting files.
target_user_2dim_df.to_csv(path_or_buf=save_path + 'user_data.csv')
target_entity_2dim_df.to_csv(path_or_buf=save_path + 'entity_data.csv')