# Content-based recommendation using track features

## File paths

In [1]:
# Folder that holds every input file read by this notebook.
data_root = './data'

# One CSV of tracks per user.
user_1 = f'{data_root}/user1.csv'
user_2 = f'{data_root}/user2.csv'
user_3 = f'{data_root}/user3.csv'

# CSV with the full set of tracks.
full_data = f'{data_root}/full_data.csv'

# Text file that is read line by line to obtain column names.
fields = f'{data_root}/common_fields.txt'

## Libraries

In [2]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr

## Data

In [3]:
# Read the full data set and the three per-user files into DataFrames,
# in that order.
data_df, user1_df, user2_df, user3_df = map(
    pd.read_csv, (full_data, user_1, user_2, user_3)
)

In [4]:
# Columns listed in the fields file that are not used as track features.
excluded_fields = {'artists', 'id', 'uri', 'track_href', 'analysis_url'}

# Build the list of feature columns from the fields file (one name per line).
with open(fields, 'r') as fields_file:
  stripped_names = (line.strip() for line in fields_file)
  # Blank lines (for example a trailing empty line) are skipped: an empty
  # string is not a column name and would break column selection later.
  feature_fields = [
      name for name in stripped_names
      if name and name not in excluded_fields
  ]

In [5]:
# Display the column names that were kept after filtering the fields file.
feature_fields

['track_name',
 'popularity',
 'danceability',
 'energy',
 'key',
 'loudness',
 'mode',
 'speechiness',
 'acousticness',
 'instrumentalness',
 'liveness',
 'valence',
 'tempo',
 'duration_ms',
 'time_signature']

In [6]:
# Turn the selected columns of each DataFrame into a NumPy array with one row
# per track, laid out in the order given by feature_fields.
# NOTE(review): 'track_name' is part of feature_fields, so these arrays
# presumably end up with a string dtype; the similarity helpers cast the
# values back to float before computing anything.
data_track, user1_track, user2_track, user3_track = (
    np.array(frame[feature_fields].values.tolist())
    for frame in (data_df, user1_df, user2_df, user3_df)
)

In [12]:
# NOTE(review): the same four CSV files are already read in an earlier cell;
# this reload looks redundant. Confirm nothing relies on fresh frames before
# removing it.
data_df, user1_df, user2_df, user3_df = (
    pd.read_csv(csv_path) for csv_path in (full_data, user_1, user_2, user_3)
)

# Track-name column of each frame. The per-user series are passed to
# top_10_similarity so that tracks the user already has can be skipped.
raw_data_track, raw_user1_track, raw_user2_track, raw_user3_track = (
    frame.track_name for frame in (data_df, user1_df, user2_df, user3_df)
)

## Similarity functions

In [7]:
def cosine_sim(x, y):
  """Return the cosine similarity between two feature vectors.

  Args:
    x: Sequence of numbers (numeric strings are accepted); cast to float.
    y: Sequence of numbers (numeric strings are accepted); cast to float.

  Returns:
    dot(x, y) / (||x|| * ||y||). If either vector has zero norm the ratio
    would be 0/0, so zero is returned instead of NaN.
  """
  x = np.array(x, dtype=float)
  y = np.array(y, dtype=float)
  numerator = np.dot(x, y.T)
  denominator = np.linalg.norm(x) * np.linalg.norm(y)
  if denominator == 0:
    # A zero-norm input also makes the dot product zero; multiplying by 0.0
    # yields a zero of the same type/shape as the normal result and avoids
    # the NaN (and RuntimeWarning) that 0/0 would produce.
    return numerator * 0.0
  return numerator / denominator

def pearson_sim(x, y):
  """Return the Pearson correlation coefficient between two feature vectors.

  Both inputs are cast to float arrays and handed to scipy.stats.pearsonr;
  only the correlation coefficient (the first element of its result) is
  returned, the p-value is discarded.
  """
  first = np.asarray(x, dtype=float)
  second = np.asarray(y, dtype=float)
  return pearsonr(first, second)[0]

## Experiments on recommendations

In [16]:
def top_10_similarity(user_track, raw_user_track, similarity = 'cosine', tracks = None):
  """Rank candidate tracks by similarity to one of a user's tracks.

  Args:
    user_track: One row laid out like the rows of ``data_track``: the track
      name at index 0 followed by the feature values.
    raw_user_track: Iterable of track names the user already has; candidates
      with one of these names are skipped.
    similarity: 'cosine' or 'pearson' (case-insensitive).
    tracks: Optional candidate rows in the same layout as ``user_track``.
      Defaults to the notebook-level ``data_track``.

  Returns:
    A list of at most 10 ``(track_name, score)`` tuples sorted by score,
    highest first, or None when ``similarity`` is not a supported name.

  NOTE(review): features are compared on their raw scales. The scores shown
  in the notebook output all sit around 0.9999999, which suggests that
  large-magnitude columns (e.g. duration_ms) dominate; consider
  standardizing the features before ranking.
  """
  measures = {'cosine': cosine_sim, 'pearson': pearson_sim}
  measure = measures.get(similarity.lower())
  if measure is None:
    return None

  candidates = data_track if tracks is None else tracks
  # Build the "already owned" lookup once instead of converting the series to
  # a list for every candidate row.
  owned_names = set(raw_user_track)
  user_features = user_track[1:]

  scored = [
      (track[0], measure(user_features, track[1:]))
      for track in candidates
      if track[0] not in owned_names
  ]

  # A NaN score (e.g. correlation with a constant vector) compares False
  # against everything and would leave the sort order undefined, so NaNs are
  # ranked below every real score.
  scored.sort(
      key = lambda pair: -np.inf if np.isnan(pair[1]) else pair[1],
      reverse = True,
  )
  return scored[:10]

In [17]:
# Recommendations for user 1: one top-10 list per track the user has,
# computed once with Pearson correlation and once with cosine similarity.
pear_recommend_user1 = [
    top_10_similarity(track, raw_user1_track, similarity = 'pearson')
    for track in user1_track
]
cosine_recommend_user1 = [
    top_10_similarity(track, raw_user1_track, similarity = 'cosine')
    for track in user1_track
]

In [18]:
# Recommendations for user 2: one top-10 list per track the user has,
# computed once with Pearson correlation and once with cosine similarity.
pear_recommend_user2 = [
    top_10_similarity(track, raw_user2_track, similarity = 'pearson')
    for track in user2_track
]
cosine_recommend_user2 = [
    top_10_similarity(track, raw_user2_track, similarity = 'cosine')
    for track in user2_track
]

In [19]:
# Recommendations for user 3: one top-10 list per track the user has,
# computed once with Pearson correlation and once with cosine similarity.
pear_recommend_user3 = [
    top_10_similarity(track, raw_user3_track, similarity = 'pearson')
    for track in user3_track
]
cosine_recommend_user3 = [
    top_10_similarity(track, raw_user3_track, similarity = 'cosine')
    for track in user3_track
]

In [20]:
# Inspect the Pearson-based recommendations for user 1: one list of
# (track_name, score) pairs per track the user has.
pear_recommend_user1

[[('Cũng Đành Thôi', 0.9999999998822343),
  ('Và Thế Là Hết', 0.9999999998626196),
  ('Sài Gòn Đau Lòng Quá', 0.9999999998443474),
  ('Trên Tình Bạn Dưới Tình Yêu (with 16 Typh)', 0.9999999998156526),
  ('Mộng Mơ', 0.9999999997996984),
  ('Vì Yêu Cứ Đâm Đầu', 0.9999999997610907),
  ('Đã Biết Sẽ Có Ngày Hôm Qua', 0.9999999997570422),
  ('Matchanah', 0.9999999997146066),
  ('Xin Đừng Lặng Im', 0.9999999996864326),
  ('Đưa Nhau Đi Trốn (Chill Version)', 0.9999999996814894)],
 [('Xin Em Cho Tôi Một Cơ Hội', 0.9999999998775294),
  ('Mùa Đông Tàn Phai', 0.9999999998562092),
  ('Thuong Nhau Ngay Mua (feat. Don Ho)', 0.9999999998489232),
  ('Khi Cô Đơn Em Nhớ Ai', 0.9999999998307049),
  ('Không Thể Sống Thiếu Em', 0.9999999997863425),
  ('Những Mùa Dấu Yêu', 0.9999999997825159),
  ('Tình Yêu Vượt Thời Gian', 0.9999999997482835),
  ('Chiều Nghe Biển Khóc', 0.9999999997258422),
  ('nan', 0.9999999997019252),
  ('Tâm Hồn Xao Động', 0.9999999996735146)],
 [('? (feat. Dế Choắt)', 0.9999999999521021