In [1]:
import pandas as pd
from secrets import localhost_db, postgres_db
from sqlalchemy import create_engine
from queries import *

df_a = pd.read_csv('data/TopArtists.csv')
df_t = pd.read_csv('data/TopTracks.csv')
df_g = pd.read_csv('data/TopGenres.csv')
df_m = pd.read_csv('data/MusicFeatures.csv')

In [2]:
df_a['user_id'].unique()

array([12153521253, 12120382831])

In [3]:
u1_a = df_a.loc[df_a['user_id'] == 12153521253]
u1_t = df_t.loc[df_t['user_id'] == 12153521253]
u1_g = df_g.loc[df_g['user_id'] == 12153521253]
u1_m = df_m.loc[df_m['user_id'] == 12153521253]

In [4]:
u2_a = df_a.loc[df_a['user_id'] == 12120382831]
u2_t = df_t.loc[df_t['user_id'] == 12120382831]
u2_g = df_g.loc[df_g['user_id'] == 12120382831]
u2_m = df_m.loc[df_m['user_id'] == 12120382831]

In [5]:
def top_to_dict(top_df):
    top_dict = {}
    top_dict['Short'] = top_df.loc[top_df['timeframe'] == 'Short'].to_dict('records')
    top_dict['Medium'] = top_df.loc[top_df['timeframe'] == 'Medium'].to_dict('records')
    top_dict['Long'] = top_df.loc[top_df['timeframe'] == 'Long'].to_dict('records')
    return top_dict

In [6]:
def get_artist_similarity(u1, u2, timeframe='Long'):
    df1 = u1.loc[u1['timeframe'] == timeframe, ['artist_id', 'artist', 'rank']]
    df2 = u2.loc[u2['timeframe'] == timeframe, ['artist_id', 'artist', 'rank']]
    df = df1.merge(df2, on=['artist_id', 'artist'], how='outer').fillna(0)
    df['base'] = calculate_score(df[df[['rank_x', 'rank_y']] > 0].min(axis=1))
    df.loc[(df['rank_x'] != 0) & (df['rank_y'] != 0), 'score'] = calculate_score(df[['rank_x', 'rank_y']].max(axis=1))
    df['score'] = df['score'].fillna(0)
    df = df.rename(columns={'rank_x': u1['user_id'].unique()[0], 'rank_y': u2['user_id'].unique()[0]})
    return df

def get_track_similarity(u1, u2, timeframe='Long'):
    df1 = u1.loc[u1['timeframe'] == timeframe, ['track_id', 'track', 'rank']]
    df2 = u2.loc[u2['timeframe'] == timeframe, ['track_id', 'track', 'rank']]
    df = df1.merge(df2, on=['track_id', 'track'], how='outer').fillna(0)
    df['base'] = calculate_score(df[df[['rank_x', 'rank_y']] > 0].min(axis=1))
    df.loc[(df['rank_x'] != 0) & (df['rank_y'] != 0), 'score'] = calculate_score(df[['rank_x', 'rank_y']].max(axis=1))
    df['score'] = df['score'].fillna(0)
    df = df.rename(columns={'rank_x': u1['user_id'].unique()[0], 'rank_y': u2['user_id'].unique()[0]})
    return df

def get_genre_similarity(u1, u2, timeframe='Long'):
    df1 = u1.loc[u1['timeframe'] == timeframe, ['genre', 'rank']]
    df2 = u2.loc[u2['timeframe'] == timeframe, ['genre', 'rank']]
    df = df1.merge(df2, on='genre', how='outer').fillna(0)
    df['base'] = calculate_score(df[df[['rank_x', 'rank_y']] > 0].min(axis=1))
    df.loc[(df['rank_x'] != 0) & (df['rank_y'] != 0), 'score'] = calculate_score(df[df[['rank_x', 'rank_y']] > 0].max(axis=1))
    df['score'] = df['score'].fillna(0)
    df = df.rename(columns={'rank_x': u1['user_id'].unique()[0], 'rank_y': u2['user_id'].unique()[0]})
    return df

def calculate_similarity(df):
    return round(df.sum()['score'] / df.sum()['base'], 4)

def calculate_feature_similarity(u1, u2, timeframe='Long'):
    features1 = u1.loc[u1['timeframe'] == timeframe].drop(columns=['user_id', 'timeframe']).values.tolist()[0]
    features2 = u2.loc[u2['timeframe'] == timeframe].drop(columns=['user_id', 'timeframe']).values.tolist()[0]
    scores = []
    for i in range(len(features1)):
        f1 = abs(features1[i])
        f2 = abs(features2[i])
        scores.append(min(f1, f2) / max(f1, f2))
    return round(sum(scores) / len(scores), 4)

def calculate_score(rank, weight=16, shift=4):
    return weight / ((0.1 * rank + shift) ** 2) 

In [7]:
tf_weights = {'Short': 3, 'Medium': 2, 'Long': 1}
mu_weights = {'artist': 4, 'track': 1, 'genre': 8, 'feature': 2}
final_score = 0

for timeframe in ['Short', 'Medium', 'Long']:
    tf_score = 0
    # Artist
    df_artist = get_artist_similarity(u1_a, u2_a, timeframe)
    tf_score += mu_weights['artist'] * calculate_similarity(df_artist)
    # Track
    df_track = get_track_similarity(u1_t, u2_t, timeframe)
    tf_score += mu_weights['track'] * calculate_similarity(df_track)
    # Genre
    df_genre = get_genre_similarity(u1_g, u2_g, timeframe)
    tf_score += mu_weights['genre'] * calculate_similarity(df_genre)
    # Features
    tf_score += mu_weights['feature'] * calculate_feature_similarity(u1_m, u2_m)
    # Timeframe overall score
    tf_score /= sum(mu_weights.values())
    print(tf_score)
    final_score += tf_weights[timeframe] * tf_score

final_score /= sum(tf_weights.values())
final_score

0.22452
0.26435333333333333
0.3034


0.2509444444444444

In [8]:
df_a = get_artist_similarity(u1_a, u2_a, 'Short')
df_a.loc[df_a['score'] > 0]

Unnamed: 0,artist_id,artist,12153521253,12120382831,base,score
3,3mIj9lX2MWuHmhNCA7LSCW,The 1975,4.0,1.0,0.951814,0.826446
7,77SW9BnxLY8rJ0RciFqkHh,The Neighbourhood,8.0,34.0,0.694444,0.292184
8,1Xyo4u8uXC1ZmMpatF05PJ,The Weeknd,9.0,24.0,0.666389,0.390625
10,5cIc3SBFuBLVxJz58W2tU9,Oh Wonder,11.0,29.0,0.615148,0.336064
11,2h93pZq0e7k5yf4dywlkpM,Frank Ocean,12.0,11.0,0.615148,0.591716
13,5K4W6rqBFWDnAN6FQUkS6x,Kanye West,14.0,8.0,0.694444,0.548697
17,1Bl6wpkWCQ4KVgnASpvzzA,BROCKHAMPTON,18.0,40.0,0.475624,0.25


In [9]:
df_g = get_genre_similarity(u1_g, u2_g, 'Short')
df_g.loc[df_g['score'] > 0]

Unnamed: 0,genre,12153521253,12120382831,base,score
0,Pop,1.0,1.0,0.951814,0.951814
1,Electropop,2.0,16.0,0.907029,0.510204
2,Modern Rock,3.0,10.0,0.865333,0.64
6,Modern Alternative Rock,7.0,9.0,0.72431,0.666389
10,Nu Gaze,11.0,11.0,0.615148,0.615148
11,Rock,12.0,12.0,0.591716,0.591716
12,Hip Hop,13.0,8.0,0.694444,0.569598
13,Rap,14.0,4.0,0.826446,0.548697
25,Alternative R&B,26.0,23.0,0.403124,0.367309
26,Lgbtq+ Hip Hop,27.0,39.0,0.356427,0.256369


### Extract data from PROD and update to DEV

In [8]:
engine = create_engine(postgres_db)
user = pd.read_sql('select * from "Users"', engine)
user_profiles = pd.read_sql('select * from "UserProfiles"', engine)
top_artists = pd.read_sql('select * from "TopArtists"', engine)
top_tracks = pd.read_sql('select * from "TopTracks"', engine)
top_genres = pd.read_sql('select * from "TopGenres"', engine)
music_features = pd.read_sql('select * from "MusicFeatures"', engine)
artists = pd.read_sql('select * from "Artists"', engine)
tracks = pd.read_sql('select * from "Tracks"', engine)
engine.dispose()

In [9]:
engine = create_engine(localhost_db)
user.to_sql('Users', engine, index=False, if_exists='replace')
user_profiles.to_sql('UserProfiles', engine, index=False, if_exists='replace')
top_artists[['user_id', 'rank', 'artist_id', 'timeframe']].to_sql('TopArtists', engine, index=False, if_exists='replace')
top_tracks[['user_id', 'rank', 'track_id', 'timeframe']].to_sql('TopTracks', engine, index=False, if_exists='replace')
top_genres.to_sql('TopGenres', engine, index=False, if_exists='replace')
music_features.to_sql('MusicFeatures', engine, index=False, if_exists='replace')
artists.to_sql('Artists', engine, index=False, if_exists='replace')
tracks.to_sql('Tracks', engine, index=False, if_exists='replace')
engine.dispose()

In [39]:
def compare_users(u1, u2):
    # Configs
    tf_weights = {'Short': 6, 'Medium': 5, 'Long': 4}
    mu_weights = {'artist': 4, 'track': 1, 'genre': 8, 'feature': 2}
    # Get data
    engine = create_engine(localhost_db)
    users = pd.read_sql(users2_query, engine, params={'user_ids': (u1, u2)})
    df_a = pd.read_sql(top_artists2_query, engine, params={'user_ids': (u1, u2)})
    df_t = pd.read_sql(top_tracks2_query, engine, params={'user_ids': (u1, u2)})
    df_g = pd.read_sql(top_genres2_query, engine, params={'user_ids': (u1, u2)})
    df_m = pd.read_sql(music_features2_query, engine, params={'user_ids': (u1, u2)})
    # User 1
    u1_a = df_a.loc[df_a['user_id'] == u1]
    u1_t = df_t.loc[df_t['user_id'] == u1]
    u1_g = df_g.loc[df_g['user_id'] == u1]
    u1_m = df_m.loc[df_m['user_id'] == u1]
    # User 2
    u2_a = df_a.loc[df_a['user_id'] == u2]
    u2_t = df_t.loc[df_t['user_id'] == u2]
    u2_g = df_g.loc[df_g['user_id'] == u2]
    u2_m = df_m.loc[df_m['user_id'] == u2]
    
    final_score = 0
    similar_artists = pd.DataFrame()
    similar_tracks = pd.DataFrame()
    similar_genres = pd.DataFrame()
    
    name1 = users.loc[users['user_id'] == u1]['display_name'].unique().item()
    name2 = users.loc[users['user_id'] == u2]['display_name'].unique().item()
    print('Comparing {} and {}...'.format(name1, name2))

    for timeframe in ['Short', 'Medium', 'Long']:
        tf_score = 0
        # Artist
        df_artist = get_artist_similarity(u1_a, u2_a, timeframe)
        tf_score += mu_weights['artist'] * calculate_similarity(df_artist)
        df_artist = df_artist.loc[df_artist['score'] > 0]
        df_artist['rank'] = df_artist.reset_index().index + 1
        similar_artists = similar_artists.append(df_artist)
        # Track
        df_track = get_track_similarity(u1_t, u2_t, timeframe)
        tf_score += mu_weights['track'] * calculate_similarity(df_track)
        similar_tracks = similar_tracks.append(df_track.loc[df_track['score'] > 0])
        # Genre
        df_genre = get_genre_similarity(u1_g, u2_g, timeframe)
        tf_score += mu_weights['genre'] * calculate_similarity(df_genre)
        similar_genres = similar_genres.append(df_genre.loc[df_genre['score'] > 0])
        # Features
        tf_score += mu_weights['feature'] * calculate_feature_similarity(u1_m, u2_m)
        # Timeframe overall score
        tf_score /= sum(mu_weights.values())
        print('{} term music taste similarity: {:.2f}'.format(timeframe, tf_score * 100))
        final_score += tf_weights[timeframe] * tf_score

    final_score /= sum(tf_weights.values())
    print('Overall music taste similarity: {:.2f}'.format(final_score * 100))
    
    return final_score, users, similar_artists, similar_tracks, similar_genres


def get_artist_similarity(u1, u2, timeframe='Long'):
    df1 = u1.loc[u1['timeframe'] == timeframe]
    df2 = u2.loc[u2['timeframe'] == timeframe]
    df = df1.merge(df2, on=['artist_id', 'timeframe'], how='outer').fillna(0)
    df['base'] = calculate_score(df[df[['rank_x', 'rank_y']] > 0].min(axis=1))
    df.loc[(df['rank_x'] != 0) & (df['rank_y'] != 0), 'score'] = calculate_score(df[['rank_x', 'rank_y']].max(axis=1))
    df['score'] = df['score'].fillna(0)
    # df = df.rename(columns={'rank_x': u1['user_id'].unique()[0], 'rank_y': u2['user_id'].unique()[0]})
    return df

def get_track_similarity(u1, u2, timeframe='Long'):
    df1 = u1.loc[u1['timeframe'] == timeframe]
    df2 = u2.loc[u2['timeframe'] == timeframe]
    df = df1.merge(df2, on=['track_id', 'timeframe'], how='outer').fillna(0)
    df['base'] = calculate_score(df[df[['rank_x', 'rank_y']] > 0].min(axis=1))
    df.loc[(df['rank_x'] != 0) & (df['rank_y'] != 0), 'score'] = calculate_score(df[['rank_x', 'rank_y']].max(axis=1))
    df['score'] = df['score'].fillna(0)
    # df = df.rename(columns={'rank_x': u1['user_id'].unique()[0], 'rank_y': u2['user_id'].unique()[0]})
    return df

def get_genre_similarity(u1, u2, timeframe='Long'):
    df1 = u1.loc[u1['timeframe'] == timeframe]
    df2 = u2.loc[u2['timeframe'] == timeframe]
    df = df1.merge(df2, on=['genre', 'timeframe'], how='outer').fillna(0)
    df['base'] = calculate_score(df[df[['rank_x', 'rank_y']] > 0].min(axis=1))
    df.loc[(df['rank_x'] != 0) & (df['rank_y'] != 0), 'score'] = calculate_score(df[df[['rank_x', 'rank_y']] > 0].max(axis=1))
    df['score'] = df['score'].fillna(0)
    # df = df.rename(columns={'rank_x': u1['user_id'].unique()[0], 'rank_y': u2['user_id'].unique()[0]})
    return df

def calculate_similarity(df):
    return round(df.sum()['score'] / df.sum()['base'], 4)

def calculate_feature_similarity(u1, u2, timeframe='Long'):
    features1 = u1.loc[u1['timeframe'] == timeframe].drop(columns=['user_id', 'timeframe']).values.tolist()[0]
    features2 = u2.loc[u2['timeframe'] == timeframe].drop(columns=['user_id', 'timeframe']).values.tolist()[0]
    scores = []
    for i in range(len(features1)):
        f1 = abs(features1[i])
        f2 = abs(features2[i])
        scores.append(min(f1, f2) / max(f1, f2))
    return round(sum(scores) / len(scores), 4)

def calculate_score(rank, weight=16, shift=4):
    return weight / ((0.1 * rank + shift) ** 2) 

In [32]:
user_ids = ['12120382831', '12153521253', '1279967390', '12179805550']

for i in range(len(user_ids)):
    for j in range(i+1, len(user_ids)):
        user_id1 = user_ids[i]
        user_id2 = user_ids[j]
        compare_users(user_id1, user_id2)
        print()

Comparing Bin Xuan Kong and Thivya Dharishinie...
Short term music taste similarity: 38.59
Medium term music taste similarity: 32.17
Long term music taste similarity: 31.80
Overall music taste similarity: 34.64

Comparing Bin Xuan Kong and Jae Sheng Ang...
Short term music taste similarity: 21.49
Medium term music taste similarity: 31.25
Long term music taste similarity: 28.78
Overall music taste similarity: 26.69

Comparing Bin Xuan Kong and Clement Tan...
Short term music taste similarity: 23.51
Medium term music taste similarity: 29.10
Long term music taste similarity: 28.44
Overall music taste similarity: 26.69

Comparing Thivya Dharishinie and Jae Sheng Ang...
Short term music taste similarity: 16.00
Medium term music taste similarity: 20.87
Long term music taste similarity: 27.92
Overall music taste similarity: 20.80

Comparing Thivya Dharishinie and Clement Tan...
Short term music taste similarity: 15.84
Medium term music taste similarity: 23.28
Long term music taste similarity:

In [40]:
s, u, a, t, g = compare_users(user_ids[0], user_ids[1])

Comparing Bin Xuan Kong and Thivya Dharishinie...
Short term music taste similarity: 38.59
Medium term music taste similarity: 32.17
Long term music taste similarity: 31.80
Overall music taste similarity: 34.64


In [41]:
a.head()

Unnamed: 0,user_id_x,rank_x,artist_id,timeframe,user_id_y,rank_y,base,score,rank
0,12120382831,1.0,00FQb4jTyendYWaN8pK0wa,Short,12153521253,15.0,0.951814,0.528926,1
1,12120382831,2.0,3mIj9lX2MWuHmhNCA7LSCW,Short,12153521253,3.0,0.907029,0.865333,2
3,12120382831,4.0,1Xyo4u8uXC1ZmMpatF05PJ,Short,12153521253,11.0,0.826446,0.615148,3
4,12120382831,5.0,2h93pZq0e7k5yf4dywlkpM,Short,12153521253,12.0,0.790123,0.591716,4
6,12120382831,7.0,5K4W6rqBFWDnAN6FQUkS6x,Short,12153521253,13.0,0.72431,0.569598,5


In [43]:
def get_similar_artists(df_a):
    engine = create_engine(localhost_db)
    artists = pd.read_sql_query(similar_artists_query, engine, params={'artist_ids': tuple(df_a['artist_id'].tolist())})
    engine.dispose()
    df = df_a.merge(artists, on=['artist_id'])
    return df.sort_values(['timeframe', 'rank'])

def get_similar_tracks(df_t):
    engine = create_engine(localhost_db)
    tracks = pd.read_sql_query(similar_tracks_query, engine, params={'track_ids': tuple(df_t['track_id'].tolist())})
    engine.dispose()
    df = df_t.merge(tracks, on=['track_id'])
    return df.sort_values(['timeframe', 'score'])

In [44]:
df = get_similar_artists(a)
df.head()

Unnamed: 0,user_id_x,rank_x,artist_id,timeframe,user_id_y,rank_y,base,score,rank,artist,artist_url,artist_image
12,12120382831,1.0,5K4W6rqBFWDnAN6FQUkS6x,Long,12153521253,6.0,0.951814,0.756144,1,Kanye West,https://open.spotify.com/artist/5K4W6rqBFWDnAN...,https://i.scdn.co/image/bd1c6fdf3705cf9b7d0c8a...
6,12120382831,2.0,1Xyo4u8uXC1ZmMpatF05PJ,Long,12153521253,26.0,0.907029,0.367309,2,The Weeknd,https://open.spotify.com/artist/1Xyo4u8uXC1ZmM...,https://i.scdn.co/image/d9a875c37277c35b94c60c...
37,12120382831,4.0,73sIBHcqh3Z3NyqHKZ7FOL,Long,12153521253,32.0,0.826446,0.308642,3,Childish Gambino,https://open.spotify.com/artist/73sIBHcqh3Z3Ny...,https://i.scdn.co/image/2e1d974c14fb66eb5050bf...
38,12120382831,6.0,69GGBxA162lTqCwzJG5jLp,Long,12153521253,27.0,0.756144,0.356427,4,The Chainsmokers,https://open.spotify.com/artist/69GGBxA162lTqC...,https://i.scdn.co/image/960547a625bc2eb742bb3d...
39,12120382831,7.0,3TVXtAsR1Inumwj472S9r4,Long,12153521253,12.0,0.72431,0.591716,5,Drake,https://open.spotify.com/artist/3TVXtAsR1Inumw...,https://i.scdn.co/image/60cfab40c6bb160a1906be...


In [22]:
x = df.loc[df['timeframe'] == 'Short']
x

Unnamed: 0,user_id_x,rank_x,artist_id,timeframe,user_id_y,rank_y,base,score,artist,artist_url,artist_image
0,12120382831,1.0,00FQb4jTyendYWaN8pK0wa,Short,12153521253,15.0,0.951814,0.528926,Lana Del Rey,https://open.spotify.com/artist/00FQb4jTyendYW...,https://i.scdn.co/image/d79b3e6cc42de7f44c8f13...
1,12120382831,2.0,3mIj9lX2MWuHmhNCA7LSCW,Short,12153521253,3.0,0.907029,0.865333,The 1975,https://open.spotify.com/artist/3mIj9lX2MWuHmh...,https://i.scdn.co/image/94dd2feca73bdfb7e1c127...
4,12120382831,4.0,1Xyo4u8uXC1ZmMpatF05PJ,Short,12153521253,11.0,0.826446,0.615148,The Weeknd,https://open.spotify.com/artist/1Xyo4u8uXC1ZmM...,https://i.scdn.co/image/d9a875c37277c35b94c60c...
7,12120382831,5.0,2h93pZq0e7k5yf4dywlkpM,Short,12153521253,12.0,0.790123,0.591716,Frank Ocean,https://open.spotify.com/artist/2h93pZq0e7k5yf...,https://i.scdn.co/image/7db34c8aace6feb91f3860...
10,12120382831,7.0,5K4W6rqBFWDnAN6FQUkS6x,Short,12153521253,13.0,0.72431,0.569598,Kanye West,https://open.spotify.com/artist/5K4W6rqBFWDnAN...,https://i.scdn.co/image/bd1c6fdf3705cf9b7d0c8a...
13,12120382831,9.0,4BxCuXFJrSWGi1KHcVqaU4,Short,12153521253,4.0,0.826446,0.666389,Kodaline,https://open.spotify.com/artist/4BxCuXFJrSWGi1...,https://i.scdn.co/image/09b8d9b95ef677ec6c10af...
15,12120382831,12.0,1QAJqy2dA3ihHBFIHRphZj,Short,12153521253,5.0,0.790123,0.591716,Cigarettes After Sex,https://open.spotify.com/artist/1QAJqy2dA3ihHB...,https://i.scdn.co/image/074a07d0cbf2a30290b5b5...
18,12120382831,14.0,3kzwYV3OCB010YfXMF0Avt,Short,12153521253,8.0,0.694444,0.548697,Alvvays,https://open.spotify.com/artist/3kzwYV3OCB010Y...,https://i.scdn.co/image/098b97354be95aa4e5f3ca...
20,12120382831,15.0,5cIc3SBFuBLVxJz58W2tU9,Short,12153521253,9.0,0.666389,0.528926,Oh Wonder,https://open.spotify.com/artist/5cIc3SBFuBLVxJ...,https://i.scdn.co/image/b34c6322f787c61e0be561...
22,12120382831,20.0,3IunaFjvNKj98JW89JYv9u,Short,12153521253,6.0,0.756144,0.444444,The Japanese House,https://open.spotify.com/artist/3IunaFjvNKj98J...,https://i.scdn.co/image/848b92c2487efb37ba4c75...


In [30]:
x['rank'] = x.reset_index().index + 1
x

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x['rank'] = x.reset_index().index + 1


Unnamed: 0,user_id_x,rank_x,artist_id,timeframe,user_id_y,rank_y,base,score,artist,artist_url,artist_image,rank
0,12120382831,1.0,00FQb4jTyendYWaN8pK0wa,Short,12153521253,15.0,0.951814,0.528926,Lana Del Rey,https://open.spotify.com/artist/00FQb4jTyendYW...,https://i.scdn.co/image/d79b3e6cc42de7f44c8f13...,1
1,12120382831,2.0,3mIj9lX2MWuHmhNCA7LSCW,Short,12153521253,3.0,0.907029,0.865333,The 1975,https://open.spotify.com/artist/3mIj9lX2MWuHmh...,https://i.scdn.co/image/94dd2feca73bdfb7e1c127...,2
4,12120382831,4.0,1Xyo4u8uXC1ZmMpatF05PJ,Short,12153521253,11.0,0.826446,0.615148,The Weeknd,https://open.spotify.com/artist/1Xyo4u8uXC1ZmM...,https://i.scdn.co/image/d9a875c37277c35b94c60c...,3
7,12120382831,5.0,2h93pZq0e7k5yf4dywlkpM,Short,12153521253,12.0,0.790123,0.591716,Frank Ocean,https://open.spotify.com/artist/2h93pZq0e7k5yf...,https://i.scdn.co/image/7db34c8aace6feb91f3860...,4
10,12120382831,7.0,5K4W6rqBFWDnAN6FQUkS6x,Short,12153521253,13.0,0.72431,0.569598,Kanye West,https://open.spotify.com/artist/5K4W6rqBFWDnAN...,https://i.scdn.co/image/bd1c6fdf3705cf9b7d0c8a...,5
13,12120382831,9.0,4BxCuXFJrSWGi1KHcVqaU4,Short,12153521253,4.0,0.826446,0.666389,Kodaline,https://open.spotify.com/artist/4BxCuXFJrSWGi1...,https://i.scdn.co/image/09b8d9b95ef677ec6c10af...,6
15,12120382831,12.0,1QAJqy2dA3ihHBFIHRphZj,Short,12153521253,5.0,0.790123,0.591716,Cigarettes After Sex,https://open.spotify.com/artist/1QAJqy2dA3ihHB...,https://i.scdn.co/image/074a07d0cbf2a30290b5b5...,7
18,12120382831,14.0,3kzwYV3OCB010YfXMF0Avt,Short,12153521253,8.0,0.694444,0.548697,Alvvays,https://open.spotify.com/artist/3kzwYV3OCB010Y...,https://i.scdn.co/image/098b97354be95aa4e5f3ca...,8
20,12120382831,15.0,5cIc3SBFuBLVxJz58W2tU9,Short,12153521253,9.0,0.666389,0.528926,Oh Wonder,https://open.spotify.com/artist/5cIc3SBFuBLVxJ...,https://i.scdn.co/image/b34c6322f787c61e0be561...,9
22,12120382831,20.0,3IunaFjvNKj98JW89JYv9u,Short,12153521253,6.0,0.756144,0.444444,The Japanese House,https://open.spotify.com/artist/3IunaFjvNKj98J...,https://i.scdn.co/image/848b92c2487efb37ba4c75...,10


In [19]:
get_similar_tracks(t)

Unnamed: 0,user_id_x,rank_x,track_id,timeframe,user_id_y,rank_y,base,score,track,artists,album,track_url,album_image
0,12120382831,20.0,73jVPicY2G9YHmzgjk69ae,Medium,12153521253,32.0,0.444444,0.308642,Robbers,The 1975,The 1975,https://open.spotify.com/track/73jVPicY2G9YHmz...,https://i.scdn.co/image/ab67616d0000b27304f21e...


In [20]:
g

Unnamed: 0,user_id_x,rank_x,genre,points_x,timeframe,user_id_y,rank_y,points_y,base,score
0,12120382831,1.0,Pop,4.041452,Short,12153521253,1.0,5.230431,0.951814,0.951814
3,12120382831,4.0,Rap,2.002428,Short,12153521253,14.0,0.574837,0.826446,0.548697
7,12120382831,8.0,Hip Hop,1.140220,Short,12153521253,13.0,0.617077,0.694444,0.569598
8,12120382831,9.0,Modern Alternative Rock,1.123967,Short,12153521253,7.0,1.048123,0.724310,0.666389
9,12120382831,10.0,Modern Rock,1.123967,Short,12153521253,3.0,1.617012,0.865333,0.640000
...,...,...,...,...,...,...,...,...,...,...
45,12120382831,46.0,Modern Alternative Rock,0.142222,Long,12153521253,41.0,0.152200,0.243865,0.216333
46,12120382831,47.0,Modern Rock,0.142222,Long,12153521253,29.0,0.347774,0.336064,0.211389
47,12120382831,48.0,Nu Gaze,0.142222,Long,12153521253,42.0,0.152200,0.237954,0.206612
48,12120382831,49.0,Rock,0.142222,Long,12153521253,43.0,0.152200,0.232254,0.201995


In [34]:
DATABASE_URL = localhost_db
code = 'good-plane-60'

ADJECTIVES = ['good', 'bad', 'new', 'old', 'first', 'last', 'long', 'short', 'little', 'big', \
              'right', 'wrong', 'high', 'short', 'large', 'small', 'different', 'same', 'best', 'worst', \
              'easy', 'difficult', 'soft', 'hard', 'major', 'minor', 'public', 'private', 'real', 'fake', \
              'red', 'orange', 'yellow', 'green', 'blue', 'purple', 'black', 'white', 'grey', 'brown', \
              'pink', 'violet', 'indigo', 'silver', 'gold', 'teal', 'lime', 'maroon', 'olive', 'cyan']

NOUNS = ['time', 'year', 'month', 'day', 'week', 'thing', 'man', 'woman', 'boy', 'girl', \
         'world', 'life', 'eye', 'nose', 'ear', 'mouth', 'hair', 'hand', 'foot', 'leg', \
         'car', 'bus', 'bike', 'train', 'plane', 'boat', 'ship', 'tank', 'truck', 'taxi', \
         'dog', 'cat', 'mouse', 'cow', 'goat', 'horse', 'deer', 'rabbit', 'bird', 'monkey', \
         'bee', 'bear', 'chicken', 'fox', 'panda', 'frog', 'tiger', 'lion', 'duck', 'wolf']

code_parts = code.split('-')
if len(code_parts) != 3 or code_parts[0] not in ADJECTIVES or code_parts[1] not in NOUNS or \
    not (0 < int(code_parts[2]) < 100):
    print('con 1 fail')
engine = create_engine(DATABASE_URL)
df_u = pd.read_sql('SELECT user_id FROM "UserProfiles" WHERE code = %(code)s', engine, params={'code': code})
if len(df_u) != 1:
    print('con 2 fail')
df_u['user_id'].item()

'12120382831'