In [12]:
import pandas as pd
from secrets import localhost_db
from sqlalchemy import create_engine
from queries import *

In [11]:
# Database URL handed to create_engine by the functions below.
DATABASE_URL = localhost_db
# Weight of each timeframe key when per-timeframe scores are combined into the final score.
TF_WEIGHTS = {0: 6, 1: 5, 2: 4}
# Weight of each comparison dimension inside a single timeframe's score.
MU_WEIGHTS = {'artist': 4, 'track': 1, 'genre': 6, 'feature': 2}

def get_user_from_code(code):
    """Resolve a share code of the form ``adjective-noun-number`` to a user_id.

    The code is validated locally first (three dash-separated parts, a known
    adjective, a known noun, and a number strictly between 0 and 100) so that
    malformed input never reaches the database.

    Args:
        code: The share code string entered by a user.

    Returns:
        The matching ``user_id`` when exactly one "UserProfiles" row has this
        code, otherwise ``None`` (malformed code, no match, ambiguous match,
        or any lookup error).
    """
    try:
        code_parts = code.split('-')
        if (
            len(code_parts) != 3
            or code_parts[0] not in ADJECTIVES
            or code_parts[1] not in NOUNS
            or not (0 < int(code_parts[2]) < 100)
        ):
            return None
        engine = create_engine(DATABASE_URL)
        try:
            df_u = pd.read_sql(
                'SELECT user_id FROM "UserProfiles" WHERE code = %(code)s',
                engine,
                params={'code': code},
            )
        finally:
            # Release pooled connections even when the query fails.
            engine.dispose()
        if len(df_u) != 1:
            return None
        return df_u['user_id'].item()
    except Exception:
        # Best-effort lookup: any ordinary failure means "no such user".
        # KeyboardInterrupt / SystemExit are deliberately NOT swallowed.
        return None

def compare_users(u1, u2):
    """Score how similar two users' listening profiles are.

    For every timeframe in TF_WEIGHTS the artist, track, genre and audio
    feature similarities are combined with MU_WEIGHTS; the per-timeframe
    scores are then combined with TF_WEIGHTS.

    Args:
        u1: user_id of the first user.
        u2: user_id of the second user.

    Returns:
        Tuple ``(final_points, users, similar_artists, similar_tracks,
        similar_genres)``: the weighted score, the rows returned by
        ``users2_query``, and one frame per dimension holding the entries
        with ``points > 0`` across all timeframes.
    """
    # Get data
    query_params = {'user_ids': (u1, u2)}
    engine = create_engine(DATABASE_URL)
    try:
        users = pd.read_sql(users2_query, engine, params=query_params)
        df_a = pd.read_sql(top_artists2_query, engine, params=query_params)
        df_t = pd.read_sql(top_tracks2_query, engine, params=query_params)
        df_g = pd.read_sql(top_genres2_query, engine, params=query_params)
    finally:
        # Release pooled connections even when a query fails.
        engine.dispose()
    # Mean of the numeric track columns per user and timeframe. numeric_only
    # is explicit because text columns cannot be averaged (pandas >= 2 raises).
    df_m = df_t.groupby(['user_id', 'timeframe']).mean(numeric_only=True).reset_index()
    # User 1
    u1_a = df_a.loc[df_a['user_id'] == u1]
    u1_t = df_t.loc[df_t['user_id'] == u1]
    u1_g = df_g.loc[df_g['user_id'] == u1]
    u1_m = df_m.loc[df_m['user_id'] == u1]
    # User 2
    u2_a = df_a.loc[df_a['user_id'] == u2]
    u2_t = df_t.loc[df_t['user_id'] == u2]
    u2_g = df_g.loc[df_g['user_id'] == u2]
    u2_m = df_m.loc[df_m['user_id'] == u2]
    # Initialization: collect per-timeframe frames and concatenate once at the
    # end (DataFrame.append was removed in pandas 2.0).
    final_points = 0
    artist_frames = []
    track_frames = []
    genre_frames = []
    mu_total = sum(MU_WEIGHTS.values())
    # Compare
    for timeframe, tf_weight in TF_WEIGHTS.items():
        tf_points = 0
        # Artist
        df_artist = get_artist_similarity(u1_a, u2_a, timeframe)
        tf_points += MU_WEIGHTS['artist'] * calculate_similarity(df_artist)
        df_artist = df_artist.loc[df_artist['points'] > 0].sort_values(by='points', ascending=False)
        # 1-based position within this timeframe's shared artists.
        df_artist['rank'] = range(1, len(df_artist) + 1)
        artist_frames.append(df_artist)
        # Track
        df_track = get_track_similarity(u1_t, u2_t, timeframe)
        tf_points += MU_WEIGHTS['track'] * calculate_similarity(df_track)
        track_frames.append(df_track.loc[df_track['points'] > 0])
        # Genre
        df_genre = get_genre_similarity(u1_g, u2_g, timeframe)
        tf_points += MU_WEIGHTS['genre'] * calculate_similarity(df_genre)
        genre_frames.append(df_genre.loc[df_genre['points'] > 0])
        # Features
        tf_points += MU_WEIGHTS['feature'] * calculate_feature_similarity(u1_m, u2_m, timeframe)
        # Timeframe overall points
        tf_points /= mu_total
        final_points += tf_weight * tf_points
    # Final similarity score
    final_points /= sum(TF_WEIGHTS.values())
    similar_artists = pd.concat(artist_frames) if artist_frames else pd.DataFrame()
    similar_tracks = pd.concat(track_frames) if track_frames else pd.DataFrame()
    similar_genres = pd.concat(genre_frames) if genre_frames else pd.DataFrame()
    return final_points, users, similar_artists, similar_tracks, similar_genres

def get_similar_users(user_id):
    """Rank the candidate users returned by ``similar_users_query`` by similarity.

    Each candidate is scored against ``user_id`` with the same weighting as
    compare_users: MU_WEIGHTS inside a timeframe, TF_WEIGHTS across timeframes.

    Args:
        user_id: The user to find matches for.

    Returns:
        DataFrame with columns ``user_id``, ``display_name``, ``image_url`` and
        ``points``, sorted by ``points`` descending. Empty (with those columns)
        when the query returns no candidates.
    """
    # Own engine: the function must not depend on a notebook-global `engine`.
    engine = create_engine(DATABASE_URL)
    try:
        df_u = pd.read_sql(similar_users_query, engine, params={'user_id': user_id})
        users = [user_id] + df_u['user_id'].tolist()
        query_params = {'user_ids': tuple(users)}
        df_a = pd.read_sql(top_artists2_query, engine, params=query_params)
        df_t = pd.read_sql(top_tracks2_query, engine, params=query_params)
        df_g = pd.read_sql(top_genres2_query, engine, params=query_params)
        df_m = pd.read_sql(music_features2_query, engine, params=query_params)
    finally:
        # Release pooled connections even when a query fails.
        engine.dispose()
    # User
    u1_a = df_a.loc[df_a['user_id'] == user_id]
    u1_t = df_t.loc[df_t['user_id'] == user_id]
    u1_g = df_g.loc[df_g['user_id'] == user_id]
    u1_m = df_m.loc[df_m['user_id'] == user_id]
    mu_total = sum(MU_WEIGHTS.values())
    tf_total = sum(TF_WEIGHTS.values())
    dict_list = []
    # For each other user
    for _, row in df_u.iterrows():
        u2 = row['user_id']
        u2_a = df_a.loc[df_a['user_id'] == u2]
        u2_t = df_t.loc[df_t['user_id'] == u2]
        u2_g = df_g.loc[df_g['user_id'] == u2]
        u2_m = df_m.loc[df_m['user_id'] == u2]
        points = 0
        # Compare
        for timeframe, tf_weight in TF_WEIGHTS.items():
            tf_points = 0
            df_artist = get_artist_similarity(u1_a, u2_a, timeframe)
            tf_points += MU_WEIGHTS['artist'] * calculate_similarity(df_artist)
            df_track = get_track_similarity(u1_t, u2_t, timeframe)
            tf_points += MU_WEIGHTS['track'] * calculate_similarity(df_track)
            df_genre = get_genre_similarity(u1_g, u2_g, timeframe)
            tf_points += MU_WEIGHTS['genre'] * calculate_similarity(df_genre)
            # timeframe is a required argument of calculate_feature_similarity.
            tf_points += MU_WEIGHTS['feature'] * calculate_feature_similarity(u1_m, u2_m, timeframe)
            tf_points /= mu_total
            points += tf_weight * tf_points
        # Final similarity score
        points /= tf_total
        dict_list.append({
            'user_id': u2,
            'display_name': row['display_name'],
            'image_url': row['image_url'],
            'points': points
        })
    # Explicit columns keep sort_values working when there are no candidates.
    result = pd.DataFrame(dict_list, columns=['user_id', 'display_name', 'image_url', 'points'])
    return result.sort_values(by='points', ascending=False)

def get_similar_artists(df_a):
    """Attach the rows returned by ``similar_artists_query`` to a frame of artists.

    Args:
        df_a: DataFrame with at least ``artist_id``, ``timeframe`` and
            ``points`` columns.

    Returns:
        ``df_a`` inner-merged with the query result on ``artist_id``, sorted by
        ``timeframe`` then ``points``, both descending.
    """
    engine = create_engine(DATABASE_URL)
    try:
        artists = pd.read_sql_query(
            similar_artists_query,
            engine,
            params={'artist_ids': tuple(df_a['artist_id'].tolist())},
        )
    finally:
        # Dispose in `finally` so a failing query does not leak connections.
        engine.dispose()
    df = df_a.merge(artists, on=['artist_id'])
    return df.sort_values(['timeframe', 'points'], ascending=False)

def get_similar_tracks(df_t):
    """Attach the rows returned by ``similar_tracks_query`` to a frame of tracks.

    Args:
        df_t: DataFrame with at least ``track_id``, ``timeframe`` and
            ``points`` columns.

    Returns:
        ``df_t`` inner-merged with the query result on ``track_id``, sorted by
        ``timeframe`` then ``points``, both descending.
    """
    engine = create_engine(DATABASE_URL)
    try:
        tracks = pd.read_sql_query(
            similar_tracks_query,
            engine,
            params={'track_ids': tuple(df_t['track_id'].tolist())},
        )
    finally:
        # Dispose in `finally` so a failing query does not leak connections.
        engine.dispose()
    df = df_t.merge(tracks, on=['track_id'])
    return df.sort_values(['timeframe', 'points'], ascending=False)

def get_artist_similarity(u1, u2, timeframe):
    """Outer-join two users' top artists for one timeframe and score each artist.

    After the merge, ``rank_x`` / ``rank_y`` are the ranks from ``u1`` / ``u2``
    (pandas' default suffixes); a rank of 0 means the artist is absent from
    that user's list because missing values are filled with 0.

    Added columns:
        base:   calculate_points of the smallest positive rank in the row.
        points: calculate_points of the larger rank when both users have the
                artist, otherwise 0.
    """
    artist_keys = ['artist_id', 'timeframe', 'artist', 'genres', 'artist_url', 'artist_image']
    left = u1[u1['timeframe'] == timeframe]
    right = u2[u2['timeframe'] == timeframe]
    merged = pd.merge(left, right, on=artist_keys, how='outer').fillna(0)
    ranks = merged[['rank_x', 'rank_y']]
    # Masking with `ranks > 0` blanks the zero (missing) ranks before the row-wise min.
    merged['base'] = calculate_points(merged[ranks > 0].min(axis=1))
    in_both = (merged['rank_x'] != 0) & (merged['rank_y'] != 0)
    merged.loc[in_both, 'points'] = calculate_points(ranks.max(axis=1))
    merged['points'] = merged['points'].fillna(0)
    return merged

def get_track_similarity(u1, u2, timeframe):
    """Outer-join two users' top tracks for one timeframe and score each track.

    Rows are matched on the track's descriptive columns plus its audio
    features, so ``rank_x`` / ``rank_y`` hold the ranks from ``u1`` / ``u2``
    and 0 marks a track missing from that user's list.

    Added columns:
        base:   calculate_points of the smallest positive rank in the row.
        points: calculate_points of the larger rank when both users have the
                track, otherwise 0.
    """
    audio_features = [
        'danceability', 'energy', 'loudness', 'acousticness',
        'instrumentalness', 'liveness', 'valence', 'tempo',
    ]
    track_keys = ['track_id', 'timeframe', 'track', 'artists', 'album', 'track_url', 'album_image']
    left = u1[u1['timeframe'] == timeframe]
    right = u2[u2['timeframe'] == timeframe]
    merged = pd.merge(left, right, on=track_keys + audio_features, how='outer').fillna(0)
    ranks = merged[['rank_x', 'rank_y']]
    # Masking with `ranks > 0` blanks the zero (missing) ranks before the row-wise min.
    merged['base'] = calculate_points(merged[ranks > 0].min(axis=1))
    in_both = (merged['rank_x'] != 0) & (merged['rank_y'] != 0)
    merged.loc[in_both, 'points'] = calculate_points(ranks.max(axis=1))
    merged['points'] = merged['points'].fillna(0)
    return merged

def get_genre_similarity(u1, u2, timeframe):
    """Outer-join two users' top genres for one timeframe and score each genre.

    ``rank_x`` / ``rank_y`` are the ranks from ``u1`` / ``u2``; 0 marks a genre
    missing from that user's list.

    Added columns:
        base:   calculate_points of the smallest positive rank in the row.
        points: calculate_points of the largest positive rank when both users
                have the genre, otherwise 0.

    Returns the merged frame sorted by ``timeframe`` then ``points``, descending.
    """
    left = u1[u1['timeframe'] == timeframe]
    right = u2[u2['timeframe'] == timeframe]
    merged = pd.merge(left, right, on=['genre', 'timeframe'], how='outer').fillna(0)
    ranks = merged[['rank_x', 'rank_y']]
    # Zero (missing) ranks are blanked out before both row-wise reductions.
    merged['base'] = calculate_points(merged[ranks > 0].min(axis=1))
    in_both = (merged['rank_x'] != 0) & (merged['rank_y'] != 0)
    merged.loc[in_both, 'points'] = calculate_points(merged[ranks > 0].max(axis=1))
    merged['points'] = merged['points'].fillna(0)
    return merged.sort_values(['timeframe', 'points'], ascending=False)

def calculate_similarity(df):
    """Turn a scored similarity frame into a single ratio.

    The denominator is the sum of ``base`` over rows present for both users
    (both ranks non-zero) plus half the ``base`` of rows present for only one
    user; the numerator is the sum of ``points``.

    Args:
        df: DataFrame with ``rank_x``, ``rank_y``, ``base`` and ``points``.

    Returns:
        ``points / base`` rounded to 4 decimals, or ``0.0`` when there is
        nothing to compare (base score of 0, e.g. an empty frame).
    """
    in_both = (df['rank_x'] != 0) & (df['rank_y'] != 0)
    # Sum only the needed column: a whole-frame df.sum() also tries to add up
    # the text columns, which fails on mixed str/0 values in pandas >= 2.
    base_score = df.loc[in_both, 'base'].sum() + df.loc[~in_both, 'base'].sum() / 2
    if base_score == 0:
        # Avoid 0/0 -> NaN leaking into the weighted totals.
        return 0.0
    return round(df['points'].sum() / base_score, 4)

def calculate_feature_similarity(u1, u2, timeframe):
    """Average min/max ratio of two users' feature values for one timeframe.

    Every column other than ``user_id`` and ``timeframe`` is treated as a
    feature. Values are compared by absolute value; each feature contributes
    ``min / max`` (1.0 when identical).

    Args:
        u1: Feature frame for the first user (one row per timeframe expected).
        u2: Feature frame for the second user.
        timeframe: Timeframe key selecting the row to compare.

    Returns:
        Mean ratio rounded to 4 decimals. ``0.0`` when either user has no row
        for the timeframe or there are no feature columns.
    """
    rows1 = u1.loc[u1['timeframe'] == timeframe].drop(columns=['user_id', 'timeframe'])
    rows2 = u2.loc[u2['timeframe'] == timeframe].drop(columns=['user_id', 'timeframe'])
    if rows1.empty or rows2.empty:
        # Nothing to compare for this timeframe (previously an IndexError).
        return 0.0
    features1 = rows1.values.tolist()[0]
    features2 = rows2.values.tolist()[0]
    points = []
    for raw1, raw2 in zip(features1, features2):
        f1 = abs(raw1)
        f2 = abs(raw2)
        largest = max(f1, f2)
        # Both values are 0: identical, and min/max would divide by zero.
        points.append(min(f1, f2) / largest if largest else 1.0)
    if not points:
        return 0.0
    return round(sum(points) / len(points), 4)

def calculate_points(rank, weight=16, shift=4):
    """Map a rank to a score that shrinks as the rank grows.

    Computes ``weight / (0.1 * rank + shift) ** 2``; with the defaults a rank
    of 0 scores 1.0. Works element-wise on pandas Series as well as scalars.
    """
    denominator = (0.1 * rank + shift) ** 2
    return weight / denominator

In [3]:
# Example pair of user ids used by the cells below (u1 is also the Spotify
# account the playlist is created on).
u1 = '12120382831'
u2 = '12153521253'

In [51]:
import requests
import base64
from io import BytesIO
from PIL import Image

def generate_playlist(u1, u2):
    """Build a shared playlist for two users.

    Tracks both users have in the same timeframe always go in. The rest is a
    weighted random sample from each user's top-20 tracks, weighted by
    get_tracks_weight against the other user's top-20 artists.

    Args:
        u1: user_id of the first user.
        u2: user_id of the second user.

    Returns:
        Tuple ``(track_ids, p_name, p_desc, p_img_str)``: the list of track
        ids (at most TOTAL_SONGS), the playlist name, its description, and the
        base64-encoded JPEG cover.
    """
    TOTAL_SONGS = 30
    track_cols = ['track_id', 'track', 'artists', 'album', 'track_url', 'album_image']
    # Get data from database
    query_params = {'user_ids': (u1, u2)}
    engine = create_engine(DATABASE_URL)
    try:
        users = pd.read_sql(users2_query, engine, params=query_params)
        df_a = pd.read_sql(top_artists2_query, engine, params=query_params)
        df_t = pd.read_sql(top_tracks2_query, engine, params=query_params)
    finally:
        # Release pooled connections even when a query fails.
        engine.dispose()
    # Similar tracks: a repeated (track_id, timeframe) pair means both users have it.
    similar_tracks = df_t.loc[df_t.duplicated(subset=['track_id', 'timeframe'])]
    # Split by users
    u1_a = df_a.loc[(df_a['user_id'] == u1) & (df_a['rank'] <= 20)]
    u1_t = df_t.loc[(df_t['user_id'] == u1) & (df_t['rank'] <= 20)]
    u2_a = df_a.loc[(df_a['user_id'] == u2) & (df_a['rank'] <= 20)]
    u2_t = df_t.loc[(df_t['user_id'] == u2) & (df_t['rank'] <= 20)]
    # Get base playlist
    playlist_dicts = get_tracks_weight(u1_t, u2_a)
    playlist_dicts.extend(get_tracks_weight(u2_t, u1_a))
    # Explicit columns keep the frame usable when no candidate tracks exist.
    df_p = pd.DataFrame(playlist_dicts, columns=track_cols + ['weight'])
    df_p = df_p.sort_values(by='weight', ascending=False)
    df_p = df_p.loc[~df_p['track_id'].isin(similar_tracks['track_id'])].drop_duplicates(subset=['track_id'])
    # Final playlist. De-duplicate on track_id: the same shared track can appear
    # once per timeframe, and those rows differ in timeframe/rank.
    df_f = similar_tracks.drop_duplicates(subset=['track_id'])[track_cols].head(TOTAL_SONGS)
    # Never ask for a negative sample or for more rows than the pool holds.
    n_sample = min(TOTAL_SONGS - len(df_f), len(df_p))
    if n_sample > 0:
        sampled = df_p.sample(weights=df_p['weight'], n=n_sample).sort_values(by='weight', ascending=False)
        # DataFrame.append was removed in pandas 2.0.
        df_f = pd.concat([df_f, sampled])
    # Playlist name: "<adjective1> <adjective2> <noun1>" taken from the two user codes.
    code1 = users.loc[users['user_id'] == u1]['code'].item()
    code2 = users.loc[users['user_id'] == u2]['code'].item()
    p_name = code1.split('-')[0] + ' ' + code2.split('-')[0] + ' ' + code1.split('-')[1]
    if p_name[:1].lower() in ('a', 'e', 'i', 'o', 'u'):
        p_name = 'An ' + p_name
    else:
        p_name = 'A ' + p_name
    p_name = p_name.title() + '\'s Playlist'
    # Playlist description
    name1 = users.loc[users['user_id'] == u1]['display_name'].item()
    name2 = users.loc[users['user_id'] == u2]['display_name'].item()
    p_desc = "A playlist created for " + name1 + " and " + name2 + " by Soundbud."
    # Playlist image
    p_img = create_playlist_image(df_f)
    p_img_str = convert_to_jpeg(p_img)
    return df_f['track_id'].tolist(), p_name, p_desc, p_img_str

def create_playlist_image(df):
    """Compose a 400x400 cover from up to four distinct album images plus a logo.

    The first rows of ``df['album_image']`` are scanned for distinct URLs.
    The n-th distinct image fills the n-th 200x200 quadrant (top-left,
    top-right, bottom-left, bottom-right), using the same region of the
    downloaded image. The logo file ``rect1822.png`` is pasted on top, using
    itself as the mask.

    Args:
        df: DataFrame with an ``album_image`` column of image URLs.

    Returns:
        The composed PIL image. Quadrants stay black when fewer than four
        distinct images are found among the scanned rows.
    """
    tile = 200
    max_rows = 11  # the scan covers row positions 0..10
    p_img = Image.new('RGB', (2 * tile, 2 * tile))
    img_used = []
    # Slicing (not indexing) keeps short playlists from running off the end.
    for this_url in df['album_image'].iloc[:max_rows]:
        if this_url in img_used:
            continue
        quadrant = len(img_used)
        img_used.append(this_url)
        # Timeout so an unresponsive image host cannot hang the notebook.
        img = Image.open(requests.get(this_url, stream=True, timeout=10).raw)
        img.thumbnail((2 * tile, 2 * tile))
        left = (quadrant % 2) * tile
        top = (quadrant // 2) * tile
        p_img.paste(img.crop((left, top, left + tile, top + tile)), (left, top))
        if len(img_used) == 4:
            break
    logo = Image.open('rect1822.png')
    logo.thumbnail((2 * tile, 2 * tile))
    p_img.paste(logo, (0, 0), logo)
    return p_img

def convert_to_jpeg(img):
    """Serialize an image to JPEG in memory and return it base64-encoded.

    Args:
        img: Object exposing ``save(fp, format=...)`` (e.g. a PIL image).

    Returns:
        ``bytes`` holding the base64 encoding of the JPEG data.
    """
    with BytesIO() as jpeg_buffer:
        img.save(jpeg_buffer, format="JPEG")
        return base64.b64encode(jpeg_buffer.getvalue())

def get_tracks_weight(user_t, other_a):
    """Weight one user's tracks by rank and by overlap with the other user's artists.

    Each track starts at ``21 - rank``. When the track's first listed artist
    (``artists`` is ';'-separated) occurs in the other user's ``artist``
    column, ``26 - mean(rank of the matching artists)`` is added.

    Args:
        user_t: Tracks of one user with columns ``rank``, ``artists``,
            ``track_id``, ``track``, ``album``, ``track_url``, ``album_image``.
        other_a: Artists of the other user with columns ``artist`` and ``rank``.

    Returns:
        List of dicts, one per track, carrying the track columns plus ``weight``.
    """
    dict_list = []
    for _, t in user_t.iterrows():
        weight = 21 - t['rank']
        artist = t['artists'].split(';')[0]
        # Literal substring match: artist names are not regular expressions
        # ("A$AP Rocky" would never match, "Sunn O)))" would raise).
        matches = other_a['artist'].str.contains(artist, regex=False, na=False)
        if matches.any():
            weight += 26 - other_a.loc[matches, 'rank'].mean()
        dict_list.append({
            'track_id': t['track_id'],
            'track': t['track'],
            'artists': t['artists'],
            'album': t['album'],
            'track_url': t['track_url'],
            'album_image': t['album_image'],
            'weight': weight
        })
    return dict_list

In [54]:
# Generate the playlist for the example pair and check how many track ids came back.
tracks, p_name, p_desc, p_img = generate_playlist(u1, u2)
len(tracks)

30

In [46]:
# Show the generated playlist name.
p_name

"A Good Real Plane's Playlist"

In [47]:
# Show the generated playlist description.
p_desc

'A playlist created for Bin Xuan Kong and Thivya Dharishinie by Soundbud.'

In [49]:
import spotipy
from secrets import spotify_secrets
from spotipy.oauth2 import SpotifyOAuth

# Spotify client authenticated through OAuth with credentials from the local
# secrets module. The requested scopes cover modifying public playlists and
# uploading images (used for the playlist cover below).
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=spotify_secrets["Client Id"],
                                               client_secret=spotify_secrets["Client Secret"],
                                               redirect_uri="http://localhost:8892/callback",
                                               scope="playlist-modify-public ugc-image-upload"))

In [50]:
# Create an empty public, non-collaborative playlist for user u1 and display the API response.
p_snap = sp.user_playlist_create(u1, p_name, public=True, collaborative=False, description=p_desc)
p_snap

{'collaborative': False,
 'description': 'A playlist created for Bin Xuan Kong and Thivya Dharishinie by Soundbud.',
 'external_urls': {'spotify': 'https://open.spotify.com/playlist/60jsiHcnpZR1oA3LDilSVZ'},
 'followers': {'href': None, 'total': 0},
 'href': 'https://api.spotify.com/v1/playlists/60jsiHcnpZR1oA3LDilSVZ',
 'id': '60jsiHcnpZR1oA3LDilSVZ',
 'images': [],
 'name': "A Good Real Plane's Playlist",
 'owner': {'display_name': 'Bin Xuan Kong',
  'external_urls': {'spotify': 'https://open.spotify.com/user/12120382831'},
  'href': 'https://api.spotify.com/v1/users/12120382831',
  'id': '12120382831',
  'type': 'user',
  'uri': 'spotify:user:12120382831'},
 'primary_color': None,
 'public': True,
 'snapshot_id': 'MSw5ODMxOGQ3YzBkZDg4MjY3ODkyNzdhYzZiZTJjNjg3ZDM0Y2EwYzkx',
 'tracks': {'href': 'https://api.spotify.com/v1/playlists/60jsiHcnpZR1oA3LDilSVZ/tracks',
  'items': [],
  'limit': 100,
  'next': None,
  'offset': 0,
  'previous': None,
  'total': 0},
 'type': 'playlist',
 'uri'

In [56]:
# Add the generated track ids to the playlist created above.
sp.user_playlist_add_tracks(u1, p_snap['id'], tracks)

{'snapshot_id': 'MyxkZjIyMGViZmFlM2QxNGJmZjQ4ZmE3ZGE4N2JlNTJjNzVkYTk4MDYx'}

In [57]:
# Upload the base64-encoded JPEG cover returned by generate_playlist.
sp.playlist_upload_cover_image(p_snap['id'], p_img)

In [58]:
import datetime as dt

# One-row record of the playlist just created: the two user ids, the Spotify
# playlist id and name, and the creation timestamp (local time, naive datetime).
df_p = pd.DataFrame([
    [u1, u2, p_snap['id'], p_name, dt.datetime.now()]
], columns=['user_id', 'target_id', 'playlist_id', 'playlist_name', 'date_created'])

df_p

Unnamed: 0,user_id,target_id,playlist_id,playlist_name,date_created
0,12120382831,12153521253,60jsiHcnpZR1oA3LDilSVZ,A Good Real Plane's Playlist,2021-02-01 18:23:50.298969


In [59]:
# Persist the playlist record. 'append' adds the row (creating the table when
# it does not exist yet); 'replace' would drop the whole table, and with it
# every previously recorded playlist, on each run.
engine = create_engine(DATABASE_URL)
try:
    df_p.to_sql('GeneratedPlaylists', engine, index=False, if_exists='append')
finally:
    # Release pooled connections even when the insert fails.
    engine.dispose()