In [2]:
import numpy as np
import pandas as pd
from pandas.io import gbq

import pickle

from pitched_recommend import Recommender

In [4]:
playlists_sql = "SELECT * FROM `umg-comm-tech-dev.browse_recommender.target_browse_playlists`"
playlists_list = gbq.read_gbq(playlists_sql, dialect='standard', project_id = 'umg-comm-tech-dev')

playlists_data_sql = "SELECT * FROM `umg-comm-tech-dev.browse_recommender.playlist_data`"
playlists_data = gbq.read_gbq(playlists_data_sql, dialect='standard', project_id = 'umg-comm-tech-dev')
playlists_data['long_uri'] = ['spotify:user:'+str(x) for x in playlists_data.playlist_uri]

data = playlists_list.merge(playlists_data, how='left', left_on='playlist_uri', right_on='long_uri')

In [5]:
data.head()

Unnamed: 0,territory,category_id,Position,playlist_uri_x,playlist_name_x,playlist_name_y,listeners,country_owner,playlist_owner,ranking,pl_ranking,playlist_uri_y,long_uri
0,SE,hiphop,1,spotify:user:spotify:playlist:37i9dQZF1DWXfgo3...,100,100,20932.0,SE,spotify,1.0,1.0,spotify:playlist:37i9dQZF1DWXfgo3OOonqa,spotify:user:spotify:playlist:37i9dQZF1DWXfgo3...
1,SE,toplists,4,spotify:user:spotify:playlist:37i9dQZF1DWXfgo3...,100,100,20932.0,SE,spotify,1.0,1.0,spotify:playlist:37i9dQZF1DWXfgo3OOonqa,spotify:user:spotify:playlist:37i9dQZF1DWXfgo3...
2,BR,popculture,2,spotify:user:spotify:playlist:37i9dQZF1DWWoqQt...,188,188,3418.0,BR,spotify,1.0,1.0,spotify:playlist:37i9dQZF1DWWoqQt9ZUTJC,spotify:user:spotify:playlist:37i9dQZF1DWWoqQt...
3,CO,chill,63,spotify:user:spotify:playlist:37i9dQZF1DX6KOz8...,9PM,9PM,11138.0,MX,spotify,1.0,1.0,spotify:playlist:37i9dQZF1DX6KOz8PNkOlS,spotify:user:spotify:playlist:37i9dQZF1DX6KOz8...
4,ES,chill,72,spotify:user:spotify:playlist:37i9dQZF1DX6KOz8...,9PM,9PM,11138.0,MX,spotify,1.0,1.0,spotify:playlist:37i9dQZF1DX6KOz8PNkOlS,spotify:user:spotify:playlist:37i9dQZF1DX6KOz8...


In [6]:
### Get the 'same_as_country' column

data['same_as_country'] = [1 if x==y else 0 for (x,y) in zip(data.territory,data.country_owner)]

In [10]:
### Get the 'count_appearance' column

playlist_count = data.playlist_uri_x.value_counts()
playlist_count = pd.DataFrame(playlist_count).reset_index().rename(columns={'index':'playlist_uri',
                                                                            'playlist_uri_x':'count_appearance'})
data = data.merge(playlist_count, how='left', left_on = 'playlist_uri_x', right_on = 'playlist_uri')
#data.head()

Unnamed: 0,territory,category_id,Position,playlist_uri_x,playlist_name_x,playlist_name_y,listeners,country_owner,playlist_owner,ranking,pl_ranking,playlist_uri_y,long_uri,same_as_country,playlist_uri,count_appearance
0,SE,hiphop,1,spotify:user:spotify:playlist:37i9dQZF1DWXfgo3...,100,100,20932.0,SE,spotify,1.0,1.0,spotify:playlist:37i9dQZF1DWXfgo3OOonqa,spotify:user:spotify:playlist:37i9dQZF1DWXfgo3...,1,spotify:user:spotify:playlist:37i9dQZF1DWXfgo3...,2
1,SE,toplists,4,spotify:user:spotify:playlist:37i9dQZF1DWXfgo3...,100,100,20932.0,SE,spotify,1.0,1.0,spotify:playlist:37i9dQZF1DWXfgo3OOonqa,spotify:user:spotify:playlist:37i9dQZF1DWXfgo3...,1,spotify:user:spotify:playlist:37i9dQZF1DWXfgo3...,2
2,BR,popculture,2,spotify:user:spotify:playlist:37i9dQZF1DWWoqQt...,188,188,3418.0,BR,spotify,1.0,1.0,spotify:playlist:37i9dQZF1DWWoqQt9ZUTJC,spotify:user:spotify:playlist:37i9dQZF1DWWoqQt...,1,spotify:user:spotify:playlist:37i9dQZF1DWWoqQt...,1
3,CO,chill,63,spotify:user:spotify:playlist:37i9dQZF1DX6KOz8...,9PM,9PM,11138.0,MX,spotify,1.0,1.0,spotify:playlist:37i9dQZF1DX6KOz8PNkOlS,spotify:user:spotify:playlist:37i9dQZF1DX6KOz8...,0,spotify:user:spotify:playlist:37i9dQZF1DX6KOz8...,13
4,ES,chill,72,spotify:user:spotify:playlist:37i9dQZF1DX6KOz8...,9PM,9PM,11138.0,MX,spotify,1.0,1.0,spotify:playlist:37i9dQZF1DX6KOz8PNkOlS,spotify:user:spotify:playlist:37i9dQZF1DX6KOz8...,0,spotify:user:spotify:playlist:37i9dQZF1DX6KOz8...,13


In [11]:
### Get the 'local' column

data['local'] = [1 if x<20 else 0 for x in data.count_appearance]

In [13]:
### Get the 'mean_listeners_cat_terr' column

by_cat_terr = data.groupby(by=['territory','category_id']).listeners.mean()
df_cat_terr = pd.DataFrame(by_cat_terr).reset_index()
df_cat_terr = df_cat_terr.rename(columns = {'listeners':'mean_listeners_cat_terr'})

data = data.merge(df_cat_terr, how='left', left_on = ['territory','category_id'], 
                  right_on = ['territory','category_id'])

In [15]:
### Get the 'popular' column

data['popular'] = [1 if x>y else 0 for (x,y) in zip(data.listeners,data.mean_listeners_cat_terr)]

In [17]:
### Get the 'score' column with the overall score
data['score'] = data.same_as_country + data.local + data.popular

In [18]:
cols = ['territory', 'category_id', 'Position', 'playlist_uri',
       'playlist_name_x', 'listeners', 'country_owner', 'count_appearance',
        'mean_listeners_cat_terr', 'same_as_country', 'local', 'popular',
        'score']

In [19]:
data = data[cols].sort_values(by=['territory','category_id','score'], ascending=False)
data_final = pd.DataFrame(data.groupby(by=['territory','category_id']).head(10).reset_index())

In [24]:
data_final.head()

Unnamed: 0,index,territory,category_id,Position,playlist_uri,playlist_name_x,listeners,country_owner,count_appearance,mean_listeners_cat_terr,same_as_country,local,popular,score
0,100,UY,workout,8,spotify:user:spotify:playlist:37i9dQZF1DX4eRPd...,Hype,452955.0,US,58,87462.893617,0,0,1,1
1,877,UY,workout,7,spotify:user:spotify:playlist:37i9dQZF1DWSJHnP...,Cardio,89598.0,US,58,87462.893617,0,0,1,1
2,2398,UY,workout,5,spotify:user:spotify:playlist:37i9dQZF1DX70RN3...,Workout,444881.0,US,58,87462.893617,0,0,1,1
3,8276,UY,workout,2,spotify:user:spotify:playlist:37i9dQZF1DX76Wlf...,Beast Mode,1343212.0,US,58,87462.893617,0,0,1,1
4,29452,UY,workout,1,spotify:user:spotify:playlist:37i9dQZF1DX9ZKyQ...,Para Entrenar,65602.0,MX,17,87462.893617,0,1,0,1


In [25]:
data_final.to_csv('processed_recs_for_andy_tab.csv', sep='\t', index=False)

In [21]:
def install_and_import(package):
    import importlib
    try:
        importlib.import_module(package)
    except ImportError:
        import pip
        pip.main(['install', package])
    finally:
        globals()[package] = importlib.import_module(package)



In [23]:
install_and_import('scipy.sparse')

In [35]:
test = pd.read_csv('/Users/daria/Downloads/recommender%2Fbrowse_recommendations_20181205 (1).csv', sep='\t', 
                   index_col=False)

In [36]:
test.shape

(299693, 7)

In [2]:
test2 = pd.read_csv('/Users/daria/development/umg/recommender/playlist_pitching/preprocessed_recs_for_andy_tab.csv',
                    sep='\t', 
                   index_col=False)

In [3]:
test2.shape

(102950, 7)

In [4]:
test2.head()

Unnamed: 0,territory,category,scores,isrc,seed_playlist_uri,track_artist,track_title
0,UY,workout,1.437311,USUG11800877,spotify:user:spotify:playlist:37i9dQZF1DWUVpAX...,Jay Rock,WIN
1,UY,workout,1.408097,USUM71800022,spotify:user:spotify:playlist:37i9dQZF1DWUVpAX...,Eminem,Chloraseptic (feat. 2 Chainz & Phresher) - Remix
2,UY,workout,1.347921,GBUM71028043,spotify:user:spotify:playlist:37i9dQZF1DXadOVC...,Ellie Goulding,Lights - Single Version
3,UY,workout,1.346981,USAT21704833,spotify:user:spotify:playlist:37i9dQZF1DWUVpAX...,Meek Mill,"That's My N**** (with Meek Mill, YG & Snoop Dogg)"
4,UY,workout,1.294945,USAT21101071,spotify:user:spotify:playlist:37i9dQZF1DXadOVC...,Gym Class Heroes,Stereo Hearts (feat. Adam Levine) - feat. Adam...


In [5]:
test2.category.unique()

array(['workout', 'travel', 'toplists', 'soul', 'sleep', 'roots',
       'romance', 'rock', 'rnb', 'reggae', 'punk', 'popculture', 'pop',
       'mood', 'metal', 'kpop', 'jazz', 'inspirational', 'indie_alt',
       'holidays', 'hiphop', 'gaming', 'funk', 'focus', 'family',
       'edm_dance', 'dinner', 'desi', 'decades', 'country', 'comedy',
       'classical', 'chill', 'blues', 'afro', 'word', 'sessions', 'party',
       'latin', 'francophone'], dtype=object)

In [39]:
test.category.unique()

array(['workout', 'travel', 'toplists', 'soul', 'sleep', 'roots',
       'romance', 'rock', 'rnb', 'reggae', 'punk', 'pop', 'mood', 'metal',
       'kpop', 'kids', 'jazz', 'inspirational', 'indie_alt', 'holidays',
       'hiphop', 'gaming', 'funk', 'focus', 'edm_dance', 'decades',
       'country', 'comedy', 'classical', 'chill', 'brazilian', 'blues',
       'arab', 'afro', 'party', 'latin', '2018'], dtype=object)

In [29]:
test.head()

Unnamed: 0,territory,category,scores,isrc,seed_playlist_uri,track_artist,track_title
0,AD,workout,1.437311,USUG11800877,spotify:user:spotify:playlist:37i9dQZF1DWUVpAX...,Jay Rock,WIN
1,AD,workout,1.408098,USUM71800022,spotify:user:spotify:playlist:37i9dQZF1DWUVpAX...,Eminem,Chloraseptic (feat. 2 Chainz & Phresher) - Remix
2,AD,workout,1.38163,USUM71710087,spotify:user:spotify:playlist:37i9dQZF1DWU4orV...,Post Malone,rockstar
3,AD,workout,1.339208,QMUY41600121,spotify:user:spotify:playlist:37i9dQZF1DX4FJyI...,Major Lazer,Run Up (feat. PARTYNEXTDOOR & Nicki Minaj)
4,AD,workout,1.320847,USLR50000417,spotify:user:spotify:playlist:37i9dQZF1DX76t63...,M.O.P.,Ante Up (Robbin Hoodz Theory)


In [3]:
with open('metadata_dict.pkl', 'rb') as f:
    metadata = pickle.load(f)

In [4]:
with open('/Users/daria/Downloads/saved_model.pkl', 'rb') as f:
    model = pickle.load(f)

In [5]:
playlists = pd.read_csv('/Users/daria/Downloads/data_browse_playlists.csv', index_col='index')
playlists.head()

Unnamed: 0_level_0,territory,category_id,Position,playlist_uri,playlist_name_x,listeners,country_owner,count_appearance,mean_listeners_cat_terr,same_as_country,local,popular,score
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
55589,UY,workout,10,spotify:user:spotify:playlist:37i9dQZF1DX4eRPd...,Hype,666033.0,US,58,108837.659574,0,0,1,1
55741,UY,workout,7,spotify:user:spotify:playlist:37i9dQZF1DX70RN3...,Workout,307011.0,US,58,108837.659574,0,0,1,1
55917,UY,workout,2,spotify:user:spotify:playlist:37i9dQZF1DX76Wlf...,Beast Mode,1460332.0,US,58,108837.659574,0,0,1,1
56575,UY,workout,1,spotify:user:spotify:playlist:37i9dQZF1DX9ZKyQ...,Para Entrenar,64519.0,MX,17,108837.659574,0,1,0,1
56597,UY,workout,11,spotify:user:spotify:playlist:37i9dQZF1DWUVpAX...,Power Workout,616480.0,US,57,108837.659574,0,0,1,1


In [6]:
all_territories = list(playlists.territory.unique())

rec = []

df=playlists.copy()

for territory in all_territories:
    #print("Producing recommendations for ", territory)
    terr_cat = list(df[df.territory==territory].category_id.unique())
    for category in terr_cat:
        #print("Producing recommendations for category ", category)
        terr_cat_playlists = list(df[(df.territory==territory)&(df.category_id==category)].playlist_uri.unique())
        rec.extend(model.rec_multiple_playlists(terr_cat_playlists, model.playlists_sparse, territory=territory,
                                          category = category, N=150))

No recommendations for territory-category  US popculture


KeyboardInterrupt: 

In [16]:
rec

[(17989,
  1.4373112,
  'spotify:user:spotify:playlist:37i9dQZF1DWUVpAXiEPK8P',
  'UY',
  'workout'),
 (72958,
  1.4080974,
  'spotify:user:spotify:playlist:37i9dQZF1DWUVpAXiEPK8P',
  'UY',
  'workout'),
 (14247,
  1.3479207,
  'spotify:user:spotify:playlist:37i9dQZF1DXadOVCgGhS7j',
  'UY',
  'workout'),
 (4351,
  1.3469807,
  'spotify:user:spotify:playlist:37i9dQZF1DWUVpAXiEPK8P',
  'UY',
  'workout'),
 (277,
  1.2949445,
  'spotify:user:spotify:playlist:37i9dQZF1DXadOVCgGhS7j',
  'UY',
  'workout'),
 (7716,
  1.2908813,
  'spotify:user:spotify:playlist:37i9dQZF1DX9ZKyQHcEFXZ',
  'UY',
  'workout'),
 (50763,
  1.2787248,
  'spotify:user:spotify:playlist:37i9dQZF1DX0HRj9P7NxeE',
  'UY',
  'workout'),
 (76578,
  1.2733085,
  'spotify:user:spotify:playlist:37i9dQZF1DWUVpAXiEPK8P',
  'UY',
  'workout'),
 (61311,
  1.2670068,
  'spotify:user:spotify:playlist:37i9dQZF1DXadOVCgGhS7j',
  'UY',
  'workout'),
 (29540,
  1.2645034,
  'spotify:user:spotify:playlist:37i9dQZF1DXadOVCgGhS7j',
  'UY'

In [19]:
model.isrcs[17989]

'USUG11800877'

In [14]:
recommendations

[('USUG11800877',
  1.4373111724853516,
  'spotify:user:spotify:playlist:37i9dQZF1DWUVpAXiEPK8P'),
 ('USUM71800022',
  1.4080973863601685,
  'spotify:user:spotify:playlist:37i9dQZF1DWUVpAXiEPK8P'),
 ('GBUM71028043',
  1.3479206562042236,
  'spotify:user:spotify:playlist:37i9dQZF1DXadOVCgGhS7j'),
 ('USAT21704833',
  1.3469806909561157,
  'spotify:user:spotify:playlist:37i9dQZF1DWUVpAXiEPK8P'),
 ('USAT21101071',
  1.2949445247650146,
  'spotify:user:spotify:playlist:37i9dQZF1DXadOVCgGhS7j'),
 ('CYA111500132',
  1.2908812761306763,
  'spotify:user:spotify:playlist:37i9dQZF1DX9ZKyQHcEFXZ'),
 ('USAT21801428',
  1.2787247896194458,
  'spotify:user:spotify:playlist:37i9dQZF1DX0HRj9P7NxeE'),
 ('USSM11704822',
  1.273308515548706,
  'spotify:user:spotify:playlist:37i9dQZF1DWUVpAXiEPK8P'),
 ('USRC11301020',
  1.267006754875183,
  'spotify:user:spotify:playlist:37i9dQZF1DXadOVCgGhS7j'),
 ('GBCTA1400065',
  1.2645033597946167,
  'spotify:user:spotify:playlist:37i9dQZF1DXadOVCgGhS7j'),
 ('GBUM71705

In [18]:
recs_final = [(model.isrc_lookup[x[0]],x[1],metadata['track_artist'][x[0]], metadata['track_title'][x[0]], metadata['major_label'][x[0]],
               metadata['label_studio'][x[0]],
               metadata['original_release_date'][x[0]], metadata['genre_name'][x[0]], metadata['parent_genre_name'][x[0]], 
               x[2], x[3], x[4]) for x in list(rec)]

KeyError: 17989

In [11]:
all_columns = ['isrc','score','artist','title','major_label','label_studio','original_release_date','genre_name',
               'parent_genre_name','seed_playlist_uri','territory','category']
recs_df_final = pd.DataFrame(recs_final, columns = all_columns)

Unnamed: 0,isrc,score,artist,title,label,label_studio,original_release_date,genre_name,parent_genre_name,seed_playlist_uri
0,USUG11800877,1.437311,Jay Rock,WIN,UMG,Jay Rock (TDE/ISR) JV,2018-06-15 00:00:00,Hip Hop/Rap,Hip Hop/Rap,spotify:user:spotify:playlist:37i9dQZF1DWUVpAX...
1,USUM71800022,1.408097,Eminem,Chloraseptic (feat. 2 Chainz & Phresher) - Remix,UMG,Aftermath New JV-Old School,2018-01-08 00:00:00,Hip Hop/Rap,Hip Hop/Rap,spotify:user:spotify:playlist:37i9dQZF1DWUVpAX...
2,GBUM71028043,1.347921,Ellie Goulding,Lights - Single Version,UMG,Polydor,2012-10-05 00:00:00,Pop,Pop,spotify:user:spotify:playlist:37i9dQZF1DXadOVC...
3,USAT21704833,1.346981,Meek Mill,"That's My N**** (with Meek Mill, YG & Snoop Dogg)",Unknown,Unknown,Unknown,Unknown,Unknown,spotify:user:spotify:playlist:37i9dQZF1DWUVpAX...
4,USAT21101071,1.294945,Gym Class Heroes,Stereo Hearts (feat. Adam Levine) - feat. Adam...,Warner,WM Japan,2017-09-27 00:00:00,Pop,Pop,spotify:user:spotify:playlist:37i9dQZF1DXadOVC...
5,CYA111500132,1.290881,Tiësto,The Right Song,UMG,Marketing Labs,2016-07-22 00:00:00,Dance,Dance,spotify:user:spotify:playlist:37i9dQZF1DX9ZKyQ...
6,USAT21801428,1.278725,Cardi B,Bickenhead,Warner,Atlantic/KSR,2018-04-06 00:00:00,Hip Hop/Rap,Hip Hop/Rap,spotify:user:spotify:playlist:37i9dQZF1DX0HRj9...
7,USSM11704822,1.273309,Big Boi,All Night,Other,Purple Ribbon/HITCO,2017-06-16 00:00:00,Hip Hop/Rap,Hip Hop/Rap,spotify:user:spotify:playlist:37i9dQZF1DWUVpAX...
8,USRC11301020,1.267007,Justin Timberlake,Drink You Away,Sony,RCA Records Label,2013-09-27 00:00:00,Pop,Pop,spotify:user:spotify:playlist:37i9dQZF1DXadOVC...
9,GBCTA1400065,1.264503,Kelly Clarkson,Heartbeat Song,Sony,Sony Music Special Projects,2016-05-04 00:00:00,Pop,Pop,spotify:user:spotify:playlist:37i9dQZF1DXadOVC...


In [None]:
rec_df_final[rec_df_final.major_label]