In [1]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''') 

In [1]:
import urllib.request
import urllib.parse
import re
import spotipy
import spotipy.util as util
import spotify_tokens
import pandas as pd
import numpy as np
from collections import Counter
import argparse
import os.path
import time
# from ConfigParser import SafeConfigParser
from datetime import datetime

import httplib2

import plotly.express as px

# Google Data API
# from apiclient.discovery import build
# import oauth2client
# from oauth2client.file import Storage
# from oauth2client.client import flow_from_clientsecrets
# from oauth2client.tools import run_flow

pd.options.display.max_rows = 2000
user = 'matteo7moh'

In [2]:
import matplotlib.pyplot as plt

# Analytics

## All genres, sources, bpm, tags

In [3]:
# To find typos and wrong values (stupid humans...)
tracks = pd.read_csv('sorted_tracks.csv', dtype={'tags': str, 'genre': str})
tracks = tracks.sort_values(['genre','artists','title'])
for c in tracks.columns[2:-2]:
    print()
    print(c, tracks[c].unique())


genre ['alternative' 'alternative electronic' 'alternative_rock'
 'alternative_rock pop' 'ambient' 'ambient contemporary_classical'
 'ambient experimental' 'chill_hop' 'classical' 'classical electronic'
 'contemporary_classical' 'contemporary_classical ambient'
 'contemporary_classical electronic' 'deep_house' 'dubstep'
 'electro_house' 'electronic' 'electronic alternative'
 'electronic downtempo' 'experimental' 'experimental ambient'
 'experimental electronic' 'experimental harp' 'folk' 'future_bass'
 'hip_hop' 'hip_hop experimental' 'house' 'indie' 'indie pop' 'jazz'
 'jazz pop' 'minimal_house' 'pop' 'punk' 'rock' 'rock alternative'
 'rock pop' 'soul' 'techno' 'techno downtempo' 'techno trance' 'trance'
 nan]

src ['sp' 'yt' 'yt sp' 'sc' 'bc' nan 'sp yt']

bpm [ nan 126. 121. 122. 120. 110. 124. 116. 118. 127. 140. 136. 114. 123.
 117. 129. 112. 137.  99. 113. 180. 141. 128. 130. 132. 125. 133. 119.
 142. 135. 131. 134. 150. 144. 138. 146. 160. 143. 155. 148. 145. 139.
 151. 147.]



In [4]:
# who is in ambient and cont classical?
soft_styles = ['ambient', 'contemporary_classical']
electronic_styles = ['electronic', 'deep_house', 'dubstep', 'electro_house', 'minimal_house', 'techni', 'trance', 'alternative', 'downtempo', 'house']
soft_artists = set(tracks[tracks.genre.isin(soft_styles)].artists.unique())
electronic_artists = set(tracks[tracks.genre.isin(soft_styles)].artists.unique())

In [29]:
soft_artists.intersection(electronic_artists)

{'a_winged_victory_for_the_sullen',
 'aaron_martin',
 'abul_mogard',
 'abul_mogard aisha_devi',
 'abul_mogard aukai',
 'adam_bryanbaum_wiltzie',
 'agnes_obel',
 'aidan_baker ekin_fil',
 'alessandro_penna',
 'alexandre_desplat',
 'alva_noto',
 'ana_roxanne',
 'anasisana',
 'andrea_ronen',
 'andy_stott',
 'anne_müller',
 'annelie',
 'anthony_linell',
 'aphex_twin',
 'apparat',
 'arden_forest',
 'arseny',
 'arthur_jeffes nils_frahm',
 'arthur_lyman',
 'arvo_pärt',
 'arvo_pärt john_cage giacinto_scelsi',
 'autechre',
 'bachar_mar-khalifé',
 'ben_frost',
 'ben_lukas_boysen',
 'bendik_giske',
 'bing_&_ruth',
 'biosphere',
 'bon_iver',
 'brian_eno',
 'bruno_coulais',
 'c._diab',
 'carter_burwell',
 'caterina_barbieri',
 'caterina_barbieri kali_malone',
 'celer',
 'celer forest_management',
 'chihei_hatakeyama',
 'chihei_hatakeyama field_works',
 'chilly_gonzales',
 'christina_vantzou',
 'christina_vantzou echo_collective',
 'clark',
 'clem_leek',
 'clint_mansell',
 'colin_stetson',
 'colin_st

In [4]:
upper = 50
lower = 10
most_popular_artists = tracks.groupby('artists').count().reset_index()[['artists', 'title']].sort_values('title', ascending=False)
selected_artists = most_popular_artists.query('title <= ' + str(upper) + ' & ' + str(lower) + ' <= title')


In [8]:
most_popular_artists.head(1000)

Unnamed: 0,artists,title
240,chopin,151
774,loscil,112
1187,son_lux,96
1138,schubert,91
402,eluvium,84
954,nils_frahm,76
274,colin_stetson,68
45,alessandro_cortini,64
1004,paul_kalkbrenner,61
783,ludovico_einaudi,53


In [12]:
s = 'www_dd'

In [13]:
s.replace('_', ' ')

'www dd'

In [15]:
for e in [el.replace('_', ' ') for el in most_popular_artists['artists'].values]:
    print(e)

chopin
loscil
son lux
schubert
eluvium
nils frahm
colin stetson
alessandro cortini
paul kalkbrenner
ludovico einaudi
ash koosha
giulio aldinucci
hotel neon
beethoven
yann novak
tchaikovsky
yann tiersen
sarah davachi
gas
jackie gleason
abul mogard
emptyset
fritz kalkbrenner
arca
debussy
brahms
ben frost
dustin o'halloran
coldplay
ryan lott
andrea ronen
canzoniere grecanico salentino
rachmaninoff
ólafur arnalds
hvob
mozart
paganini
ian hawgood
douglas dare
darse
valentin silvestrov sergey yakovenko
black taffy
jon hopkins
runar blesvik
recondite
liszt
rival consoles
agnes obel
joep beving
philip glass
ian william craig
strauss ii
emile mosseri
kangding ray
florence and the machine
beriot
bing & ruth
nthng
the xx
flume
roly porter
the sight below
dvořák
eartheater
fabri fibra
caterina barbieri
john adams
mezerg
émile waldteufel
max cooper
helmut qualtinger
wun two
doon kanda
arvo pärt
apparat
alexandre desplat
marc rebillet
marino canal
arseny
four tet
a winged victory for the sullen
bach

In [15]:
# fig = px.bar(selected_artists, x='artists', y='title')
# fig.show()

In [5]:
tracks.loc[tracks.genre == 'sp']

Unnamed: 0,artists,title,genre,src,bpm,tags,rating,spotify_id,youtube_id


In [6]:
tracks[tracks['title'].isnull()]

Unnamed: 0,artists,title,genre,src,bpm,tags,rating,spotify_id,youtube_id
1883,a_winged_victory_for_the_sullen,,contemporary_classical,sp,,,2.0,,


In [5]:
# # to print urls and then check if they are still valid
# for i in tracks.loc[tracks.genre == 'techno'].loc[~pd.isna(tracks.spotify_id)].spotify_id.values:
#     print('https://open.spotify.com/track/' + i)

In [None]:
# tracks[tracks.youtube_id == '\t']
# tracks[tracks.src == 'house']
# tracks[tracks.bpm == 'violin orchestra']
# tracks[tracks.tags == '2']
# tracks[tracks.rating == '26yjkiLAvOiLbeWWM0XEcCs']
# tracks[tracks.spotify_id == 'acid ']

In [8]:
# tracks.artists = tracks.artists.str.lower()
# tracks.to_csv('sorted_tracks.csv', index=False)

## Most common words in titles

In [9]:
words_occur_dict = dict()
for title in tracks['title'].tolist():
    words_title = title.split()
    for word in words_title:
        if word not in words_occur_dict:
            words_occur_dict[word] = 0
        words_occur_dict[word] += 1

In [10]:
words_occur = []
for word in words_occur_dict:
    words_occur.append([word, words_occur_dict[word]])

In [11]:
words_occur.sort(key=lambda x: -x[1])

In [12]:
for el in words_occur[:200]:
    print(el)

['the', 412]
['in', 349]
['a', 161]
['of', 160]
['i', 138]
['no.', 132]
['minor', 131]
['1', 125]
['2', 120]
['no', 118]
['concerto', 118]
['piano', 102]
['op.', 101]
['ii', 100]
['3', 98]
['d', 78]
['major', 76]
['and', 73]
['for', 72]
['op', 71]
['iii', 69]
['to', 68]
['violin', 66]
['symphony', 65]
['you', 64]
['sonata', 62]
['mazurka', 60]
['la', 56]
['me', 50]
['love', 47]
['flat', 46]
['e', 43]
['on', 41]
['c', 41]
['dance', 41]
['act', 41]
['b', 40]
['is', 39]
['f', 39]
['it', 38]
['4', 38]
['scene', 35]
['no.1', 34]
['no.2', 34]
['24', 33]
['6', 32]
['allegro', 31]
['my', 30]
['all', 30]
['5', 30]
['this', 27]
['one', 27]
['iv', 27]
['waltz', 27]
['nutcracker', 27]
['g', 26]
['prelude', 25]
['hungarian', 25]
['no.3', 25]
['préludes', 24]
['op.28', 24]
['light', 23]
['we', 23]
['nocturne', 23]
['time', 22]
['from', 21]
['night', 21]
['no.4', 21]
['de', 20]
['9', 20]
['with', 19]
['at', 19]
['7', 19]
['sharp', 19]
['your', 18]
['pt.', 18]
['del', 18]
['d.', 18]
['up', 17]
['life'

In [13]:
tracks.head()

Unnamed: 0,artists,title,genre,src,bpm,tags,rating,spotify_id,youtube_id
1,amsterdam_klezmer_band,chinese duck,alternative,sp,,söndörgő,2.0,,
2,amsterdam_klezmer_band,mooncatch,alternative,sp,,söndörgő,2.0,,
3,amsterdam_klezmer_band,moroka,alternative,sp,,söndörgő,2.0,,
4,amsterdam_klezmer_band,powerbeat,alternative,sp,,söndörgő,2.0,,
5,amsterdam_klezmer_band,random flow,alternative,sp,,söndörgő,2.0,,


## All Artists

In [16]:
artists_raw = tracks['artists'].unique().tolist()
artists_set = set()
for a in artists_raw:
    for sa in a.split(' '):
        artists_set.add(sa)
artists = list(artists_set)
artists.sort()
print('Number of unique artists', len(artists))
for a in artists:
    print(a, end=' ')

Number of unique artists 1430
(((_o_))) .ihaveaface 2+2=5 2000_and_one 50_cent 747 7options 98u 999999999 a[way] a_winged_victory_for_the_sullen aaron_copland aaron_martin abdullah_miniawy abstract_division abul_mogard acid_pauli acronym adam_beyer adam_bryanbaum_wiltzie adele adiel adriatique adventure_club aether afrojack age_of_love agents_of_time aglio agnes_obel aho_ssan aidan_baker aimless airhead airod aisha_devi aka_aka akon al_ferox alan_fitzpatrick alberth alberto_fracasso albinoni alessandro_cortini alessandro_penna alesso alex.do alex_niggemann alex_smoke alexandre_desplat alexis_grapsas alfons_czibulka alien_rain altinbas altman alunageorge alva_noto always_centered_at_night amandra amber ambition² ambivalent ambulo amelie_lens amsterdam_klezmer_band amy_pearson an21 ana_roxanne anasisana and andhim andrea_roma andrea_ronen andrew_wyatt anduschus andy_stott anetha angger_dimas angle angy_kore anna anna_caragnano anna_naklab anne_müller annelie ansome anthony_linell anthro 

## Top N artists sorted by number of tracks 

In [19]:
tracks_list = tracks.values.tolist()
artist_nTracks = dict()
for a in artists:
    for t in tracks_list:
        if a in t[0].split(' '):
            if a not in artist_nTracks:
                artist_nTracks[a] = 0
            artist_nTracks[a] += 1
N = 200
artist_nTracks_list = []
for a in artist_nTracks.keys():
    artist_nTracks_list.append([a, artist_nTracks[a]])
artist_nTracks_list.sort(key=lambda x: -x[1])    
artist_nTracks_list[:N]

[['chopin', 151],
 ['loscil', 108],
 ['son_lux', 104],
 ['schubert', 93],
 ['eluvium', 85],
 ['colin_stetson', 70],
 ['nils_frahm', 70],
 ['alessandro_cortini', 69],
 ['paul_kalkbrenner', 63],
 ['giulio_aldinucci', 62],
 ['ludovico_einaudi', 56],
 ['ash_koosha', 53],
 ['hotel_neon', 48],
 ['yann_novak', 47],
 ['beethoven', 45],
 ['yann_tiersen', 44],
 ['flume', 43],
 ['gas', 42],
 ['tchaikovsky', 42],
 ['debussy', 40],
 ['jackie_gleason', 39],
 ['emptyset', 38],
 ['fritz_kalkbrenner', 38],
 ['abul_mogard', 36],
 ['arca', 36],
 ['coldplay', 36],
 ['ben_frost', 35],
 ['philip_glass', 34],
 ['tale_of_us', 34],
 ['ian_hawgood', 33],
 ['brahms', 32],
 ['ryan_lott', 32],
 ['canzoniere_grecanico_salentino', 31],
 ['darse', 31],
 ['hvob', 31],
 ['sarah_davachi', 31],
 ['andrea_ronen', 30],
 ['ólafur_arnalds', 30],
 ['liszt', 29],
 ['paganini', 29],
 ['rachmaninoff', 29],
 ['recondite', 29],
 ['the_sight_below', 29],
 ['black_taffy', 28],
 ['douglas_dare', 28],
 ['joep_beving', 28],
 ['mozart',

# Top N Artists sorted by number of extraordinary tracks

In [17]:
tracks_list = tracks.values.tolist()
artist_nTracks = dict()
for a in artists:
    for t in tracks_list:
        if t[-3] > 2:
            if a in t[0].split(' '):
                if a not in artist_nTracks:
                    artist_nTracks[a] = 0
                artist_nTracks[a] += 1
N = 50
artist_nTracks_list = []
for a in artist_nTracks.keys():
    artist_nTracks_list.append([a, artist_nTracks[a]])
artist_nTracks_list.sort(key=lambda x: -x[1])    
# artist_nTracks_list[:N]
artist_nTracks_list


[['schubert', 11],
 ['chopin', 9],
 ['tale_of_us', 9],
 ['beethoven', 8],
 ['ludovico_einaudi', 8],
 ['mozart', 8],
 ['tchaikovsky', 8],
 ['colin_stetson', 7],
 ['mathame', 7],
 ['alessandro_cortini', 6],
 ['brahms', 6],
 ['yann_tiersen', 6],
 ['bruch', 5],
 ['fango', 5],
 ['flume', 5],
 ['kangding_ray', 5],
 ['son_lux', 5],
 ['caterina_barbieri', 4],
 ['giulio_aldinucci', 4],
 ['liszt', 4],
 ['macklemore', 4],
 ['philip_glass', 4],
 ['sibelius', 4],
 ['trym', 4],
 ['andrea_ronen', 3],
 ['bach', 3],
 ['clair', 3],
 ['darse', 3],
 ['johannes_heil', 3],
 ['locked_groove', 3],
 ['rival_consoles', 3],
 ['vivaldi', 3],
 ['amelie_lens', 2],
 ['barnt', 2],
 ['clouds', 2],
 ['denis_horvat', 2],
 ["dustin_o'halloran", 2],
 ['eomac', 2],
 ['four_tet', 2],
 ['i_hate_models', 2],
 ['mendelssohn', 2],
 ['mind_against', 2],
 ['moderat', 2],
 ['ovend', 2],
 ['perc', 2],
 ['rachmaninoff', 2],
 ['sarasate', 2],
 ['strauss_ii', 2],
 ['the_xx', 2],
 ['tommy_trash', 2],
 ['age_of_love', 1],
 ['aho_ssan', 

# Genres sorted by number of tracks [TODO]

# Old stuff

In [None]:
def show_tracks(tracks):
    for i, item in enumerate(tracks['items']):
        track = item['track']
        print('_'.join(track['artists'][0]['name'].split(' ')) + ',' + ''.join([c for c in track['name'] if c != ',']))

In [128]:
token = util.prompt_for_user_token(user, "playlist-modify-public", 
                            client_id=spotify_tokens.SPOTIPY_CLIENT_ID,
                            client_secret=spotify_tokens.SPOTIPY_CLIENT_SECRET, 
                            redirect_uri=spotify_tokens.SPOTIPY_REDIRECT_URI)

In [129]:
sp = spotipy.Spotify(auth=token)

In [130]:
derek = 'derekmholden'

In [None]:
# Rachmaninoff
# https://open.spotify.com/playlist/7ieitehSj3tK1r7JANMGpB?si=a-1JX8FBSJyw4VPrXA8Rzg
# https://open.spotify.com/playlist/77Yvz89wtLG03NfXjZsDsn?si=BlUBtONUTvmEnAOlmKThXA

# select most interesting playlists from derekmholden
# save in a json file all the tracks of these playlists
# for each playlist
    # print name, playlist_id and # tracks
# for each selected playlist
    # get tracks

# key:artist value:dict keys:id-str,tracks-list

# playlist_tracks = dict()
# playlist_tracks[artist] = dict()
# playlist_tracks[artist][id] = id
# playlist_tracks[artist][tracks] = []
# playlist_tracks[artist][tracks][0] = [title, id]

In [131]:
selected_playlists = ['64WbDcZ0qydwB74R0WdKbK','6EHds7JuhUBmkO7RsyfkAE','08nBsQe7VuuglVlDrYZei1','23CR74npG2F0wli8jtQJz4','3IBjLBs0kTu3jVOR6nksRv','4l8oYlE92sxBZPJGuLMcta','1js4hNZXYnSSaiotGLgbpI','3xC4slztlOMAcEpOZ75nnl','4Cdf8iJoA2TlYeaCYZ6PTd','6KlJr1MDw5UaGtJrYvQsFw','0fsJWwx9klhtPlkZXzcPdx','26Ntk1HaC7DeTcuNPkpVhA','5lRhN0zDQBqpbHyatqOSw1','7FmXps14i06FBZ4ROkVSMS','5Ycgmq3fqjotgynY0ZVHki','4XCcEfzrzvnkYw4HyLIQ8t','3un8zVtnUdKN84ArpU2VlL','0JbQEXIvOchzx9skQJLD8O','49wApDOefMMttQAXwFsPKL','1LnpNJiaXGIWXSBj9MRePk','5eHKopkOdq5ayaFH7ojTRU','5xdLTgjqc8o2myRCoEdS35','2UcuwagFHtgrn73UXfw8Yu','4fyt7asuScLC3Ja78m9pcN','1D6DzsoK6CoWoGvSSmIDt1','7zOiKIjS8XH0W7U4zBp0dL','29ywUWrfS9fdQ1OsuBwdN5','5GS3atggscyIt7cWmKfWlu','56SLP5poIart2u0bkAYd1T','6z4LP41BgRPP32O4GAAKzs','1jvQTLsqbHtuDL3U3thaQB','0xgbbLEAQ72y92Bed3Oh34','5UHFt6tWXgAQr5c7NRw9Oz','4RyTyMFXlIesgsHE1U9WVw','2NelDWOzy23QlS2ZHcz2Lt','2vEvfIUeaJVKjOyIdr7bnq',]

In [144]:
empty = False
playlist_tracks = dict()
i = 0
while not empty:
    playlists = sp.user_playlists(derek, offset=i*50)
    print('# of playlists:', len(playlists['items']))
    if playlists['items']:
        for el in playlists['items']:
            if el['id'] in selected_playlists:
                print('playlist:', el['name'])
                print('#tracks:', el['tracks']['total'])
                playlist_tracks[el['name']] = dict()
                playlist_tracks[el['name']]['id'] = el['id']               
                playlist_tracks[el['name']]['tracks'] = []
                i2 = 0
                empty2 = False
                while not empty2:
                    print('empty2', empty2, i2)
                    tracks_pl =  sp.user_playlist_tracks(user=derek, playlist_id=el['id'], offset=i2*100)
                    i2 += 1
                    if tracks_pl['items']:
                        print('#tracks in chunk', len(tracks_pl['items']))
                        for tr in tracks_pl['items']:
                            playlist_tracks[el['name']]['tracks'].append([tr['track']['name'], tr['track']['id']])
                    else:
                        empty2 = True
    else:
        empty = True
    i += 1   

# of playlists: 50
playlist: Adams, John (Chronological)
#tracks: 297
empty2 False 0
#tracks in chunk 100
empty2 False 1
#tracks in chunk 100
empty2 False 2
#tracks in chunk 97
empty2 False 3
playlist: Bach, J. S. (Complete-ish)
#tracks: 3522
empty2 False 0
#tracks in chunk 100
empty2 False 1
#tracks in chunk 100
empty2 False 2
#tracks in chunk 100
empty2 False 3
#tracks in chunk 100
empty2 False 4
#tracks in chunk 100
empty2 False 5
#tracks in chunk 100
empty2 False 6
#tracks in chunk 100
empty2 False 7
#tracks in chunk 100
empty2 False 8
#tracks in chunk 100
empty2 False 9
#tracks in chunk 100
empty2 False 10
#tracks in chunk 100
empty2 False 11
#tracks in chunk 100
empty2 False 12
#tracks in chunk 100
empty2 False 13
#tracks in chunk 100
empty2 False 14
#tracks in chunk 100
empty2 False 15
#tracks in chunk 100
empty2 False 16
#tracks in chunk 100
empty2 False 17
#tracks in chunk 100
empty2 False 18
#tracks in chunk 100
empty2 False 19
#tracks in chunk 100
empty2 False 20
#tracks in 

#tracks in chunk 100
empty2 False 1
#tracks in chunk 52
empty2 False 2
playlist: Rimsky-Korsakov, Nicolai (Chronological)
#tracks: 380
empty2 False 0
#tracks in chunk 100
empty2 False 1
#tracks in chunk 100
empty2 False 2
#tracks in chunk 100
empty2 False 3
#tracks in chunk 80
empty2 False 4
playlist: Satie, Erik (Chronological)
#tracks: 260
empty2 False 0
#tracks in chunk 100
empty2 False 1
#tracks in chunk 100
empty2 False 2
#tracks in chunk 60
empty2 False 3
playlist: Schubert, Franz (Chronological)
#tracks: 1568
empty2 False 0
#tracks in chunk 100
empty2 False 1
#tracks in chunk 100
empty2 False 2
#tracks in chunk 100
empty2 False 3
#tracks in chunk 100
empty2 False 4
#tracks in chunk 100
empty2 False 5
#tracks in chunk 100
empty2 False 6
#tracks in chunk 100
empty2 False 7
#tracks in chunk 100
empty2 False 8
#tracks in chunk 100
empty2 False 9
#tracks in chunk 100
empty2 False 10
#tracks in chunk 100
empty2 False 11
#tracks in chunk 100
empty2 False 12
#tracks in chunk 100
empty2 

In [116]:
from pprint import pprint

In [147]:
for k in playlist_tracks:
    print(len(playlist_tracks[k]['tracks']), k)

297 Adams, John (Chronological)
3522 Bach, J. S. (Complete-ish)
645 Bartok, Bela (Chronological)
1155 Beethoven, Ludwig van (complete chronological)
420 Berlioz, Hector (Chronological?)
925 Brahms, Johannes (Complete)
230 Bruckner, Anton (Chronological, Excluding Falsely-Attributed Works and Kitzler exercises)
306 Busoni, Ferruccio (Chronological)
1002 Cage, John (Chronological)
255 Chopin, Frederic (By Opus, then Publication Date)
402 Debussy, Claude (Chronological)
198 Gesualdo, Carlo (Chronological)
918 Glass, Philip (chronological)
101 Gorecki, Henryk (Chronological)
199 Ligeti, Gyorgy (chronological)
182 Mahler, Gustav (Chronological)
236 Mendelssohn, Felix (Chronlogical)
461 Monteverdi, Claudio (Chronological)
2678 Mozart, W. A. (Chronological)
240 Mussorgsky, Modest (Chronological)
544 Paganini, Niccolo (Chronological)
170 Part, Arvo (Chronological)
1054 Prokofiev, Sergei (chronlogical)
189 Ravel, Maurice (Chronological)
152 Reich, Steve (Chronological)
380 Rimsky-Korsakov, Nico

In [112]:
import json

In [148]:
with open('selected_artists.json', 'w') as fp:
    json.dump(playlist_tracks, fp)

In [72]:
# lim = 10
# n = 0
# for i in range(30):
#     results = sp.current_user_saved_tracks(limit=lim, offset=i*lim)
#     for item in results['items']:
#         track = item['track']
#         print('_'.join(track['artists'][0]['name'].split(' ')) + ',' + ''.join([c for c in track['name'] if c != ',']))
#         n += 1

In [65]:
# for playlist in playlists['items']:
# #     print(playlist['name'])
#     results = sp.user_playlist(user, playlist['id'],fields="tracks,next")
#     tracks = results['tracks']
#     show_tracks(tracks)
#     while tracks['next']:
#         tracks = sp.next(tracks)
#         show_tracks(tracks)

In [16]:
# cleaning artists
tracks['artists'] = tracks['artists'].apply\
    (lambda x: x[len('\ufeff'):] if x[:len('\ufeff')] == '\ufeff' else x)

In [17]:
# how many distinct artists do I have?
artists = set()
for a in list(tracks['artists'].unique()):
#     print('-------', a)
    names = a.split(' ')
    for n in names:
        artists.add(n)
len(artists)

94

In [18]:
# to check possible typos
sorted_artists = list(artists)
sorted_artists.sort()
# sorted_artists

In [20]:
# what are the most frequent artists?
artist_occurrences = dict()
for a in list(tracks['artists']):
    names = a.split(' ')
    for n in names:
        if n not in artist_occurrences.keys():
            artist_occurrences[n] = 0
        artist_occurrences[n] += 1

In [21]:
artist_occurrences_list = []
for a, o in artist_occurrences.items():
#     print('-',o)
    artist_occurrences_list.append([a, o])
artist_occurrences_list.sort(key=lambda x: -x[1])

# Listen to tracks

In [23]:
tracks_list = tracks.values.tolist()

In [24]:
import numpy as np
import math

In [154]:
i += 1
# i = 0
# i = len(tracks_list) - 1
t = tracks_list[i]
search = ''
for el in t:
    try:
        if math.isnan(el):
            pass
#             printt(el)
    except:
        el = el.replace('_', ' ')
        search += (el) + ' '
search = search[:-1]
query_string = urllib.parse.urlencode({"search_query" : search})
html_content = urllib.request.urlopen("http://www.youtube.com/results?" + query_string)
search_results = re.findall(r'href=\"\/watch\?v=(.{11})', html_content.read().decode())
print(search)
print("http://www.youtube.com/watch?v=" + search_results[0])

# to get genre and bpm
sb = search + ' ' 'beatport'
sbl = sb.split(' ')
s2 = '+'.join(sbl)
print("https://www.google.com/search?q=" + s2)


Högni Máni
http://www.youtube.com/watch?v=6MgJUyyiPnE
https://www.google.com/search?q=Högni+Máni+beatport


In [79]:
[s + '+' for s in search.split(' ')] + ['beatport']

['Steve+', 'Parker+', 'The+', 'Prophet+', 'beatport']

artist,title,genre,bpm,src,tags

tags: soft, chill, heavy, piano, voice, synth, brass, strings, orchestra