## Install dependencies

In [None]:
!pip install -r requirements.txt

## Imports and setup

In [26]:
import os
import requests
import pandas as pd
import pickle as pkl

import spotify.sync as spotify

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

CLIENT_ID = os.environ['SPOTIPY_CLIENT_ID']
CLIENT_SECRET = os.environ['SPOTIPY_CLIENT_SECRET']

In [4]:
CLIENT_ID, CLIENT_SECRET

('4d998967676944babefda544d3e14570', '184c43073fac4533a613cf35c30d9127')

In [5]:
data = {
  'grant_type': 'client_credentials',
  'client_id': CLIENT_ID,
  'client_secret': CLIENT_SECRET,
}
response = requests.post('https://accounts.spotify.com/api/token', data=data)
credentials = response.json()

### Spotify.py try

In [None]:
client = spotify.Client(CLIENT_ID, CLIENT_SECRET)

In [None]:
user = await spotify.User.from_token(client, credentials['access_token'])

In [None]:
user.user.currently_playing

In [None]:
user_2 = spotify.User(client=client, data={'id': 'spotify'})

### Spotipy try

In [2]:
auth_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(auth_manager=auth_manager)

In [3]:
playlists = sp.user_playlists('s44tvciud5tday4wgqy4ct4gl')

source_playlist = None
target_playlist = 'Quilombos Musicales'

for playlist in playlists['items']:
  if playlist['name'] == target_playlist:
    target_playlist = playlist
    break


In [23]:
items_tracks = []

idx_total = 0
while idx_total < 1306:
  items = sp.playlist_items(target_playlist['uri'], offset=idx_total, limit=100)
  items_tracks += items['items']
  idx_total += 100

pkl.dump(items_tracks, open('items_tracks.pkl', 'wb'))

In [53]:
def get_artists(track):
  temp = data.get('track', {})
  if temp:
    artists = temp.get('artists')
    if artists and len(artists) > 0:
      return artists

  try:
    return sp.search(track['name'])['tracks']['items'][0]['artists']
  except Exception as e:
    print('Get artists failed', e)
    return None

def get_artist_id(artist, cache={}):
  if artist['name'] in cache:
    return cache[artist['name']]['id']

  artist_id = artist.get('id')
  if artist_id:
    return artist_id

  try:
    return sp.search(artist['name'])['tracks']['items'][0]['artists'][0]['id']
  except Exception as e:
    print('Get artist id failed', e)
    return None


In [47]:
cache_table = {}

In [57]:
df = pd.DataFrame()
cache_table = pkl.load(open('cache_table.pkl', 'rb'))

for data in items_tracks:
  artists = get_artists(data)
  if not artists or len(artists) == 0:
    continue

  for artist in data['track']['artists']:
    artist_id = get_artist_id(artist, cache_table)
    if artist['name'] not in cache_table:
      cache_table[artist['name']] = artist
      pkl.dump(cache_table, open('cache_table.pkl', 'wb'))

    if not artist_id:
      continue
    
    artist_genres = sp.artist(artist_id)['genres']
    for genre in artist_genres:
      item = {
        'artist_id': artist_id,
        'artist': artist['name'],
        'genre': genre,
        'track_id': data['track']['id'],
        'track': data['track']['name'],
        'track_number': data['track']['track_number'],
        'is_local': data['track']['is_local'],
      }
      df = pd.concat([df, pd.DataFrame(item, index=[0])])

df.to_pickle('data_tracks.pkl')

Get artists failed 'name'


In [58]:
df

Unnamed: 0,artist_id,artist,genre,track_id,track,track_number,is_local
0,4cdyqaBREB68H77QKCrKP1,Dubdogz,brazilian bass,2UGP2VAHZRqxETjJuPohwQ,Round N Round,1,False
0,4cdyqaBREB68H77QKCrKP1,Dubdogz,brazilian edm,2UGP2VAHZRqxETjJuPohwQ,Round N Round,1,False
0,6mDl7lQiLxT0iQ8LYhAlWy,Zerb,brazilian edm,2UGP2VAHZRqxETjJuPohwQ,Round N Round,1,False
0,4mHAu7NX2UNsnGXjviBD9e,Brooks,edm,6lLHhKjeqWWNa0hTbJN98R,Take My Breath Away,1,False
0,4mHAu7NX2UNsnGXjviBD9e,Brooks,electro house,6lLHhKjeqWWNa0hTbJN98R,Take My Breath Away,1,False
...,...,...,...,...,...,...,...
0,3nlpTZci9O5W8RsNoNH559,Juan Luis Guerra 4.40,tropical,1V9FVEr3vxol6BHZBVOrhK,Mi PC,1,False
0,3nlpTZci9O5W8RsNoNH559,Juan Luis Guerra 4.40,bachata,6KsWOolnMtLToxv6XpYjmi,La Guagua,2,False
0,3nlpTZci9O5W8RsNoNH559,Juan Luis Guerra 4.40,latin,6KsWOolnMtLToxv6XpYjmi,La Guagua,2,False
0,3nlpTZci9O5W8RsNoNH559,Juan Luis Guerra 4.40,latin pop,6KsWOolnMtLToxv6XpYjmi,La Guagua,2,False


In [None]:
# drop duplicates track_id
df.groupby(['artist_id', 'artist', 'genre', 'track', 'track_id']).count().reset_index()

In [70]:
df_pro = df.drop_duplicates(subset=['artist_id', 'artist', 'genre', 'track', 'track_id'])