# Spotify Musical Analysis - Data Wrangling 

### Initial Imports and Pre-Settings 

In [2]:
import os
import json
import requests

import pandas as pd

### Reading Saved Songs Data 

In [3]:
CLIENT_TOKEN = os.environ['spotify_token']

In [11]:
URL = 'https://api.spotify.com/v1/me/tracks?offset=0&limit=50'
HEADERS = {'Authorization': 'Bearer ' + CLIENT_TOKEN}
PARAMS = {'limit': 50}

In [12]:
r = requests.get(url=URL, headers=HEADERS, params=PARAMS)
response, data = r.status_code, r.json()

if response == 200:
    all_data = [data]
    while data['next']:
        URL = data['next']
        r = requests.get(url=URL, headers=HEADERS, params=PARAMS)
        response, data = r.status_code, r.json()
        if response == 200:
            all_data.append(data)
            print('Request on {url} successful'.format(url=URL))
        else:
            print('Failed request on {url}'.format(url=URL))
            break
else:
    print('Error', response)

Request on https://api.spotify.com/v1/me/tracks?offset=50&limit=50 successful
Request on https://api.spotify.com/v1/me/tracks?offset=100&limit=50 successful
Request on https://api.spotify.com/v1/me/tracks?offset=150&limit=50 successful
Request on https://api.spotify.com/v1/me/tracks?offset=200&limit=50 successful
Request on https://api.spotify.com/v1/me/tracks?offset=250&limit=50 successful
Request on https://api.spotify.com/v1/me/tracks?offset=300&limit=50 successful
Request on https://api.spotify.com/v1/me/tracks?offset=350&limit=50 successful
Request on https://api.spotify.com/v1/me/tracks?offset=400&limit=50 successful
Request on https://api.spotify.com/v1/me/tracks?offset=450&limit=50 successful
Request on https://api.spotify.com/v1/me/tracks?offset=500&limit=50 successful
Request on https://api.spotify.com/v1/me/tracks?offset=550&limit=50 successful
Request on https://api.spotify.com/v1/me/tracks?offset=600&limit=50 successful
Request on https://api.spotify.com/v1/me/tracks?offse

### Transforming into columnar 

In [13]:
frames = []
for data in all_data:
    frame = pd.DataFrame(data['items'])
    frames.append(frame)
songs = pd.concat(frames)

In [14]:
cols_to_keep = [
    'id',
    'name',
    'artists',
    'duration_ms',
    'popularity',
]

In [15]:
for col in cols_to_keep:
    songs.loc[:, col] = songs.track.apply(lambda x: x[col])

songs.loc[:, 'artists_ids'] = songs.artists.apply(lambda x: [i['id'] for i in x])
songs.drop(['track', 'artists'], axis=1, inplace=True)

In [16]:
songs.head()

Unnamed: 0,added_at,id,name,duration_ms,popularity,artists_ids
0,2019-09-28T00:48:35Z,4TIONK6tnZWlNl5HdKpWby,Dondante,479306,45,[43O3c6wewpzPKwVaGEEtBM]
1,2019-09-28T00:21:47Z,6ZDk3PGFkRKGgk6aSkw2T9,A Fine Way to Die,400982,49,"[25oLRSUjJk4YHNUsQXk7Ut, 2twGthIPnZl1icD7K2jXx3]"
2,2019-09-26T18:00:50Z,36xZeDLfY3pSGZdQadvEvd,Vermelho,302280,42,[2HubQ6TEbD7HknCNrBsqhR]
3,2019-09-26T17:06:12Z,23kqlWwszJs8NRFkCQ5L5U,Ensaio Sobre Ela,238664,50,[6a5wUPC879Kyfw0aXxVatB]
4,2019-09-26T15:30:48Z,6g0o3FK5GqsjPV488bBtDj,Dinossauros,266546,46,[3pnpaEYzaDj5zJluhXbVrG]


Now we need to get artists infos, such as genre and popularity.

In [29]:
unique_artists_ids = list(set(songs.artists_ids.sum()))

In [None]:
frames = []

for artist_id in unique_artists_ids:

    URL = 'https://api.spotify.com/v1/artists/{0}'.format(artist_id)
    PARAMS = {'id': artist_id}

    r = requests.get(url=URL, headers=HEADERS)
    response, data = r.status_code, r.json()

    if response == 200:
        frame = pd.DataFrame([[data['id'], data['name'], data['genres'], data['popularity']]])
        frames.append(frame)
    else:
        print('Request failed at artist {}'.format(artist_id))

In [27]:
data

{'external_urls': {'spotify': 'https://open.spotify.com/artist/6BIQO80XQtgE0ueJ1jRmBH'},
 'followers': {'href': None, 'total': 125},
 'genres': [],
 'href': 'https://api.spotify.com/v1/artists/6BIQO80XQtgE0ueJ1jRmBH',
 'id': '6BIQO80XQtgE0ueJ1jRmBH',
 'images': [],
 'name': 'Nick Allbrook',
 'popularity': 7,
 'type': 'artist',
 'uri': 'spotify:artist:6BIQO80XQtgE0ueJ1jRmBH'}

In [19]:
artists = pd.concat(frames)

In [32]:
artists.columns = ['artist_id', 'artist_name', 'genres', 'artist_popularity']

ValueError: Length mismatch: Expected axis has 3 elements, new values have 4 elements

In [23]:
songs.columns = ['added_at', 'song_id', 'song_name', 'duration_ms', 'song_popularity', 'artists_ids']

In [25]:
songs.to_csv('saved_songs.csv', sep='\t', index=False)
artists.to_csv('artists_infos.csv', sep='\t', index=False)