# Spotify Analytics - Group D

- Constança Drago Braga 
- Alexander Gross
- Khaled Mahmoud
- Diego Mata Saravia 
- Orlando Montalvo Puccini 
- Nadine Raad
- Guillermo Palacín Gómez 

## Authentication in the Spotify API

Credentials are stored in an external file (`credentials.json`) for security purposes, so that they are not exposed in the notebook.

In [1]:
import base64
import requests
import json
from urllib.parse import urlencode

def load_credentials():
    """Load the Spotify API credentials from ``credentials.json``.

    The file is looked up relative to the current working directory and
    parsed as JSON.

    Returns:
        The parsed JSON content, or ``None`` when the file cannot be read or
        does not contain valid JSON. In the failure case an error message and
        the underlying exception are printed.
    """
    try:
        with open('credentials.json', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError covers a missing/unreadable file; json.JSONDecodeError and
        # UnicodeDecodeError are both ValueError subclasses. Anything else
        # (e.g. KeyboardInterrupt) is deliberately left to propagate.
        print("There was an error loading the credentials.")
        print(e)
        return None

def request_token(token_header):
    """Request an access token from the Spotify Accounts service.

    Posts a ``client_credentials`` grant to the token endpoint and turns the
    response into a header that can be sent with later API requests.

    Args:
        token_header: Headers for the token request. The caller in this file
            passes ``{'Authorization': 'Basic <base64 id:secret>'}``.

    Returns:
        A dict ``{"Authorization": "<token_type> <access_token>"}`` built from
        the response, or ``None`` when the request fails or the response does
        not contain the expected fields. In the failure case an error message
        and the underlying exception are printed.
    """
    url = 'https://accounts.spotify.com/api/token'
    data = {'grant_type': 'client_credentials'}
    try:
        # The timeout prevents the notebook from hanging forever on a stalled
        # connection; a timeout is reported like any other request failure.
        response = requests.post(url, data=data, headers=token_header, timeout=30)
        # Turn 4xx/5xx answers into an informative HTTPError instead of a
        # bare KeyError on the missing 'access_token' field.
        response.raise_for_status()
        response_json = response.json()
        access_token = response_json['access_token']
        token_type = response_json['token_type']
    except (requests.RequestException, KeyError, TypeError, ValueError) as e:
        print("There was an error when requesting the access token. Contact MBD Team D Section 2 for support.")
        print(e)
        return None
    return {"Authorization": '{} {}'.format(token_type, access_token)}
    
def get_spotify_token():
    """Build the authorization header for Spotify API requests.

    Loads ``client_id`` and ``client_secret`` through ``load_credentials()``,
    Base64-encodes ``"<client_id>:<client_secret>"`` into a ``Basic``
    authorization header and exchanges it for a token header through
    ``request_token()``.

    Returns:
        Whatever ``request_token()`` returns (the authorization header dict,
        or ``None`` on failure). ``None`` is also returned when the
        credentials could not be loaded or lack one of the required entries.
    """
    credentials = load_credentials()
    if credentials is None:
        # load_credentials() has already reported the problem.
        return None
    try:
        client_id = credentials['client_id']
        client_secret = credentials['client_secret']
    except (KeyError, TypeError) as e:
        # Missing entry, or the JSON document is not an object.
        print("The credentials file does not contain the expected entry: {}".format(e))
        return None

    client_creds = "{}:{}".format(client_id, client_secret)
    c_b64 = base64.b64encode(client_creds.encode())
    token_header = {'Authorization': 'Basic {}'.format(c_b64.decode())}
    return request_token(token_header)
  

In [None]:
# Obtain the Spotify authorization header once; the cells below reuse
# `header` for their API requests.
header = get_spotify_token()

## Search Request

## Load the usernames

In [None]:
import yaml

def load_usernames():
    """Read ``usernames.yaml`` from the working directory and parse it.

    Returns:
        The object produced by ``yaml.safe_load`` for the file, or ``None``
        if anything goes wrong while opening or parsing it. On failure a
        support message and the exception are printed.
    """
    try:
        with open('usernames.yaml', 'r') as stream:
            return yaml.safe_load(stream)
    except Exception as err:
        print("There was an error loading the Usernames. Contact MBD Team D Section 2 for support.")
        print(err)
    return None
        
# Load the user list once; the bare expression on the last line makes the
# notebook display the loaded value.
usernames = load_usernames()
usernames

## Search each user's playlists and their tracks, and save the results to HDFS
Each playlist (including its tracks) is saved to HDFS as `/datalake/raw/spotify/users/<user_id>/<playlist_id>.json`; the audio features of the collected tracks are saved under `/datalake/raw/spotify/song_features/`.

In [None]:
from hdfs import InsecureClient
import time
# HDFS client used below to write the raw API responses (connects as user 'osbdet').
client = InsecureClient('http://localhost:50070', user='osbdet')


# Common prefix of the Spotify Web API endpoints requested below.
base_url = 'https://api.spotify.com/v1'
# Extend the authorization header returned by get_spotify_token() with a content type.
header['Content-Type'] = 'application/json'


def get_audio_features_song(lst, chunk_size=100):
    """Fetch audio features for the given track IDs and store them in HDFS.

    The IDs are sent to the ``/audio-features`` endpoint in batches. Each JSON
    response is written to
    ``/datalake/raw/spotify/song_features/songs_<offset>.json``, where
    ``<offset>`` is the position of the batch's first ID among the valid IDs
    (0, 100, 200, ... with the default batch size), overwriting any existing
    file of that name.

    Args:
        lst: Iterable of Spotify track IDs. Falsy entries (``None``, ``""``)
            are skipped.
        chunk_size: Number of IDs per request. Defaults to 100, the maximum
            the endpoint accepts per the Spotify Web API reference.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.

    Returns:
        None. A batch that fails (request error, HTTP error status, HDFS write
        error) is reported on stdout and skipped; the remaining batches are
        still processed.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    # A single None in a batch would make ','.join() raise and lose the whole
    # batch, so drop unusable IDs up front.
    track_ids = [track_id for track_id in lst if track_id]
    url = 'https://api.spotify.com/v1/audio-features'

    for offset in range(0, len(track_ids), chunk_size):
        chunk = track_ids[offset:offset + chunk_size]
        try:
            data = urlencode({"ids": ','.join(chunk)})
            lookup_url = f'{url}?{data}'
            r = requests.get(lookup_url, headers=header, timeout=30)
            # Do not persist error payloads (expired token, rate limit, ...)
            # as if they were audio features.
            r.raise_for_status()
            features_song = r.json()
            client.write(
                '/datalake/raw/spotify/song_features/songs_{}.json'.format(offset),
                data=json.dumps(features_song),
                overwrite=True,
            )
        except Exception as e:
            # Best effort per batch: report the failure and keep going.
            print("Could not store the audio features of the batch starting at {}: {}".format(offset, e))

        
# For every configured user: list their playlists, store each playlist's raw
# JSON in HDFS and collect the IDs of its tracks. Afterwards the audio
# features of all collected tracks are fetched and stored.
songs = []
for u in usernames['usernames']:
    try:
        url = base_url + '/users/{}/playlists'.format(u['id'])
        r = requests.get(url, headers=header, timeout=30)
        r.raise_for_status()
        user_playlists = r.json()
        playlists = user_playlists['items']
    except Exception as e:
        # Best effort per user: report the failure and move on.
        print("Could not list the playlists of user {}: {}".format(u, e))
        continue

    for p in playlists:
        try:
            url = base_url + '/playlists/{}'.format(p['id'])
            r = requests.get(url, headers=header, timeout=30)
            # Do not persist error payloads as if they were playlists.
            r.raise_for_status()
            playlist_info = r.json()
            # Tag the playlist with the user it was collected for.
            playlist_info['user_playlist_id'] = u['id']
            playlist_info['user_name'] = u['name']
            client.write('/datalake/raw/spotify/users/{}/{}.json'.format(u['id'], p['id']), data=json.dumps(playlist_info), overwrite=True)
            playlist_tracks = playlist_info['tracks']['items']
        except Exception as e:
            # A failing playlist must not hide the user's remaining playlists.
            print("Skipping a playlist of user {}: {}".format(u['id'], e))
            continue
        # NOTE(review): a 30 s pause was disabled here, presumably for rate
        # limiting - confirm whether it is still needed.
        #time.sleep(30)
        for song in playlist_tracks:
            # Entries can lack a track or a track ID; only keep usable IDs.
            track = (song or {}).get('track') or {}
            if track.get('id'):
                songs.append(track['id'])
get_audio_features_song(songs)



    

In [None]:
# Shell escape: install Flask with pip. Flask is not used in the cells above;
# presumably a later cell needs it - confirm.
! pip install flask