In [186]:
import requests
import base64
import json
from secrets import *
import csv
import pandas as pd
from dotenv import load_dotenv
import os

load_dotenv()
ID = os.getenv('ID')
PSW = os.getenv('PSW')

# Step 1 - Authorization 
url = "https://accounts.spotify.com/api/token"
headers = {}
data = {}

# Encode as Base64
message = f"{ID}:{PSW}"
messageBytes = message.encode('ascii')
base64Bytes = base64.b64encode(messageBytes)
base64Message = base64Bytes.decode('ascii')


headers['Authorization'] = f"Basic {base64Message}"
data['grant_type'] = "client_credentials"

r = requests.post(url, headers=headers, data=data)

token = r.json()['access_token']


In [187]:
def query_data(params='genre:rock year:2022', offset=0):

    search_url = f"https://api.spotify.com/v1/search"

    headers = {
        "Authorization": "Bearer " + token
    }

    data = {
        'q' : params,
        'type': 'track',
        'limit': '50',
        'offset': str(offset),
        'market' :'US'
    }

    res = requests.get(url=search_url, headers=headers, params=data)
    items = res.json()['tracks']['items']

    ids = []
    pops = []
    names = []

    for item in items :
        ids.append(item['id'])
        names.append(item['name'])
        pops.append(item['popularity'])

    return ids, pops, names

    

In [188]:
offset = 0
ids = []
pops = []
names = []

while offset < 950 :
    offset += 50
    ids_t, pops_t, names_t = query_data(params='genre:rock year:2022', offset=offset)
    ids = ids + ids_t
    pops = pops + pops_t
    names = names + names_t

In [189]:
df = pd.DataFrame({'id' : ids, 'popularity' : pops, 'names': names}, index=range(len(ids)))

In [190]:
df

Unnamed: 0,id,popularity,names
0,3qhlB30KknSejmIvZZLjOD,61,End of Beginning
1,0sBJA2OCEECMs0HsdIQhvR,69,West Coast
2,0uBdQzKghx88d2Lp8SLFKJ,67,I'm In Love With You
3,4xAmbn7xiy6wGOs4oaxv70,64,Call Me Little Sunshine
4,7JIIY2oDPFkURYMCLJuzhe,65,"Elliot's Song - From ""Euphoria"" An HBO Origina..."
...,...,...,...
945,6UqbBLltlDQ7ozlRswVWeu,54,Los Hombres No Deben Llorar
946,1sJYOOldqYLjwIVPLXt2Fl,42,Hung The Moon
947,6r4jouSQy2Hl77pwMwChcM,41,Thank You
948,7m3Lu7PUKNDJ2HgvrAKME9,41,Silence Is Golden


In [191]:
def get_features(ids_list=ids): 
    
    headers = {
    "Authorization": "Bearer " + token
    }
    danceability_list = []
    energy_list = []
    acousticness_list = []
    tempo_list = []
    for id_unique in ids :
        featurers_url = f"https://api.spotify.com/v1/audio-features/{id_unique}"
        data = {'id' : id_unique}
        res = requests.get(url=featurers_url, headers=headers, params=data)
        items = res.json()
        danceability_list.append(items.get('danceability'))
        energy_list.append(items.get('energy'))
        acousticness_list.append(items.get('acousticness'))
        tempo_list.append(items.get('tempo'))
    return danceability_list, energy_list, acousticness_list ,tempo_list

In [192]:
danceability_list, energy_list, acousticnes_list ,tempo_list = get_features(ids_list=ids)

In [193]:
df['danceability'] = danceability_list
df['energy'] = energy_list
df['acousticness'] = acousticnes_list
df['tempo'] = tempo_list

In [194]:
df = df.dropna()

In [195]:
from sklearn.linear_model import LinearRegression

from sklearn.model_selection import train_test_split

lr = LinearRegression()

X = df[['danceability','energy', 'acousticness', 'tempo']]
y = df['popularity']

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)


lr.fit(X_train,y_train)

In [196]:
lr.score(X_test,y_test)

0.005945952270670962

In [197]:
ids_60, pops_60, names_60 = query_data(params='genre:rock year:1960')

In [198]:
danceability_list, energy_list = get_features(ids_list=ids_60)

ValueError: too many values to unpack (expected 2)

In [None]:
df_60 = pd.DataFrame({'names': names, 'popularity' : pops_60, 'danceability' : danceability_list, 'energy' : energy_list}, index=range(len(ids)))

In [None]:
X_test = df_60[['danceability', 'energy', 'names']].set_index('names')
y_test = df_60[['popularity', 'names']].set_index('names')

In [None]:
pred = lr.predict(X_test)

In [None]:
X_test['pred'] = pred

In [None]:
X_test['pred']

names
I Ain't Worried                                                70.979221
She Had Me At Heads Carolina                                   68.771406
Bones                                                          72.349264
About You                                                      67.239440
Bones                                                          72.275538
We Got History                                                 69.654018
Fancy Like                                                     70.426574
Your Heart Or Mine                                             70.704323
Hold Me Closer                                                 70.816642
Enemy (with JID) - from the series Arcane League of Legends    71.417388
THE LONELIEST                                                  69.919523
Worth A Shot (feat. Dierks Bentley)                            68.805338
Thinking 'Bout You (feat. MacKenzie Porter)                    67.440239
Voices In My Head                            

In [None]:
lr.score(X_test, y_test)

ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- pred
