This notebook was a check for the modules developed at the time. I wanted to make sure everything worked in sequence before building a local_main.py

In [1]:
%load_ext autoreload
%autoreload 2

import requests
import pandas as pd
data = pd.read_csv('./raw_data/tracks_features.csv')

In [22]:
api_key = 'cfc7fb20ddfa769f9def3f4dc02bc45d'

def api_request(artist_name):
    params = {'date':'upcoming'}
    artist_name = artist_name

    url = 'https://rest.bandsintown.com/artists/'
    response = requests.get(f'{url}{artist_name}/events?app_id={api_key}', params=params)
    # prediction1 = response.json()[1]['venue']['city']
    # prediction1
    prediction = response.json()
    cities = []
    times = []
    countries = []
    for i, pred in enumerate(prediction):
        city = response.json()[i]['venue']['city']
        country = response.json()[i]['venue']['country']
        time = response.json()[i]['starts_at']
        cities.append(city)
        times.append(time)
        countries.append(country)
    tabledata=[cities, countries, times]
    df = pd.DataFrame(tabledata)

    df = df.transpose()
    df.columns = ['City', 'Country', 'Date and time']
    df['Date and time']=df['Date and time'].str.replace('T', ' ')

    return df

In [3]:
from sklearn.neighbors import NearestNeighbors

def fit_nn(data_pca, n_neighbors):

    song_knn = NearestNeighbors(n_neighbors=n_neighbors).fit(data_pca)

    return song_knn

In [4]:
from sklearn.decomposition import PCA

def pca(preproc_num, n_components):
    pca = PCA(n_components=n_components)
    data_pca = pca.fit_transform(preproc_num)

    return data_pca

In [5]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import RobustScaler
import pandas as pd

def preprocess_data(data):
    def preproc_pipeline():
        features_num = [
                "danceability",
                "energy",
                "acousticness",
                "instrumentalness",
                "valence",
                "tempo",
                "duration_ms",
                "loudness",
                "speechiness",
                "liveness"
                ]

            #features_ohe = [
            #    "key",
            #    "time_signature",
                #"explicit"
            #  ]

        preproc_pipe = make_column_transformer(
            (RobustScaler(),features_num),
            #  (OneHotEncoder(sparse_output = False),features_ohe),
            remainder="passthrough"
            )

        return preproc_pipe

    pipeline = preproc_pipeline()

    df_preprocessed = pipeline.fit_transform(data)

    df_preprocessed = pd.DataFrame(df_preprocessed)

    return df_preprocessed

def preprocess_numerical(data):
    data_num = data.select_dtypes(exclude = ['object']).drop(columns = ['track_number', 'disc_number', 'key'])

    preproc_num = preprocess_data(data_num)

    return preproc_num

In [20]:
def init_model(data,
               n_neighbors,
               n_components=7):
    """
    Initialize the model and return the model object
    """
    preproc_num = preprocess_numerical(data)
    data_pca = pca(preproc_num, n_components)
    song_knn = fit_nn(data_pca, n_neighbors)

    return song_knn, data_pca


def find_artist(data,
                artist_name,
                song_name,
                n_neighbors, model, data_proc):
    """
    Finds {n_neighbors} artists that sound like a given song
    """
    #model, data_proc = init_model(data, n_neighbors)

    song_index = data.index[(data.name == song_name ) & (data.artists.str.strip('[]').str.strip("'") == artist_name)][0]

    similar_artists_i = model.kneighbors(data_proc[song_index].reshape(1, -1),
                                         return_distance=False)
    
    similar_artists = data.iloc[similar_artists_i[0]]['artists'].str.strip('[]').str.strip("'").tolist()

    return similar_artists


def find_gig(data,
             artist_name,
             song_name,
             n_neighbors, model, data_proc):
    """
    Find the proposed gigs based on a given song
    """

    gig_list = []

    similar_artists = find_artist(data,
                                 artist_name,
                                 song_name,
                                 n_neighbors, model, data_proc)

    for artist in similar_artists:
        gig = api_request(artist)

        if len(gig) != 0:
            return 'No gigs found'
        else:
            gig_list.append(gig)

    return gig_list

In [24]:
find_gig(data, 'Radiohead', 'Creep', n_neighbors=20000, model=model, data_proc=data_proc)

'No gigs found'

: 

In [14]:
find_artist(data, 'Radiohead', 'Creep', 10, model, data_proc)

605370

In [9]:
model, data_proc = init_model(data, n_neighbors=10, n_components=7)