In [11]:
import sys
import os
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import csv
import numpy as np
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from keras.models import model_from_json
import joblib

os.environ['SPOTIPY_CLIENT_ID'] = 'SPOTIPY_CLIENT_ID'
os.environ['SPOTIPY_CLIENT_SECRET'] = 'SPOTIPY_CLIENT_SECRET'

client_credentials_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [None]:
def get_artist(name):
    results = sp.search(q='artist:' + name, type='artist')
    items = results['artists']['items']
    if len(items) > 0:
        return items[0]
    else:
        return None
    

def get_artist_features(name):
    artist = get_artist(name)
    results = sp.artist_albums(artist['id'])
    albums = results['items']
    album_ids = []
    for album in albums:
        album_ids.append(album['id'])
    
    # features: valence, acousticness, danceability,
    #           energy, instrumentalness, speechiness, & tempo, liveness
    features = []
    for album_id in album_ids:
        results = sp.album_tracks(album_id)
        tracks = results['items']

        for track in tracks:
            audioFeatures = sp.audio_features(track['id'])[0]
            pop = sp.track(track['id'])['popularity']
            features.append([track['name'], audioFeatures['duration_ms'], audioFeatures['key'], audioFeatures['mode'], 
                             audioFeatures['time_signature'], audioFeatures['acousticness'], audioFeatures['danceability'], 
                             audioFeatures['energy'], audioFeatures['instrumentalness'], audioFeatures['liveness'], 
                             audioFeatures['loudness'], audioFeatures['speechiness'], audioFeatures['valence'], 
                             audioFeatures['tempo'], pop])

    return features

def store_artist_features(name):
    with open(path_to_artist_info + name + '.csv', 'w', encoding="utf-8-sig", newline='') as csvfile:
        features = get_artist_features(name)
        writer = csv.writer(csvfile)
        writer.writerow(['Name', 'Duration (MS)', 'Key', 'Mode', 'Time Signature', 'Acousticness', 'Danceability', 
                        'Energy', 'Instrumentalness', 'Liveness', 'Loudness', 'Speechiness', 'Valence', 'Tempo', 'Popularity'])
        
        for row in features:
            writer.writerow(row)

In [None]:
artistsNG = ["Fromis_9", "Weki Meki", "Bolbbalgan4", "Gugudan"]

for artist in artists:
    store_artist_features(artist)

#https://api.spotify.com/v1/tracks/{0PAq1DRlOGBAjXLno94j6b}

In [7]:
path_to_artist_info = '/content/drive/MyDrive/spotify-neural-network-master/'

In [12]:
name = []
duration = []
key = []
mode = []
time_sig = []
acousticness = []
danceability = []
energy = []
instrumentalness = []
liveness = []
loudness = []
speechiness = []
valence = []
tempo = []
gender = []
group = []
popularity = []

for artist in artists:
    if artist in guys:
        group_gender = 'M'
    else:
        group_gender = 'F'
    with open(path_to_artist_info + 'all_artist' + ".csv", newline='', encoding='utf-8-sig') as csvfile:
        has_header = csv.Sniffer().has_header(csvfile.readline())
        csvfile.seek(0)  # Rewind.
        reader = csv.reader(csvfile)
        if has_header:
            next(reader)
        for row in reader:
            if float(row[3]) < 300000:
                    name.append(row[0])
                    group.append(artist)
                    gender.append(group_gender)
                    duration.append(float(row[3]))
                    key.append(float(row[4]))
                    mode.append(float(row[5]))
                    time_sig.append(float(row[6]))
                    acousticness.append(float(row[7]))
                    danceability.append(float(row[8]))
                    energy.append(float(row[9]))
                    instrumentalness.append(float(row[10]))
                    liveness.append(float(row[11]))
                    loudness.append(float(row[12]))
                    speechiness.append(float(row[13]))
                    valence.append(float(row[14]))
                    tempo.append(float(row[15]))
                    popularity.append(float(row[16]))

In [13]:
with open("all_artist.csv", 'w', newline='', encoding='utf-8-sig') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['Name of Song', 'Group', 'Gender', 'Duration (MS)', 'Key', 'Mode', 'Time Signature', 'Acousticness', 'Danceability', 
                        'Energy', 'Instrumentalness', 'Liveness', 'Loudness', 'Speechiness', 'Valence', 'Tempo', 'Popularity'])
        
    for i in range(len(name)):
        writer.writerow([name[i], group[i], gender[i], duration[i], key[i], mode[i], time_sig[i], acousticness[i], 
                         danceability[i], energy[i], instrumentalness[i], liveness[i], loudness[i], speechiness[i], valence[i], 
                         tempo[i], popularity[i]])

In [14]:
def get_data_by_gender():
    audio_features = np.empty((0,13))
    gender = np.empty((0,1))
    with open("/content/drive/MyDrive/spotify-neural-network-master/all_artist.csv", newline='', encoding='utf-8-sig') as csvfile:
        has_header = csv.Sniffer().has_header(csvfile.readline())
        csvfile.seek(0)  # Rewind.
        reader = csv.reader(csvfile)
        if has_header:
            next(reader)
        for row in reader:
            gender = np.append(gender, row[2])
            audio_features = np.append(audio_features, [[float(row[3]), float(row[4]), float(row[5]), float(row[6]), float(row[7]), float(row[8]), 
                                   float(row[9]), float(row[10]), float(row[11]), float(row[12]), float(row[13]), 
                                   float(row[14]), float(row[15])]], axis=0)
    return audio_features, gender

def get_data_by_popularity():
    audio_features = np.empty((0,13))
    pop = np.empty((0,1))
    with open("/content/drive/MyDrive/spotify-neural-network-master/all_artist.csv", newline='', encoding='utf-8-sig') as csvfile:
        has_header = csv.Sniffer().has_header(csvfile.readline())
        csvfile.seek(0)  # Rewind.
        reader = csv.reader(csvfile)
        if has_header:
            next(reader)
        for row in reader:
            #print(row[1])
            pop = np.append(pop, row[16])
            audio_features = np.append(audio_features, [[float(row[3]), float(row[4]), float(row[5]), float(row[6]), float(row[7]), float(row[8]), 
                                   float(row[9]), float(row[10]), float(row[11]), float(row[12]), float(row[13]), 
                                   float(row[14]), float(row[15])]], axis=0)
    return audio_features, pop

In [15]:
from keras import optimizers
from keras.metrics import categorical_accuracy
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.utils import np_utils

x1, y1 = get_data_by_gender()
encoder = LabelEncoder()
encoder.fit(y1)
encoded_y1 = encoder.transform(y1)
x2, y2 = get_data_by_popularity()

In [16]:
#gender
# baseline model
def create_baseline_gender():
    # create model
    model = Sequential()
    model.add(Dense(60, input_dim=13, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

estimators = []
estimators.append(('standardize', StandardScaler()))
model = KerasClassifier(build_fn=create_baseline_gender, epochs=100, batch_size=5, verbose=1)
estimators.append(('mlp', model))
pipeline = Pipeline(estimators)
#kfold = StratifiedKFold(n_splits=10, shuffle=True)
#results = cross_val_score(pipeline, x1, encoded_y1, cv=kfold)
#print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
#BASELINE

  


In [17]:
#now fit training data
fitted = pipeline.fit(x1, encoded_y1)
filename = 'finalized_model_gender.sav'
joblib.dump(pipeline, filename)
predicted = pipeline.predict(x1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [18]:
for i in range(27):
    print(pipeline.predict([[i,i,i,i,i,i,i,i,i,i,i,i,i]]))

[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]
[[1]]


In [19]:
import csv
from sklearn import preprocessing
def get_data():
    excel_file = []
    with open("/content/drive/MyDrive/spotify-neural-network-master/all_artist.csv", newline='', encoding='utf-8-sig') as csvfile:
        has_header = csv.Sniffer().has_header(csvfile.readline())
        csvfile.seek(0)  # Rewind.
        reader = csv.reader(csvfile)
        if has_header:
            next(reader)
        for row in reader:
            excel_file.append([row[0], row[1], float(row[3]), float(row[4]), float(row[5]), float(row[6]), float(row[7]), float(row[8]), 
                                   float(row[9]), float(row[10]), float(row[11]), float(row[12]), float(row[13]), 
                                   float(row[14]), float(row[15])])
    return excel_file

def sounds_closest_to(f):
    durationMM = [5000, 299880.0]
    keyMM = [0, 11]
    modeMM = [0, 1]
    time_sigMM = [0, 5]
    acousticnessMM = [0, 0.995]
    danceabilityMM = [0.0, 0.954]
    energyMM = [0.0116, 0.999]
    instrumentalnessMM = [0.0, 0.989]
    livenessMM = [0.0, 0.987]
    loudnessMM = [-29.375, -0.005]
    speechinessMM = [0.0, 0.955]
    valenceMM = [0.0, 0.978]
    tempoMM = [0.0, 248.052]
    mm = [durationMM, keyMM, modeMM, time_sigMM, acousticnessMM, danceabilityMM, energyMM, instrumentalnessMM, livenessMM,
          loudnessMM, speechinessMM, valenceMM, tempoMM]
    artists = get_data()
    least_squares = []
    for artist in artists:
        ls = 0
        for i in range(len(f)):
            ls += (normalize(f[i], mm[i]) - normalize(artist[i+2], mm[i]))**2
        least_squares.append(ls)
    indexOfMin = least_squares.index(min(least_squares))
    return artists[indexOfMin][0]

def normalize(x, mm):
    z = (x-mm[0])/(mm[1] - mm[0])
    return z

In [20]:
sounds_closest_to([206187,10,0,4,0.00467,0.771,0.894,0,0.349,-4.981,0.0367,0.584,120.003])

'Kiss - Live'

In [25]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

def create_baseline_regression():

    model = Sequential()
    model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=create_baseline_regression, epochs=50, batch_size=5, verbose=1)))
pipeline = Pipeline(estimators)
pipeline.fit(x2, y2)



Epoch 1/50


UnimplementedError: ignored

In [26]:
pipeline.predict(x1)



array([-0.06389932, -0.03922752, -0.01197201, ..., -0.0213007 ,
       -0.00027984,  0.00545166], dtype=float32)