# Song Popularity Prediction

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the song dataset from "song_data.csv" (path is relative to the
# notebook's working directory) and preview the first rows.
data = pd.read_csv("song_data.csv")
data.head()

Unnamed: 0,song_name,song_popularity,song_duration_ms,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,audio_mode,speechiness,tempo,time_signature,audio_valence
0,Boulevard of Broken Dreams,73,262333,0.00552,0.496,0.682,2.9e-05,8,0.0589,-4.095,1,0.0294,167.06,4,0.474
1,In The End,66,216933,0.0103,0.542,0.853,0.0,3,0.108,-6.407,0,0.0498,105.256,4,0.37
2,Seven Nation Army,76,231733,0.00817,0.737,0.463,0.447,0,0.255,-7.828,1,0.0792,123.881,4,0.324
3,By The Way,74,216933,0.0264,0.451,0.97,0.00355,0,0.102,-4.938,1,0.107,122.444,4,0.198
4,How You Remind Me,56,223826,0.000954,0.447,0.766,0.0,10,0.113,-5.065,1,0.0313,172.011,4,0.574


In [3]:
# Candidate input columns: everything except the song title, two audio
# columns that are left out of this analysis, and the target itself.
features = data.drop(
    columns=["song_name", "instrumentalness", "loudness", "song_popularity"]
)

# Raw target column (rescaled in the next cell).
Y = data["song_popularity"]

In [4]:
# Rescale the target by 1/100 (scores appear to be on a 0-100 scale) and
# convert it to a NumPy array.
# The value is derived from `data` instead of from the previous `Y`, so the
# cell can be re-run safely: the old form `(Y/100).values` fails on a second
# run because `Y` is then already an ndarray and has no `.values`.
Y = (data["song_popularity"] / 100).values
Y

array([0.73, 0.66, 0.76, ..., 0.23, 0.55, 0.6 ])

In [5]:
def loss_fun_one(x, y, w):
    """Half mean squared error of the one-feature model ``w * x``.

    The model has no intercept term.

    Args:
        x: 1-D array of feature values; its ``shape[0]`` sets the number
            of examples.
        y: Targets, indexable at the same positions as ``x``.
        w: The single weight (a scalar or a size-1 array).

    Returns:
        ``sum((w * x[i] - y[i]) ** 2) / (2 * m)``. The result takes the
        shape of ``w`` through broadcasting (e.g. shape ``(1,)`` when ``w``
        has shape ``(1,)``).
    """
    n_samples = x.shape[0]
    squared_error_sum = 0
    for idx in range(n_samples):
        residual = w * x[idx] - y[idx]
        squared_error_sum = squared_error_sum + residual ** 2
    return 1 / (2 * n_samples) * squared_error_sum

In [6]:
def loss_function(X, y, w):
    """Half mean squared error of the linear model ``X @ w`` (no intercept).

    Computed with NumPy matrix operations rather than a Python loop over
    rows, so it stays fast on large inputs. The result may differ from a
    sequential loop in the last floating-point digits.

    Args:
        X: 2-D array-like of shape (m, n): m examples, n features.
        y: 1-D array-like of m targets.
        w: 1-D array-like of n weights.

    Returns:
        Scalar ``sum((X @ w - y) ** 2) / (2 * m)``.

    Raises:
        ValueError: If ``X`` is not 2-D or contains no examples.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)
    if X.ndim != 2:
        raise ValueError("loss_function expects X to be 2-D (examples x features)")
    m = X.shape[0]
    if m == 0:
        # Averaging over zero examples is undefined; fail with a clear message.
        raise ValueError("loss_function requires at least one example")
    residuals = X @ w - y
    return np.dot(residuals, residuals) / (2 * m)

In [7]:
def find_gradient_one(x, y, w):
    """Gradient of the one-feature half-MSE loss with respect to ``w``.

    Args:
        x: 1-D array of feature values; its ``shape[0]`` sets the number
            of examples.
        y: Targets, indexable at the same positions as ``x``.
        w: The single weight (a scalar or a size-1 array).

    Returns:
        ``sum((w * x[i] - y[i]) * x[i]) / m``, with the shape of ``w``
        through broadcasting.
    """
    n_samples = x.shape[0]
    grad_sum = 0
    for idx in range(n_samples):
        residual = w * x[idx] - y[idx]
        grad_sum += residual * x[idx]
    return grad_sum / n_samples

In [8]:
def find_gradient(X, y, w):
    """Gradient of the half-MSE loss of ``X @ w`` with respect to ``w``.

    Uses matrix products instead of nested Python loops over examples and
    features; this function is called once per gradient-descent step, so
    the loop overhead dominated run time. The result may differ from the
    loop version in the last floating-point digits.

    Args:
        X: 2-D array-like of shape (m, n): m examples, n features.
        y: 1-D array-like of m targets.
        w: 1-D array-like of n weights.

    Returns:
        Array of shape (n,): ``X.T @ (X @ w - y) / m``.

    Raises:
        ValueError: If ``X`` is not 2-D or contains no examples.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)
    if X.ndim != 2:
        raise ValueError("find_gradient expects X to be 2-D (examples x features)")
    m = X.shape[0]
    if m == 0:
        # Dividing by zero examples would silently yield NaNs; fail loudly.
        raise ValueError("find_gradient requires at least one example")
    residuals = X @ w - y          # prediction error for each example
    return X.T @ residuals / m     # average of error * feature, per feature

In [9]:
def gradient_descent_one(x, y, w, theta, iters):
    """Fit the one-feature weight with batch gradient descent.

    Args:
        x: Feature values, passed through to ``find_gradient_one``.
        y: Targets, passed through to ``find_gradient_one``.
        w: Starting weight; it is not modified in place.
        theta: Learning rate, i.e. the factor applied to each gradient step.
        iters: Number of update steps to perform.

    Returns:
        The weight after ``iters`` updates ``w <- w - theta * gradient``.
    """
    for _ in range(iters):
        # Move against the gradient, scaled by the learning rate.
        w = w - theta * find_gradient_one(x, y, w)
    return w

In [10]:
def gradient_descent(X, y, w, theta, iters):
    """Fit the weight vector with batch gradient descent.

    Args:
        X: Feature matrix, passed through to ``find_gradient``.
        y: Targets, passed through to ``find_gradient``.
        w: Starting weights; they are not modified in place.
        theta: Learning rate, i.e. the factor applied to each gradient step.
        iters: Number of update steps to perform.

    Returns:
        The weights after ``iters`` updates ``w <- w - theta * gradient``.
    """
    for _ in range(iters):
        # Move against the gradient, scaled by the learning rate.
        w = w - theta * find_gradient(X, y, w)
    return w

In [11]:
def predict_one(x, w):
    """Predictions ``x * w`` of the one-feature model (no intercept).

    The weight is reduced to a plain scalar before multiplying. The earlier
    element-wise form stored a size-1 array into a scalar slot
    (``y[i] = x[i] * w``), which NumPy has deprecated since 1.25.

    Args:
        x: Array-like of m feature values (1-D; shape (m, 1) is flattened).
        w: The single weight, as a scalar or any size-1 array.

    Returns:
        Float array of shape (m,) holding ``x[i] * w``.

    Raises:
        ValueError: If ``w`` does not hold exactly one value, or ``x``
            cannot be viewed as m scalar values.
    """
    x = np.asarray(x, dtype=float)
    weight = np.asarray(w, dtype=float)
    if weight.size != 1:
        raise ValueError(
            "predict_one expects a single weight (a scalar or a size-1 array)"
        )
    # .item() yields a Python float, so the product keeps x's shape;
    # the reshape enforces the (m,) result contract.
    return (x * weight.item()).reshape(x.shape[0])

In [12]:
def predict(X, w):
    """Predictions ``X @ w`` of the linear model (no intercept).

    Uses one matrix product instead of a Python loop over rows.

    Args:
        X: 2-D array-like of shape (m, n): m examples, n features.
        w: Array-like of n weights.

    Returns:
        Float array of shape (m,) with one prediction per example.

    Raises:
        ValueError: If ``X`` is not 2-D, or the shapes of ``X`` and ``w``
            do not yield exactly one value per example.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError("predict expects X to be 2-D (examples x features)")
    # The reshape enforces the (m,) result contract (e.g. for w of shape (n, 1)).
    return (X @ np.asarray(w, dtype=float)).reshape(X.shape[0])

Linear model with one feature:
Energy

In [13]:
# Single input feature: the "energy" column as a NumPy array.
X_one = data["energy"].values
X_one

array([0.682, 0.853, 0.463, ..., 0.325, 0.326, 0.381])

In [26]:
# One-feature (energy) fit from a zero weight: learning rate 0.5, 1000 steps.
theta = 0.5
iterations = 1000
w_in = np.zeros(1)
w_one = gradient_descent_one(X_one, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_one}")

w found by gradient descent: [0.74018662]


In [27]:
# Predictions of the one-feature model for every song, using the current w_one.
y_one = predict_one(X_one, w_one)
y_one

array([0.50480728, 0.63137919, 0.34270641, ..., 0.24056065, 0.24130084,
       0.2820111 ])

In [28]:
# Loss of the one-feature model for the current w_one (preceding fit used theta = 0.5).
print(f"Loss by using one feature and 0.5 learning rate: {loss_fun_one(X_one, Y, w_one)}")

Loss by using one feature and 0.5 learning rate: [0.03787889]


In [29]:
# One-feature (energy) fit from a zero weight: learning rate 0.1, 1000 steps.
theta = 0.1
iterations = 1000
w_in = np.zeros(1)
w_one = gradient_descent_one(X_one, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_one}")

w found by gradient descent: [0.74018662]


In [30]:
# Loss of the one-feature model for the current w_one (preceding fit used theta = 0.1).
print(f"Loss by using one feature and 0.1 learning rate: {loss_fun_one(X_one, Y, w_one)}")

Loss by using one feature and 0.1 learning rate: [0.03787889]


In [31]:
# One-feature (energy) fit from a zero weight: learning rate 0.01, 1000 steps.
theta = 0.01
iterations = 1000
w_in = np.zeros(1)
w_one = gradient_descent_one(X_one, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_one}")

w found by gradient descent: [0.73296085]


In [32]:
# Loss of the one-feature model for the current w_one (preceding fit used theta = 0.01).
print(f"Loss by using one feature and 0.01 learning rate: {loss_fun_one(X_one, Y, w_one)}")

Loss by using one feature and 0.01 learning rate: [0.03789095]


In [33]:
# One-feature (energy) fit from a zero weight: learning rate 0.001, 1000 steps.
theta = 0.001
iterations = 1000
w_in = np.zeros(1)
w_one = gradient_descent_one(X_one, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_one}")

w found by gradient descent: [0.27383462]


In [34]:
# Loss of the one-feature model for the current w_one (preceding fit used theta = 0.001).
print(f"Loss by using one feature and 0.001 learning rate: {loss_fun_one(X_one, Y, w_one)}")

Loss by using one feature and 0.001 learning rate: [0.08810196]


In [35]:
# Show the candidate feature table (pandas renders a truncated preview).
features

Unnamed: 0,song_duration_ms,acousticness,danceability,energy,key,liveness,audio_mode,speechiness,tempo,time_signature,audio_valence
0,262333,0.005520,0.496,0.682,8,0.0589,1,0.0294,167.060,4,0.474
1,216933,0.010300,0.542,0.853,3,0.1080,0,0.0498,105.256,4,0.370
2,231733,0.008170,0.737,0.463,0,0.2550,1,0.0792,123.881,4,0.324
3,216933,0.026400,0.451,0.970,0,0.1020,1,0.1070,122.444,4,0.198
4,223826,0.000954,0.447,0.766,10,0.1130,1,0.0313,172.011,4,0.574
...,...,...,...,...,...,...,...,...,...,...,...
18830,159645,0.893000,0.500,0.151,11,0.1110,1,0.0348,113.969,4,0.300
18831,205666,0.765000,0.495,0.161,11,0.1050,0,0.0301,94.286,4,0.265
18832,182211,0.847000,0.719,0.325,0,0.1250,1,0.0355,130.534,4,0.286
18833,352280,0.945000,0.488,0.326,3,0.1190,1,0.0328,106.063,4,0.323


In [37]:
# Three-feature design matrix: remove every other column of `features`
# in one call, then convert the remainder to a NumPy array.
X_three = features.drop(
    columns=[
        "song_duration_ms",
        "key",
        "liveness",
        "audio_mode",
        "speechiness",
        "tempo",
        "time_signature",
        "audio_valence",
    ]
).values
X_three

array([[0.00552, 0.496  , 0.682  ],
       [0.0103 , 0.542  , 0.853  ],
       [0.00817, 0.737  , 0.463  ],
       ...,
       [0.847  , 0.719  , 0.325  ],
       [0.945  , 0.488  , 0.326  ],
       [0.911  , 0.64   , 0.381  ]])

In [39]:
# Three-feature fit from zero weights: learning rate 0.5, 1000 steps.
theta = 0.5
iterations = 1000
w_in = np.zeros(3)
w_three = gradient_descent(X_three, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_three}")

w found by gradient descent: [0.16984478 0.45824615 0.2871338 ]


In [40]:
# Loss of the three-feature model for the current w_three (preceding fit used theta = 0.5).
print(f"Loss by using three features and 0.5 learning rate: {loss_function(X_three, Y, w_three)}")

Loss by using three features and 0.5 learning rate: 0.02631496048567441


In [41]:
# Three-feature fit from zero weights: learning rate 0.1, 1000 steps.
theta = 0.1
iterations = 1000
w_in = np.zeros(3)
w_three = gradient_descent(X_three, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_three}")

w found by gradient descent: [0.17265549 0.45156345 0.29259745]


In [42]:
# Loss of the three-feature model for the current w_three (preceding fit used theta = 0.1).
print(f"Loss by using three features and 0.1 learning rate: {loss_function(X_three, Y, w_three)}")

Loss by using three features and 0.1 learning rate: 0.02631599430347127


In [43]:
# Three-feature fit from zero weights: learning rate 0.01, 1000 steps.
theta = 0.01
iterations = 1000
w_in = np.zeros(3)
w_three = gradient_descent(X_three, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_three}")

w found by gradient descent: [0.17455716 0.39259226 0.34910914]


In [44]:
# Loss of the three-feature model for the current w_three (preceding fit used theta = 0.01).
print(f"Loss by using three features and 0.01 learning rate: {loss_function(X_three, Y, w_three)}")

Loss by using three features and 0.01 learning rate: 0.026441459118948064


In [45]:
# Three-feature fit from zero weights: learning rate 0.001, 1000 steps.
theta = 0.001
iterations = 1000
w_in = np.zeros(3)
w_three = gradient_descent(X_three, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_three}")

w found by gradient descent: [0.08911828 0.22394186 0.22345244]


In [46]:
# Loss of the three-feature model for the current w_three (preceding fit used theta = 0.001).
print(f"Loss by using three features and 0.001 learning rate: {loss_function(X_three, Y, w_three)}")

Loss by using three features and 0.001 learning rate: 0.04913086695031203


In [47]:
# Five-feature design matrix: remove the unused columns of `features`
# in one call, then convert the remainder to a NumPy array.
X_five = features.drop(
    columns=[
        "song_duration_ms",
        "key",
        "audio_mode",
        "tempo",
        "time_signature",
        "audio_valence",
    ]
).values
X_five

array([[0.00552, 0.496  , 0.682  , 0.0589 , 0.0294 ],
       [0.0103 , 0.542  , 0.853  , 0.108  , 0.0498 ],
       [0.00817, 0.737  , 0.463  , 0.255  , 0.0792 ],
       ...,
       [0.847  , 0.719  , 0.325  , 0.125  , 0.0355 ],
       [0.945  , 0.488  , 0.326  , 0.119  , 0.0328 ],
       [0.911  , 0.64   , 0.381  , 0.104  , 0.0302 ]])

In [48]:
# Five-feature fit from zero weights: learning rate 0.5, 1000 steps.
theta = 0.5
iterations = 1000
w_in = np.zeros(5)
w_five = gradient_descent(X_five, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_five}")

w found by gradient descent: [1.66976138e-01 4.57827980e-01 2.79135170e-01 3.49081282e-02
 3.00819197e-05]


In [49]:
# Loss of the five-feature model for the current w_five (preceding fit used theta = 0.5).
print(f"Loss by using five features and 0.5 learning rate: {loss_function(X_five, Y, w_five)}")

Loss by using five features and 0.5 learning rate: 0.02630249248167228


In [50]:
# Five-feature fit from zero weights: learning rate 0.1, 1000 steps.
theta = 0.1
iterations = 1000
w_in = np.zeros(5)
w_five = gradient_descent(X_five, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_five}")

w found by gradient descent: [0.16975665 0.44773697 0.28335249 0.0373317  0.02450493]


In [51]:
# Loss of the five-feature model for the current w_five (preceding fit used theta = 0.1).
print(f"Loss by using five features and 0.1 learning rate: {loss_function(X_five, Y, w_five)}")

Loss by using five features and 0.1 learning rate: 0.026306860280490504


In [52]:
# Five-feature fit from zero weights: learning rate 0.01, 1000 steps.
theta = 0.01
iterations = 1000
w_in = np.zeros(5)
w_five = gradient_descent(X_five, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_five}")

w found by gradient descent: [0.16938473 0.37912877 0.33235994 0.08423832 0.0538264 ]


In [53]:
# Loss of the five-feature model for the current w_five (preceding fit used theta = 0.01).
print(f"Loss by using five features and 0.01 learning rate: {loss_function(X_five, Y, w_five)}")

Loss by using five features and 0.01 learning rate: 0.026488731773825252


In [54]:
# Five-feature fit from zero weights: learning rate 0.001, 1000 steps.
theta = 0.001
iterations = 1000
w_in = np.zeros(5)
w_five = gradient_descent(X_five, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_five}")

w found by gradient descent: [0.08766357 0.22005667 0.21930536 0.06002408 0.03510742]


In [55]:
# Loss of the five-feature model for the current w_five (preceding fit used theta = 0.001).
print(f"Loss by using five features and 0.001 learning rate: {loss_function(X_five, Y, w_five)}")

Loss by using five features and 0.001 learning rate: 0.04733398669615134


In [61]:
# Seven-feature design matrix: remove the unused columns of `features`
# in one call, then convert the remainder to a NumPy array.
X_seven = features.drop(
    columns=["song_duration_ms", "key", "tempo", "time_signature"]
).values
X_seven

array([[0.00552, 0.496  , 0.682  , ..., 1.     , 0.0294 , 0.474  ],
       [0.0103 , 0.542  , 0.853  , ..., 0.     , 0.0498 , 0.37   ],
       [0.00817, 0.737  , 0.463  , ..., 1.     , 0.0792 , 0.324  ],
       ...,
       [0.847  , 0.719  , 0.325  , ..., 1.     , 0.0355 , 0.286  ],
       [0.945  , 0.488  , 0.326  , ..., 1.     , 0.0328 , 0.323  ],
       [0.911  , 0.64   , 0.381  , ..., 1.     , 0.0302 , 0.581  ]])

In [62]:
# Seven-feature fit from zero weights: learning rate 0.5, 1000 steps.
theta = 0.5
iterations = 1000
w_in = np.zeros(7)
w_seven = gradient_descent(X_seven, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_seven}")

w found by gradient descent: [ 0.16502964  0.50495143  0.31296047  0.0230785   0.04015932 -0.00616254
 -0.13694376]


In [63]:
# Loss of the seven-feature model for the current w_seven (preceding fit used theta = 0.5).
print(f"Loss by using seven features and 0.5 learning rate: {loss_function(X_seven, Y, w_seven)}")

Loss by using seven features and 0.5 learning rate: 0.02567210474184638


In [64]:
# Seven-feature fit from zero weights: learning rate 0.1, 1000 steps.
theta = 0.1
iterations = 1000
w_in = np.zeros(7)
w_seven = gradient_descent(X_seven, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_seven}")

w found by gradient descent: [ 0.16554554  0.4844385   0.30915323  0.03344187  0.04140741  0.03594172
 -0.1217388 ]


In [65]:
# Loss of the seven-feature model for the current w_seven (preceding fit used theta = 0.1).
print(f"Loss by using seven features and 0.1 learning rate: {loss_function(X_seven, Y, w_seven)}")

Loss by using seven features and 0.1 learning rate: 0.0256889381693104


In [66]:
# Seven-feature fit from zero weights: learning rate 0.01, 1000 steps.
theta = 0.01
iterations = 1000
w_in = np.zeros(7)
w_seven = gradient_descent(X_seven, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_seven}")

w found by gradient descent: [0.12562612 0.30304988 0.25345424 0.06516933 0.07930233 0.05020179
 0.10927142]


In [67]:
# Loss of the seven-feature model for the current w_seven (preceding fit used theta = 0.01).
print(f"Loss by using seven features and 0.01 learning rate: {loss_function(X_seven, Y, w_seven)}")

Loss by using seven features and 0.01 learning rate: 0.02747714621141122


In [68]:
# Seven-feature fit from zero weights: learning rate 0.001, 1000 steps.
theta = 0.001
iterations = 1000
w_in = np.zeros(7)
w_seven = gradient_descent(X_seven, Y, w_in, theta, iterations)
print(f"w found by gradient descent: {w_seven}")

w found by gradient descent: [0.06750876 0.17202233 0.17000521 0.04647642 0.14780201 0.02797491
 0.13208282]


In [69]:
# Loss of the seven-feature model for the current w_seven (preceding fit used theta = 0.001).
print(f"Loss by using seven features and 0.001 learning rate: {loss_function(X_seven, Y, w_seven)}")

Loss by using seven features and 0.001 learning rate: 0.03547979908221579
