### Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

### Preprocessing

In [2]:
# Defining labels for ranking scores:
def convert_y(dataY):
    y = []
    for i in range (len(dataY)):
        if(dataY[i] > 50):
            y.append(0)
        else:
            y.append(1)
    return np.array(y)

In [3]:
# Reading data:
dataX = pd.read_csv('../datasets/spotofy_music_normalized.csv')
dataY = pd.read_csv('../datasets/spotofy_music_labels.csv')
dataY_multi = pd.read_csv('../datasets/spotofy_music_labels.csv')
dataY.head()

Unnamed: 0,Rank
0,1
1,2
2,3
3,4
4,2


In [4]:
cut_labels = [1, 2, 3, 4, 5]
cut_bins = [0, 40, 80, 120, 160, 200]
dataY_multi['Rank'] = pd.cut(dataY_multi['Rank'], bins=cut_bins, labels=cut_labels)

In [5]:
m = len(dataX) # Number of  examples
X = np.array(dataX)
Y = np.array(dataY)
Y1= np.array(dataY_multi)
Y = convert_y(Y)
Y = np.array(Y)
Y_multi = []
for i in range (len(Y1)):
    Y_multi.append(Y1[i][0])
Y_multi = np.array(Y_multi)

In [6]:
def accuracy(Y,y_pred):
    return np.sum(y_pred==Y)/len(Y)


### Binary Classification

In [7]:
kf = KFold(n_splits=5)
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]
    model = SGDClassifier(alpha=0.001, penalty='l2')
    model.fit(X_train, y_train)
    y_predicted = model.predict(X_test)
    confusionM = confusion_matrix(y_test,y_predicted )
    print("Confusion Matrix is :")
    print(confusionM)
    print()
    print("Accuracy is:",accuracy(y_test, y_predicted))
    TP = confusionM[1][1]
    TN = confusionM[0][0]
    FN = confusionM[1][0]
    FP = confusionM[0][1]
    precision = TP/(TP+FP)
    recall = TP/(TP+FN)
    precision = TP/(TP+FP)
    recall = TP/(TP+FN)
    f1_score = 2*((precision* recall)/ (precision+recall))
    print("Precison is:", precision)
    print("Recall is:", recall)
    print("F1-score of SGD is:", f1_score)
    print()



Confusion Matrix is :
[[558  22]
 [235  35]]

Accuracy is: 0.6976470588235294
Precison is: 0.6140350877192983
Recall is: 0.12962962962962962
F1-score of SGD is: 0.21406727828746175

Confusion Matrix is :
[[505  35]
 [255  55]]

Accuracy is: 0.6588235294117647
Precison is: 0.6111111111111112
Recall is: 0.1774193548387097
F1-score of SGD is: 0.27499999999999997

Confusion Matrix is :
[[586   2]
 [240  21]]

Accuracy is: 0.7149587750294464
Precison is: 0.9130434782608695
Recall is: 0.08045977011494253
F1-score of SGD is: 0.14788732394366197

Confusion Matrix is :
[[512  43]
 [248  46]]

Accuracy is: 0.657243816254417
Precison is: 0.5168539325842697
Recall is: 0.1564625850340136
F1-score of SGD is: 0.24020887728459528

Confusion Matrix is :
[[542  14]
 [263  30]]

Accuracy is: 0.673733804475854
Precison is: 0.6818181818181818
Recall is: 0.10238907849829351
F1-score of SGD is: 0.17804154302670624



### Multiclass

In [8]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC
kf = KFold(n_splits=5)
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y_multi[train_index], Y_multi[test_index]
    model = OneVsRestClassifier(SGDClassifier(alpha=0.001, penalty='l2'))
    model.fit(X_train, y_train)
    y_predicted = model.predict(X_test)
    confusionM = confusion_matrix(y_test,y_predicted )
    print("Confusion Matrix is :")
    print(confusionM)
    print()
    print("Accuracy is:",accuracy(y_test, y_predicted))
    TP = confusionM[1][1]
    TN = confusionM[0][0]
    FN = confusionM[1][0]
    FP = confusionM[0][1]
    precision = TP/(TP+FP)
    recall = TP/(TP+FN)
    precision = TP/(TP+FP)
    recall = TP/(TP+FN)
    f1_score = 2*((precision* recall)/ (precision+recall))
    print("Precison is:", precision)
    print("Recall is:", recall)
    print("F1-score of SGD is:", f1_score)
    print()


Confusion Matrix is :
[[101  24  75   5  31]
 [ 64  19  47   9  19]
 [ 47  18  52   9  37]
 [ 53  18  53   5  24]
 [ 35  22  52   5  26]]

Accuracy is: 0.2388235294117647
Precison is: 0.4418604651162791
Recall is: 0.2289156626506024
F1-score of SGD is: 0.30158730158730157

Confusion Matrix is :
[[177  13  35  31   8]
 [ 99   6  27  38  19]
 [ 68  11  28  32  10]
 [ 65   4  32  29  11]
 [ 37   5  23  28  14]]

Accuracy is: 0.2988235294117647
Precison is: 0.3157894736842105
Recall is: 0.05714285714285714
F1-score of SGD is: 0.09677419354838708

Confusion Matrix is :
[[139  32   2  35  20]
 [ 69  37   4  37  24]
 [ 41  27   2  41  18]
 [ 69  35   4  39  23]
 [ 46  38   2  35  30]]

Accuracy is: 0.29093050647820967
Precison is: 0.5362318840579711
Recall is: 0.3490566037735849
F1-score of SGD is: 0.42285714285714293

Confusion Matrix is :
[[143  44  24   9  21]
 [ 96  54  36   8  20]
 [ 56  43  32   9  21]
 [ 24  39  30   8  17]
 [ 34  24  28   5  24]]

Accuracy is: 0.30742049469964666
Prec