In [None]:
# Mount Google Drive into this Colab runtime at /content/drive so the CSV
# files read later in the notebook (under /content/drive/MyDrive/...) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the two track exports from Drive and label every row with its class:
# isLiked=True for rows from the likes file, isLiked=False for the dislikes file.
liked = pd.read_csv(
    "/content/drive/MyDrive/MusicMachineLearningProject/database_likes.csv"
).assign(isLiked=True)
disliked = pd.read_csv(
    "/content/drive/MyDrive/MusicMachineLearningProject/database_dislikes.csv"
).assign(isLiked=False)

In [None]:
# Keep only fully populated rows in each frame; any row with at least one
# missing value (i.e. local files) is discarded.
liked = liked.dropna()
disliked = disliked.dropna()

In [None]:
# Stack the liked rows on top of the disliked rows, keep a single row per
# "Spotify ID", and renumber the index from 0.
# NOTE(review): with drop_duplicates' default keep="first", a track present in
# both files keeps its liked row because `liked` is concatenated first.
merged = (
    pd.concat([liked, disliked])
    .drop_duplicates(subset=["Spotify ID"])
    .reset_index(drop=True)
)

In [None]:
#Split Data
# Columns excluded from the model inputs (the last entry is the target itself);
# every remaining column of `merged` is used as a feature.
non_feature_columns = ['Spotify ID', 'Artist IDs', 'Track Name', 'Album Name',
       'Artist Name(s)', 'Release Date', 'Duration (ms)', 'Added By', 'Added At', 'Genres', 'Loudness', 'isLiked']
features = merged.drop(columns=non_feature_columns)
y = merged['isLiked']

# Hold out the test rows BEFORE fitting any preprocessing. Fitting the scaler on
# the full dataset would let the test set's min/max influence training (data
# leakage) and make the reported metrics optimistic. The row assignment is the
# same as before: it depends only on the number of rows and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=101
)

#Create scaler for normalization
# Learn the per-feature min/max from the training rows only, then apply that
# same mapping to the held-out rows (test values may fall slightly outside [0, 1]).
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# `X` keeps its previous meaning (scaled feature matrix for all rows) for any
# later cell that uses it; it is now scaled with the training-set statistics.
X = scaler.transform(features)

#Train model using logistic regression
model = LogisticRegression(random_state=101, max_iter=1000)
model.fit(X_train, y_train)

#Create a confusion matrix
# Rows are the true labels and columns the predicted labels, both in sorted
# order (False = disliked first, True = liked second).
predictions = model.predict(X_test)
cnf_matrix = metrics.confusion_matrix(y_test, predictions)
cnf_matrix

array([[42, 27],
       [12, 72]])

In [None]:
# Per-class precision / recall / F1 on the held-out rows. The display names
# follow the sorted label order: False -> 'Disliked', True -> 'Liked'.
target_names = ['Disliked', 'Liked']
report = metrics.classification_report(
    y_test,
    predictions,
    target_names=target_names,
)
print(report)

              precision    recall  f1-score   support

    Disliked       0.78      0.61      0.68        69
       Liked       0.73      0.86      0.79        84

    accuracy                           0.75       153
   macro avg       0.75      0.73      0.73       153
weighted avg       0.75      0.75      0.74       153

