In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import NearestNeighbors
from keras.models import Sequential, Model
from keras.layers import Dense, Embedding, Flatten, Input, Concatenate
from keras.optimizers import Adam

In [2]:
#Readnig Dataset
data = pd.read_csv("/content/Last.fm_data.csv")
data.head()

Unnamed: 0,ID,Username,Artist,Track,Album,Date,Time,play_count
0,0,Babs_05,Isobel Campbell,The Circus Is Leaving Town,Ballad of the Broken Seas,31-Jan-21,23:36,1
1,1,Babs_05,Isobel Campbell,Dusty Wreath,Ballad of the Broken Seas,31-Jan-21,23:32,1
2,2,Babs_05,Isobel Campbell,Honey Child What Can I Do?,Ballad of the Broken Seas,31-Jan-21,23:28,1
3,3,Babs_05,Isobel Campbell,It's Hard To Kill A Bad Thing,Ballad of the Broken Seas,31-Jan-21,23:25,1
4,4,Babs_05,Isobel Campbell,Saturday's Gone,Ballad of the Broken Seas,31-Jan-21,23:21,1


In [3]:
# Encode categorical data (Username, Track)
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

data['user_id'] = user_encoder.fit_transform(data['Username'])
data['item_id'] = item_encoder.fit_transform(data['Track'])

In [4]:
# Prepare features (user_id, item_id) and target (play_count)
X = data[['user_id', 'item_id']].values
y = data['play_count'].values

In [5]:
# Set a threshold to classify play_count into "liked" or "not liked"
# Assuming if play_count >= 5, the user likes the track (this threshold can be adjusted)
threshold = 5
y_binary = np.where(y >= threshold, 1, 0)

In [6]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)

In [7]:
# Build the ANN model
num_users = len(data['user_id'].unique())
num_items = len(data['item_id'].unique())

In [8]:
# Define two embedding layers for users and items
user_embedding = Sequential()
user_embedding.add(Embedding(input_dim=num_users, output_dim=50, input_length=1))
user_embedding.add(Flatten())

item_embedding = Sequential()
item_embedding.add(Embedding(input_dim=num_items, output_dim=50, input_length=1))
item_embedding.add(Flatten())



In [9]:
# Combine both embeddings and add dense layers
# Input layers
user_input = Input(shape=(1,))
item_input = Input(shape=(1,))

In [10]:
# Embedding layers
user_emb = user_embedding(user_input)
item_emb = item_embedding(item_input)

In [11]:
# Concatenate user and item embeddings
concatenated = Concatenate()([user_emb, item_emb])

# Add hidden dense layers
x = Dense(128, activation='relu')(concatenated)
x = Dense(64, activation='relu')(x)
output = Dense(1, activation='sigmoid')(x)  # Binary classification output

# Define the model
model = Model([user_input, item_input], output)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])


In [12]:
# Train the model (with both user_id and item_id as inputs)
history = model.fit([X_train[:, 0], X_train[:, 1]], y_train, epochs=20, batch_size=32, validation_split=0.2)

Epoch 1/20
[1m3324/3324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 33ms/step - accuracy: 0.9398 - loss: 0.2047 - val_accuracy: 0.9711 - val_loss: 0.0817
Epoch 2/20
[1m3324/3324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 34ms/step - accuracy: 0.9771 - loss: 0.0546 - val_accuracy: 0.9781 - val_loss: 0.0855
Epoch 3/20
[1m3324/3324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 33ms/step - accuracy: 0.9874 - loss: 0.0321 - val_accuracy: 0.9730 - val_loss: 0.0759
Epoch 4/20
[1m3324/3324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 33ms/step - accuracy: 0.9956 - loss: 0.0117 - val_accuracy: 0.9478 - val_loss: 0.1688
Epoch 5/20
[1m3324/3324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 33ms/step - accuracy: 0.9983 - loss: 0.0044 - val_accuracy: 0.9488 - val_loss: 0.2219
Epoch 6/20
[1m3324/3324[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 34ms/step - accuracy: 0.9993 - loss: 0.0022 - val_accuracy: 0.9640 - val_loss: 0.094

In [13]:
# Evaluate the model accuracy
loss, accuracy = model.evaluate([X_test[:, 0], X_test[:, 1]], y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")

[1m1039/1039[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9726 - loss: 0.1022
Accuracy: 97.19%


In [14]:
# Recommend tracks for a specific user (by username)
specific_username = 'Babs_05'

# Get the encoded user_id for the specified username
user_id = user_encoder.transform([specific_username])[0]

# Get all unique item IDs (tracks) to predict the user's preference for each track
all_item_ids = data['item_id'].unique()

# Predict the likelihood of liking each track
predictions = model.predict([np.full_like(all_item_ids, user_id), all_item_ids])

# Get top N tracks that the user is most likely to like
N = 10
top_N_indices = np.argsort(predictions[:, 0])[-N:][::-1]  # Indices of top N tracks

# Convert item IDs back to track names
recommended_tracks = item_encoder.inverse_transform(all_item_ids[top_N_indices])

# Print the actual username and the recommended tracks
print(f"Recommended tracks for User '{specific_username}':")
for track in recommended_tracks:
    print(track)


[1m2102/2102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step
Recommended tracks for User 'Babs_05':
Holiday
Friends
How’s Your Mind
Just Because
From Home
Homecoming
Anemone
Chinese Satellite
Moon Song
Navigator
