In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the dataset
data = pd.read_csv('/Users/zac/Codes/Music_Project/GIT_HUB/Musis_Recommendation_Engine/data/testing/testing.csv')

# Ensure there are no missing values
data.dropna(inplace=True)

# Preprocess the features (scaling numeric features)
scaler = StandardScaler()
numeric_features = ['popularity', 'danceability', 'energy', 'key', 'loudness', 
                    'mode', 'speechiness', 'acousticness', 'instrumentalness', 
                    'liveness', 'valence', 'tempo', 'duration_ms']

data[numeric_features] = scaler.fit_transform(data[numeric_features])

# Encode the mood as a numerical value
data['mood'] = data['mood'].astype('category').cat.codes


In [3]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

def create_base_network(input_shape):
    input = Input(shape=input_shape)
    x = Dense(128, activation='relu')(input)
    x = Dense(64, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    return Model(input, x)

# Define the input shape
input_shape = (len(numeric_features) + 1, )

# Create the base network
base_network = create_base_network(input_shape)

# Create the inputs
input_a = Input(shape=input_shape)
input_b = Input(shape=input_shape)

# Create the outputs
processed_a = base_network(input_a)
processed_b = base_network(input_b)

# Define the distance function
def euclidean_distance(vectors):
    x, y = vectors
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    return K.sqrt(K.maximum(sum_square, K.epsilon()))

# Calculate the distance
distance = Lambda(euclidean_distance, output_shape=(1,))([processed_a, processed_b])

# Create the model
model = Model([input_a, input_b], distance)

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [4]:
import numpy as np

# Function to create pairs of samples and labels
def create_pairs(data, labels):
    pairs = []
    labels = []
    
    unique_labels = np.unique(labels)
    label_to_indices = {label: np.where(labels == label)[0] for label in unique_labels}
    
    for idx in range(len(data)):
        current_sample = data[idx]
        current_label = labels[idx]
        
        positive_idx = np.random.choice(label_to_indices[current_label])
        negative_label = np.random.choice(unique_labels[unique_labels != current_label])
        negative_idx = np.random.choice(label_to_indices[negative_label])
        
        pairs += [[current_sample, data[positive_idx]], [current_sample, data[negative_idx]]]
        labels += [1, 0]
    
    return np.array(pairs), np.array(labels)

# Create pairs and labels
pairs, labels = create_pairs(data[numeric_features + ['mood']].values, data['mood'].values)

# Split into training and validation sets
split = int(len(pairs) * 0.8)
pairs_train, pairs_val = pairs[:split], pairs[split:]
labels_train, labels_val = labels[:split], labels[split:]

# Train the model
model.fit([pairs_train[:, 0], pairs_train[:, 1]], labels_train, 
          validation_data=([pairs_val[:, 0], pairs_val[:, 1]], labels_val), 
          epochs=10, batch_size=128)


IndexError: list index out of range

In [None]:
import numpy as np

# Function to create pairs of samples and labels
def create_pairs(data, labels):
    pairs = []
    labels = []
    
    unique_labels = np.unique(labels)
    label_to_indices = {label: np.where(labels == label)[0] for label in unique_labels}
    
    for idx in range(len(data)):
        current_sample = data[idx]
        current_label = labels[idx]
        
        positive_idx = np.random.choice(label_to_indices[current_label])
        negative_label = np.random.choice(unique_labels[unique_labels != current_label])
        negative_idx = np.random.choice(label_to_indices[negative_label])
        
        pairs += [[current_sample, data[positive_idx]], [current_sample, data[negative_idx]]]
        labels += [1, 0]
    
    return np.array(pairs), np.array(labels)

# Create pairs and labels
pairs, labels = create_pairs(data[numeric_features + ['mood']].values, data['mood'].values)

# Split into training and validation sets
split = int(len(pairs) * 0.8)
pairs_train, pairs_val = pairs[:split], pairs[split:]
labels_train, labels_val = labels[:split], labels[split:]

# Train the model
model.fit([pairs_train[:, 0], pairs_train[:, 1]], labels_train, 
          validation_data=([pairs_val[:, 0], pairs_val[:, 1]], labels_val), 
          epochs=10, batch_size=128)


In [None]:
# Function to compute the embedding of a sample
def compute_embedding(sample):
    return base_network.predict(sample.reshape(1, -1))

# Function to recommend songs based on mood
def recommend_songs(user_mood, num_recommendations=5):
    # Convert mood to the same encoding as the data
    user_mood_code = pd.Series([user_mood]).astype('category').cat.codes.values[0]
    
    # Create a sample input with the specified mood
    sample_input = np.zeros((1, len(numeric_features) + 1))
    sample_input[0, -1] = user_mood_code  # Set mood
    sample_input[0, :-1] = np.mean(data[numeric_features], axis=0)  # Set average values for other features
    
    # Compute the embedding of the sample input
    sample_embedding = compute_embedding(sample_input[0])
    
    # Compute the distances to all other samples
    distances = np.linalg.norm(base_network.predict(data[numeric_features + ['mood']].values) - sample_embedding, axis=1)
    
    # Get the indices of the closest samples
    recommendation_indices = np.argsort(distances)[:num_recommendations]
    
    recommendations = data.iloc[recommendation_indices]
    return recommendations[['song_name', 'artist_name']]

# Example usage
print(recommend_songs('happy', num_recommendations=5))
