In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, Dropout
from surprise import SVD, Dataset, Reader
from sklearn.metrics.pairwise import cosine_similarity

# Load and preprocess data
def load_data(user_data_file, product_data_file):
    """Load the tab-separated user and product tables and derive a rating column.

    Parameters
    ----------
    user_data_file : str or path-like
        Tab-separated file that must contain a numeric ``weight`` column.
    product_data_file : str or path-like
        Tab-separated product table; it is returned exactly as read.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(user_data, product_data)``. ``user_data`` gains a ``rating`` column
        equal to ``weight * 5 / max(weight)``, so the largest weight maps to 5.
        Rows whose weight is missing get a missing rating.
    """
    user_data = pd.read_csv(user_data_file, delimiter='\t')
    product_data = pd.read_csv(product_data_file, delimiter='\t')

    # Series.max() skips missing values; the builtin max() returns NaN when the
    # first weight is missing, which would turn every rating into NaN.
    max_weight = user_data['weight'].max()
    user_data['rating'] = user_data['weight'] * 5 / max_weight

    return user_data, product_data

# Collaborative filtering module
def collaborative_filtering(user_data):
    """Fit a Surprise SVD model on every (userID, artistID, rating) row.

    Parameters
    ----------
    user_data : pandas.DataFrame
        Must contain the columns ``userID``, ``artistID`` and ``rating``.

    Returns
    -------
    surprise.SVD
        The algorithm fitted on the full training set (no hold-out split).
    """
    # The ratings built in load_data are weight * 5 / max(weight), i.e. they
    # lie between 0 and 5. Declaring the scale as (1, 5) made Surprise clip
    # every predicted rating below 1 up to 1.
    reader = Reader(rating_scale=(0, 5))
    data = Dataset.load_from_df(user_data[['userID', 'artistID', 'rating']], reader)
    trainset = data.build_full_trainset()

    algo = SVD()
    algo.fit(trainset)

    return algo


# Content-based filtering module with deep learning
def content_based_filtering(product_data):
    """Build a product-to-product similarity matrix from the ``name`` column.

    The names are tokenised and zero-padded at the end to a common length.
    A small Embedding -> LSTM -> Dense network is then trained to map each
    padded name to its own row of an identity matrix. The cosine similarity
    between the network's output vectors is the result.

    Parameters
    ----------
    product_data : pandas.DataFrame
        Must contain a ``name`` column of texts.

    Returns
    -------
    The value returned by ``cosine_similarity`` for the predicted vectors:
    one row and one column per row of ``product_data``, in the same order.

    NOTE(review): ``np.eye(n)`` and the final Dense layer both grow with the
    square of the number of products; check memory use for large catalogues.
    """
    names = product_data['name']
    n_products = product_data.shape[0]

    # Text -> integer sequences, padded after the last token.
    name_tokenizer = Tokenizer()
    name_tokenizer.fit_on_texts(names)
    token_sequences = name_tokenizer.texts_to_sequences(names)
    longest = max(map(len, token_sequences))
    model_inputs = pad_sequences(token_sequences, maxlen=longest, padding='post')

    # Index 0 is the padding value, hence the +1 on the vocabulary size.
    network = Sequential([
        Embedding(len(name_tokenizer.word_index) + 1, 128, input_length=longest),
        LSTM(128, dropout=0.2, recurrent_dropout=0.2),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dense(n_products, activation='sigmoid'),
    ])
    network.compile(loss='binary_crossentropy', optimizer='adam')

    # Each product is its own target: row i of the identity matrix.
    identity_targets = np.eye(n_products)
    network.fit(model_inputs, identity_targets, epochs=10, batch_size=32)

    # The network outputs act as the product representations.
    return cosine_similarity(network.predict(model_inputs))

# Hybrid recommendation system
def hybrid_recommendation(user_id, user_data, product_data, collab_model, content_model, alpha=0.5):
    """Rank all products for one user by blending two score vectors.

    Parameters
    ----------
    user_id
        Value matched against ``user_data['userID']`` and passed to
        ``collab_model.predict``.
    user_data : pandas.DataFrame
        Must contain ``userID``, ``artistID`` and ``rating`` columns.
    product_data : pandas.DataFrame
        Must contain an ``id`` column. Row ``i`` (by position) has to
        correspond to row/column ``i`` of ``content_model``.
    collab_model
        Object whose ``predict(user_id, product_id)`` returns a sequence with
        the estimated rating at index 3 (the ``est`` field of a Surprise
        ``Prediction``).
    content_model : array-like
        Square product-to-product similarity matrix.
    alpha : float, default 0.5
        Weight of the collaborative score; ``1 - alpha`` weights the content
        score.

    Returns
    -------
    pandas.DataFrame
        The rows of ``product_data`` ordered from highest to lowest blended
        score.

    NOTE(review): the two score vectors are blended as-is. If they live on
    different scales (estimated ratings vs. similarities) one of them may
    dominate; consider normalising before blending.
    """
    product_ids = product_data['id'].to_numpy()
    similarity = np.asarray(content_model)

    # Collaborative part: one estimated rating per product.
    collab_predictions = np.array(
        [collab_model.predict(user_id, product_id)[3] for product_id in product_ids],
        dtype=float,
    )

    # Content part: average similarity to the products the user rated above 3.
    user_interactions = user_data[user_data['userID'] == user_id]
    liked_ids = user_interactions.loc[user_interactions['rating'] > 3, 'artistID']

    # Work with row positions, not index labels, so a filtered or re-indexed
    # product_data still lines up with the similarity matrix. Liked ids that
    # are absent from product_data are ignored instead of raising IndexError.
    liked_positions = np.flatnonzero(np.isin(product_ids, liked_ids.to_numpy()))
    if liked_positions.size:
        content_predictions = similarity[liked_positions].mean(axis=0)
    else:
        # No liked products: dividing by zero would make every score NaN and
        # the ranking meaningless, so fall back to the collaborative part only.
        content_predictions = np.zeros(len(product_ids))

    hybrid_predictions = alpha * collab_predictions + (1 - alpha) * content_predictions
    hybrid_recommendations = product_data.iloc[hybrid_predictions.argsort()[::-1]]

    return hybrid_recommendations

# Example usage
# Load both tables, then fit the collaborative model and the content-based
# similarity matrix once; they are reused for every recommendation request.
user_data, product_data = load_data('user_artists.dat', 'artists.dat')
collab_model = collaborative_filtering(user_data)
content_model = content_based_filtering(product_data)

user_id = 123  # Example user ID
hybrid_recommendations = hybrid_recommendation(user_id, user_data, product_data, collab_model, content_model)

# Show only the top of the ranking: printing the whole frame displays pandas'
# truncated head *and* tail, i.e. the worst-ranked products as well.
hybrid_recommendations.head(10)

In [4]:
# Preview the first rows of the user-artist interaction table.
user_data.head()

Unnamed: 0,userID,artistID,weight
0,2,51,13883
1,2,52,11690
2,2,53,11351
3,2,54,10300
4,2,55,8983


In [5]:
# Preview the first rows of the artist (product) table.
product_data.head()

Unnamed: 0,id,name,url,pictureURL
0,1,MALICE MIZER,http://www.last.fm/music/MALICE+MIZER,http://userserve-ak.last.fm/serve/252/10808.jpg
1,2,Diary of Dreams,http://www.last.fm/music/Diary+of+Dreams,http://userserve-ak.last.fm/serve/252/3052066.jpg
2,3,Carpathian Forest,http://www.last.fm/music/Carpathian+Forest,http://userserve-ak.last.fm/serve/252/40222717...
3,4,Moi dix Mois,http://www.last.fm/music/Moi+dix+Mois,http://userserve-ak.last.fm/serve/252/54697835...
4,5,Bella Morte,http://www.last.fm/music/Bella+Morte,http://userserve-ak.last.fm/serve/252/14789013...


In [12]:
# Friend relations, tab-separated: one (userID, friendID) pair per row.
# NOTE(review): absolute local path; this cell only runs on the author's machine.
friends = pd.read_csv(
    '/Users/user/Downloads/hetrec2011-lastfm-2k/user_friends.dat',
    sep='\t',
)
friends.head()

Unnamed: 0,userID,friendID
0,2,275
1,2,428
2,2,515
3,2,761
4,2,831


In [13]:
# Tag assignments with a timestamp column, tab-separated:
# userID, artistID, tagID, timestamp.
# NOTE(review): absolute local path; this cell only runs on the author's machine.
times = pd.read_csv(
    '/Users/user/Downloads/hetrec2011-lastfm-2k/user_taggedartists-timestamps.dat',
    sep='\t',
)
times.head()

Unnamed: 0,userID,artistID,tagID,timestamp
0,2,52,13,1238536800000
1,2,52,15,1238536800000
2,2,52,18,1238536800000
3,2,52,21,1238536800000
4,2,52,41,1238536800000


In [14]:
# Tag assignments with the date split into columns, tab-separated:
# userID, artistID, tagID, day, month, year.
# NOTE(review): absolute local path; this cell only runs on the author's machine.
tagged = pd.read_csv(
    '/Users/user/Downloads/hetrec2011-lastfm-2k/user_taggedartists.dat',
    sep='\t',
)
tagged.head()

Unnamed: 0,userID,artistID,tagID,day,month,year
0,2,52,13,1,4,2009
1,2,52,15,1,4,2009
2,2,52,18,1,4,2009
3,2,52,21,1,4,2009
4,2,52,41,1,4,2009


In [None]:
# Read tags.dat the same way as the other .dat files in this notebook, which
# are all parsed as tab-separated; without the delimiter pandas splits on commas.
# NOTE(review): encoding='latin-1' is set because this file is believed to
# contain bytes that are not valid UTF-8, which makes the default decoder
# raise. Confirm against the dataset.
# NOTE(review): absolute local path; this cell only runs on the author's machine.
tag_df = pd.read_csv(
    '/Users/user/Downloads/hetrec2011-lastfm-2k/tags.dat',
    delimiter='\t',
    encoding='latin-1',
)
tag_df.head()

In [18]:
# IPython help lookup (the trailing `?` shows the docstring of pd.read_csv).
# NOTE(review): leftover from interactive exploration; remove before sharing.
pd.read_csv?