In [1]:
!pip install surprise

Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3162996 sha256=fc5c447813304fb79996bf49040ae0633bf2bd838f04cc71ecce211a1029fe39
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.3 surprise-0.1


In [None]:
# Basic imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
import seaborn as sns

# Machine learning and data processing
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, Dropout
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from surprise import SVD, Dataset, Reader

# Logging for debugging
import logging

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [None]:
# Load and preprocess data
def load_data(user_data_file, product_data_file):
    try:
        user_data = pd.read_csv(user_data_file, delimiter='\t')
        product_data = pd.read_csv(product_data_file, delimiter='\t')

        # Handle missing values and scale weights
        user_data['rating'] = user_data['weight'] * 5 / user_data['weight'].max()
        product_data.fillna(method='ffill', inplace=True)  # Forward filling missing values
        logging.info("Data loaded and preprocessed successfully.")
        return user_data, product_data
    except Exception as e:
        logging.error(f"Error loading data: {e}")
        return None, None

In [None]:
# Collaborative filtering module
def collaborative_filtering(user_data):
    try:
        reader = Reader(rating_scale=(1, 5))
        data = Dataset.load_from_df(user_data[['userID', 'artistID', 'rating']], reader)
        trainset = data.build_full_trainset()
        algo = SVD()
        algo.fit(trainset)
        logging.info("Collaborative filtering model trained successfully.")
        return algo
    except Exception as e:
        logging.error(f"Error in collaborative filtering: {e}")
        return None

In [None]:
# Content-based filtering module with deep learning
def content_based_filtering(product_data):
    try:
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(product_data['name'])
        sequences = tokenizer.texts_to_sequences(product_data['name'])
        max_length = max([len(seq) for seq in sequences])
        padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

        # Deep learning model for product representation
        embedding_dim = 128
        vocab_size = len(tokenizer.word_index) + 1
        model = Sequential([
            Embedding(vocab_size, embedding_dim, input_length=max_length),
            LSTM(128, dropout=0.2, recurrent_dropout=0.2),
            Dense(64, activation='relu'),
            Dropout(0.2),
            Dense(32, activation='relu'),
            Dense(product_data.shape[0], activation='sigmoid')
        ])
        model.compile(loss='binary_crossentropy', optimizer='adam')
        model.fit(padded_sequences, np.eye(product_data.shape[0]), epochs=5, batch_size=32, validation_split=0.1)
        product_representations = model.predict(padded_sequences)
        product_similarity_matrix = cosine_similarity(product_representations)
        logging.info("Content-based filtering model trained and similarity matrix created.")
        return product_similarity_matrix
    except Exception as e:
        logging.error(f"Error in content-based filtering: {e}")
        return None

In [None]:
# Visualization Functions
def plot_histogram(recommendations, column='rating'):
    if recommendations is not None and column in recommendations.columns:
        plt.figure(figsize=(10, 5))
        plt.hist(recommendations[column], bins=20, color='blue', alpha=0.7)
        plt.title('Histogram of Ratings in Recommended Products')
        plt.xlabel('Rating')
        plt.ylabel('Number of Products')
        plt.grid(True)
        plt.show()
    else:
        print("No valid recommendations to display or specified column missing.")

In [None]:
def plot_similarity_matrix(matrix, labels=None):
    plt.figure(figsize=(10, 8))
    sns.heatmap(matrix, xticklabels=labels, yticklabels=labels, cmap='viridis')
    plt.title('Product Similarity Matrix')
    plt.xlabel('Products')
    plt.ylabel('Products')
    plt.show()

In [None]:
# Hybrid recommendation system
def hybrid_recommendation(user_id, user_data, product_data, collab_model, content_model, alpha=0.5):
    try:
        user_interactions = user_data[user_data['userID'] == user_id]
        collab_predictions = [collab_model.predict(user_id, product_id).est for product_id in product_data['id']]

        user_liked_products = user_interactions[user_interactions['rating'] > 3]['artistID']
        user_liked_products_indices = [product_data[product_data['id'] == product_id].index[0] for product_id in user_liked_products]

        if user_liked_products_indices:
            content_predictions = content_model[user_liked_products_indices].sum(axis=0) / len(user_liked_products_indices)
        else:
            content_predictions = np.zeros_like(collab_predictions)  # Default to zero or some other logic

        hybrid_predictions = alpha * np.array(collab_predictions) + (1 - alpha) * np.array(content_predictions)
        recommendations = product_data.iloc[np.argsort(hybrid_predictions)[::-1]]

        return recommendations
    except Exception as e:
        logging.error(f"Error in hybrid recommendation: {e}")
        return None

In [None]:
# Main function to run the recommendation system
def main():
    user_data, product_data = load_data('user_artists.dat', 'artists.dat')
    if user_data is not None and product_data is not None:
        collab_model = collaborative_filtering(user_data)
        content_model = content_based_filtering(product_data)
        if collab_model is not None and content_model is not None:
            user_id = 123
            recommendations = hybrid_recommendation(user_id, user_data, product_data, collab_model, content_model)
            if recommendations is not None and 'rating' in recommendations.columns and not recommendations.empty:
                print(recommendations)
                plot_histogram(recommendations, 'rating')
                plot_similarity_matrix(content_model)
            else:
                logging.info("No valid recommendations to display or 'rating' column missing.")
        else:
            logging.error("Failed to train models or models returned None.")
    else:
        logging.error("Data loading failed, cannot proceed with model training.")

In [None]:
if __name__ == "__main__":
    main()



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
from surprise.model_selection import train_test_split

trainset, testset = train_test_split(data, test_size=0.25, random_state=42)
logging.info("Data split into training and testing sets successfully.")

In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, Dropout
from surprise import SVD, Dataset, Reader
from sklearn.metrics.pairwise import cosine_similarity

# Load and preprocess data
def load_data(user_data_file, product_data_file):
    user_data = pd.read_csv(user_data_file, delimiter='\t')
    product_data = pd.read_csv(product_data_file, delimiter='\t')

    # Preprocess user data (e.g., handle missing values, encode categorical features)
    user_data['rating'] = user_data['weight']*5/max(user_data['weight'])

    # Preprocess product data (e.g., handle missing values, preprocess text data)
    #product_data['rating'] = product_data['weight']*5/max(pruduct_data['weight'])

    return user_data, product_data

# Collaborative filtering module
def collaborative_filtering(user_data):
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(user_data[['userID', 'artistID', 'rating']], reader)
    trainset = data.build_full_trainset()
    algo = SVD()
    algo.fit(trainset)

    return algo


# Content-based filtering module with deep learning
def content_based_filtering(product_data):
    # Text preprocessing
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(product_data['name'])
    sequences = tokenizer.texts_to_sequences(product_data['name'])
    max_length = max([len(seq) for seq in sequences])
    padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

    # Deep learning model for product representation
    embedding_dim = 128
    vocab_size = len(tokenizer.word_index) + 1
    model = Sequential()
    model.add(Embedding(vocab_size, embedding_dim, input_length=max_length))
    model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(product_data.shape[0], activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam')
    model.fit(padded_sequences, np.eye(product_data.shape[0]), epochs=10, batch_size=32)

    # Compute product similarity matrix
    product_representations = model.predict(padded_sequences)
    product_similarity_matrix = cosine_similarity(product_representations)

    return product_similarity_matrix

# Hybrid recommendation system
def hybrid_recommendation(user_id, user_data, product_data, collab_model, content_model, alpha=0.5):
    user_interactions = user_data[user_data['userID'] == user_id]

    # Collaborative filtering predictions
    collab_predictions = [collab_model.predict(user_id, product_id)[3] for product_id in product_data['id']]

    # Content-based filtering predictions
    user_liked_products = user_interactions[user_interactions['rating'] > 3]['artistID']
    user_liked_products_indices = [product_data[product_data['id'] == product_id].index[0] for product_id in user_liked_products]
    content_predictions = content_model[user_liked_products_indices].sum(axis=0) / len(user_liked_products_indices)

    # Hybrid recommendations
    hybrid_predictions = alpha * np.array(collab_predictions) + (1 - alpha) * np.array(content_predictions)
    hybrid_recommendations = product_data.iloc[hybrid_predictions.argsort()[::-1]]

    return hybrid_recommendations

# Example usage
user_data, product_data = load_data('user_artists.dat', 'artists.dat')
collab_model = collaborative_filtering(user_data)
content_model = content_based_filtering(product_data)

user_id = 123  # Example user ID
hybrid_recommendations = hybrid_recommendation(user_id, user_data, product_data, collab_model, content_model)
print(hybrid_recommendations)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
          id                               name  \
5873    5998    Madonna feat. Justin Timberlake   
5874    5999                             Zombie   
5875    6000                 LanzamientosMp3.es   
5876    6001                             Shamur   
5877    6002                      Juliana Pasha   
...      ...                                ...   
11757  12183   Thao with The Get Down Stay Down   
11758  12184                           トクマルシューゴ   
11759  12185                           FREENOTE   
11760  12186  Ella Fitzgerald & Louis Armstrong   
11752  12178                    BEAT!BEAT!BEAT!   

                                                     url  \
5873   http://www.last.fm/music/Madonna+feat.+Justin+...   
5874                     http://www.last.fm/music/Zombie   
5875         http://www.last.fm/music/LanzamientosMp3.es   
5876                     http://www.

  content_predictions = content_model[user_liked_products_indices].sum(axis=0) / len(user_liked_products_indices)
