In [2]:
%pip install surprise
%pip install pandas

# Import necessary libraries
from surprise import Reader, Dataset, SVD
from surprise.model_selection import train_test_split
import pandas as pd

# Load data from a pandas dataframe
df = pd.read_csv('sales_data.csv')

# Define the reader object; estimates are clipped to this rating scale
reader = Reader(rating_scale=(0, 100))

# Surprise requires exactly three columns, in the order (user, item, rating);
# passing only the product and quantity columns fails while unpacking rows.
# Every order is therefore attributed to one placeholder user -- the same id
# that is passed to model.predict() further down.
# NOTE(review): if sales_data.csv has a real customer column, use it here
# instead of the placeholder.
PLACEHOLDER_USER = 1
ratings = df[['Product ID', 'Quantity Ordered']].copy()
ratings.insert(0, 'User ID', PLACEHOLDER_USER)

# Load data into a surprise dataset
data = Dataset.load_from_df(ratings, reader)

# Split data into train and test sets
trainset, testset = train_test_split(data, test_size=.25)

# Define the model and fit to the training set
model = SVD()
model.fit(trainset)

# Generate predictions for the test set
predictions = model.test(testset)

# Print the RMSE for the predictions.
# verbose=False: accuracy.rmse() prints the score itself by default, which
# would duplicate the line printed here.
from surprise import accuracy
print('RMSE:', accuracy.rmse(predictions, verbose=False))

# Generate a list of the top 10 products based on predicted popularity
product_ids = df['Product ID'].unique()
product_predictions = [
    (product_id, model.predict(PLACEHOLDER_USER, product_id).est)
    for product_id in product_ids
]
top_products = sorted(product_predictions, key=lambda x: x[1], reverse=True)[:10]

# Plot the popularity of the top 10 products
import matplotlib.pyplot as plt
positions = range(len(top_products))
plt.bar(positions, [score for _, score in top_products])
plt.xticks(positions, [product_id for product_id, _ in top_products], rotation=90)
plt.xlabel('Product ID')
plt.ylabel('Popularity')
plt.show()

# Plot the distribution of predicted popularity
import numpy as np
popularity = np.array([score for _, score in product_predictions])
plt.hist(popularity, bins=20)
plt.xlabel('Predicted popularity')
plt.ylabel('Frequency')
plt.show()

# Generate a table of the top 10 products based on predicted popularity
top_products_table = pd.DataFrame(top_products, columns=['Product ID', 'Predicted Popularity'])
print(top_products_table)

# Compute precision, recall, and F1-score
from collections import defaultdict
from surprise import Dataset
from surprise.model_selection import LeaveOneOut

def get_top_n(predictions, n=10):
    """Group predictions by user and keep each user's n best estimates.

    Args:
        predictions: Iterable of 5-item records unpacked as
            (user id, item id, true rating, estimate, details).
        n: Maximum number of items kept per user.

    Returns:
        A defaultdict mapping each user id to a list of (item id, estimate)
        pairs, ordered from highest to lowest estimate and cut to n entries.
    """
    by_user = defaultdict(list)
    for prediction in predictions:
        uid, iid, _true_r, est, _details = prediction
        by_user[uid].append((iid, est))

    # Only existing keys are reassigned, so the dict is safe to iterate here.
    for uid in by_user:
        ranked = sorted(by_user[uid], key=lambda pair: pair[1], reverse=True)
        by_user[uid] = ranked[:n]
    return by_user

def _mean_precision_recall_at_k(predictions, k, threshold):
    """Return (precision@k, recall@k) averaged over the users in predictions.

    An item counts as relevant when its true rating is >= threshold, and as
    recommended when it is among the user's k highest estimates and that
    estimate is >= threshold. A user with no recommended (or no relevant)
    items contributes 0 to the respective average. Returns (0.0, 0.0) when
    predictions is empty.
    """
    est_true_by_user = defaultdict(list)
    for uid, _iid, true_r, est, _details in predictions:
        est_true_by_user[uid].append((est, true_r))
    if not est_true_by_user:
        return 0.0, 0.0

    precisions, recalls = [], []
    for user_ratings in est_true_by_user.values():
        user_ratings.sort(key=lambda pair: pair[0], reverse=True)
        top_k = user_ratings[:k]
        n_rel = sum(true_r >= threshold for _, true_r in user_ratings)
        n_rec_k = sum(est >= threshold for est, _ in top_k)
        n_rel_and_rec_k = sum(
            true_r >= threshold and est >= threshold for est, true_r in top_k
        )
        precisions.append(n_rel_and_rec_k / n_rec_k if n_rec_k else 0.0)
        recalls.append(n_rel_and_rec_k / n_rel if n_rel else 0.0)
    return sum(precisions) / len(precisions), sum(recalls) / len(recalls)


# Surprise requires exactly three columns, in the order (user, item, rating);
# passing only the product and quantity columns fails while unpacking rows.
# Every order is therefore attributed to one placeholder user.
# NOTE(review): if sales_data.csv has a real customer column, use it here
# instead of the placeholder.
PLACEHOLDER_USER = 1
ratings = df[['Product ID', 'Quantity Ordered']].copy()
ratings.insert(0, 'User ID', PLACEHOLDER_USER)
data = Dataset.load_from_df(ratings, reader)
trainset = data.build_full_trainset()
testset = trainset.build_anti_testset()

algo = SVD()
algo.fit(trainset)

top_n = get_top_n(predictions, n=10)

# surprise.accuracy offers rmse/mse/mae/fcp only -- it has no precision or
# recall functions -- so the ranking metrics are computed here. They are
# scored on the held-out `predictions`, which carry real ratings; anti-testset
# entries only carry a fill value (the global mean by default).
# NOTE(review): "relevant" is assumed to mean "ordered in at least the average
# quantity"; confirm or replace this threshold for the real use case.
RELEVANCE_THRESHOLD = trainset.global_mean
precision, recall = _mean_precision_recall_at_k(
    predictions, k=10, threshold=RELEVANCE_THRESHOLD
)
# Guard the harmonic mean: precision + recall is 0 when nothing relevant was
# recommended.
metric_sum = precision + recall
f1_score = 2 * (precision * recall) / metric_sum if metric_sum else 0.0

print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1_score)

<class 'ValueError'>: Can't find a pure Python 3 wheel for 'scikit-surprise'.
See: https://pyodide.org/en/stable/usage/faq.html#micropip-can-t-find-a-pure-python-wheel
You can use `micropip.install(..., keep_going=True)`to get a list of all packages with missing wheels.