# Popularity - Popular products

A simple rank of the most viewed products and a simple rank of the most sold products. The two ranks combined give the popularity rank.

### Initialize Django

In [None]:
import os

import django

# Point Django at the workbench settings unless the environment already
# names a settings module (setdefault never overrides an existing value).
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "shuup_workbench.settings")

# Initialise Django before any model is imported by the following cells.
django.setup()

### Remove old data

In [None]:
from django.contrib.auth import get_user_model
from django.db.transaction import atomic
from shuup.core.models import (
    Order,
    OrderLine,
    Payment,
    PersonContact,
    Product,
    Shipment,
    ShipmentProduct,
)
from shuup_recommender.models import ProductView

# Remove everything a previous run of this notebook generated. Generated
# orders, contacts and products are recognised by their "fake-" prefix.
# The whole cleanup runs in one transaction so that a failure halfway
# through does not leave a partially cleaned database behind.
with atomic():
    # Dependent rows go first (shipments, payments, lines), then the orders
    # themselves -- presumably so protected relations never block a delete.
    ShipmentProduct.objects.filter(shipment__order__identifier__startswith="fake-").delete()
    Shipment.objects.filter(order__identifier__startswith="fake-").delete()
    Payment.objects.filter(order__identifier__startswith="fake-").delete()
    OrderLine.objects.filter(order__identifier__startswith="fake-").delete()
    Order.objects.filter(identifier__startswith="fake-").delete()
    PersonContact.objects.filter(identifier__startswith="fake-").delete()
    Product.objects.filter(sku__startswith="fake-").delete()

    # WARNING: the next two statements are NOT limited to "fake-" data.
    # Every non-superuser account and every recorded product view is deleted.
    get_user_model().objects.exclude(is_superuser=True).delete()
    ProductView.objects.all().delete()

### Create fake data

In [None]:
import random
import uuid

from django.db.transaction import atomic
from shuup.core.models import Shop
from shuup.testing import factories


# Generate the data set used by the rest of the notebook: 20 customers,
# 100 products and 200 orders. Identifiers and SKUs get a "fake-" prefix
# followed by a random hex string.
shop = Shop.objects.first()
supplier = factories.get_default_supplier()


# One transaction: either the whole data set is created or nothing is.
with atomic():
    customers = []
    for _ in range(20):
        customer = factories.create_random_person()
        customer.identifier = "fake-{}".format(uuid.uuid4().hex)
        customer.save()
        customers.append(customer)

    # Products with a random integer price in [1, 100).
    products = [
        factories.create_product(
            sku="fake-{}".format(uuid.uuid4().hex),
            shop=shop,
            supplier=supplier,
            default_price=random.randrange(1, 100)
        )
        for _ in range(100)
    ]

    orders = []
    for _ in range(200):
        # Each order belongs to a random customer and draws from all the
        # generated products; completion_probability=1 is passed so that
        # the orders are presumably all completed -- verify in the factory.
        order = factories.create_random_order(
            customer=random.choice(customers),
            products=products,
            completion_probability=1,
            shop=shop
        )
        order.identifier = "fake-{}".format(uuid.uuid4().hex)
        order.save()
        orders.append(order)

### Generate random product views

In [None]:
import random

from django.contrib.auth import get_user_model
from django.db.transaction import atomic
from shuup.core.models import PersonContact, Product
from shuup.testing import factories
from shuup_recommender.models import ProductView

# Record 1000 product views, each pairing a random person contact with a
# random product. Note that ALL contacts and ALL products in the database
# take part, not only the "fake-" ones.
customers = list(PersonContact.objects.all())
products = list(Product.objects.all())
User = get_user_model()

with atomic():
    for _ in range(1000):
        customer = random.choice(customers)
        product = random.choice(products)
        # Reload the contact from the database before reading its user.
        customer.refresh_from_db()

        # A view is stored per user, so a contact without a linked user
        # gets a freshly generated one attached first.
        user = getattr(customer, "user", None)
        if not user:
            user = factories.create_random_user()
            customer.user = user
            customer.save()

        ProductView.objects.create(product=product, user=user)

## 1. Get most sold products

In [None]:
import pandas as pd
from django_pandas.io import read_frame
from shuup.core.models import OrderLine, OrderStatus, Shop

shop = Shop.objects.first()

# Product order lines of this shop whose order carries the default
# "complete" status.
order_lines = OrderLine.objects.products().filter(
    order__shop=shop,
    order__status=OrderStatus.objects.get_default_complete(),
)
items_df = read_frame(
    order_lines,
    fieldnames=["product_id", "quantity"],
    verbose=False,
)

# Total quantity sold per product; the result is indexed by product_id.
sold_items = items_df.groupby("product_id").sum()

# The largest total is the reference value for the normalization.
max_value = sold_items["quantity"].max()

# Scale every total by the maximum so the best seller scores 1, then make
# sure each value is numeric - this keeps later plotting straightforward.
sold_items["sold_rank"] = (sold_items["quantity"] / max_value).apply(pd.to_numeric)

# Only the normalized rank is needed from here on.
sold_items.drop(columns=["quantity"], inplace=True)

# Alias for the merge step; no sorting is applied here.
sold_items_rank = sold_items

## 2. Get most viewed products

In [None]:
import pandas as pd
from django_pandas.io import read_frame

from shuup.core.models import Shop
from shuup_recommender.models import ProductView

# The shop is looked up but the views below are not filtered by it.
shop = Shop.objects.first()

# One row per recorded view, each given a constant weight of 1.
product_views_df = read_frame(
    ProductView.objects.all(),
    fieldnames=["product_id"],
    verbose=False,
).assign(views=1)

# Number of views per product; the result is indexed by product_id.
viewed_products = product_views_df.groupby("product_id").sum()

# The largest view count is the reference value for the normalization.
max_value = viewed_products["views"].max()

# Scale every count by the maximum so the most viewed product scores 1 -
# this keeps later plotting straightforward.
viewed_products["view_rank"] = (viewed_products["views"] / max_value).apply(pd.to_numeric)

# Only the normalized rank is needed from here on.
viewed_products.drop(columns=["views"], inplace=True)

viewed_products_rank = viewed_products

## 3. Merge results together

In [None]:
# Outer join of both rank frames on their index. A product that is present
# in only one of the frames is kept and gets NaN in the other frame's column.
products_rank = sold_items_rank.merge(
    viewed_products_rank,
    how="outer",
    left_index=True,
    right_index=True,
)

## 4. Visualize data as a graph

We should see a scatter plot. We can't simply average the sold and view ranks, e.g. `rank = (sold + views) / 2`. Instead, since both ranks are normalized, we calculate the Pythagorean (Euclidean) distance from the origin, `rank = sqrt(sold^2 + views^2)`. This way, the top products are the ones that are both most sold AND most viewed.

In [None]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt

# Single axes: view rank on the x axis, sold rank on the y axis.
fig, ax = plt.subplots()
ax.scatter(x=products_rank["view_rank"], y=products_rank["sold_rank"])

ax.set_title("Sold vs Views rank")
ax.set_xlabel("Views rank")
ax.set_ylabel("Solds rank")
plt.show()

## Calculate the Pythagorean distance and we have the popular products

In [None]:
import numpy as np

def dist(x, y):
    """Return the element-wise Euclidean distance of (x, y) from the origin.

    A missing coordinate (NaN) is treated as 0, so a point that has a value
    on only one axis still gets a distance (its value on that axis) instead
    of NaN.

    Args:
        x: scalar or array-like of numbers.
        y: scalar or array-like of numbers, broadcastable against ``x``.

    Returns:
        ``sqrt(x**2 + y**2)`` as a NumPy float scalar or array.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Replace only NaN; every other value is passed through unchanged.
    x = np.where(np.isnan(x), 0.0, x)
    y = np.where(np.isnan(y), 0.0, y)

    return np.hypot(x, y)

# Combine both normalized ranks into one score per product.
products_rank["rank"] = dist(
    products_rank["view_rank"].values,
    products_rank["sold_rank"].values,
)

# normalize again, so that the highest score becomes 1
max_value = products_rank["rank"].max()
products_rank["rank"] = products_rank["rank"].div(max_value)

# Keep only the final score, best products first, and show the top 20.
popular_products = products_rank[["rank"]].sort_values(by="rank", ascending=False)
popular_products.head(20)

## Check whether our API returns the same result

In [None]:
# NOTE: this import rebinds the notebook-level name `popular_products`. From
# here on it refers to the object exported by shuup_recommender.recommenders,
# no longer to the DataFrame computed in the previous cell.
from shuup_recommender.recommenders import popular_products
# Ask the recommender for 20 entries; as the last expression of the cell the
# result is displayed so it can be compared with the table computed by hand.
popular_products().recommend(20)

## Using the result and fetching products from the database

In [None]:
from shuup.core.models import Product
# Imported here as well so the cell runs on its own and `popular_products`
# is guaranteed to be the recommender rather than a same-named DataFrame.
from shuup_recommender.recommenders import popular_products

# The recommendation is converted to a plain dict; the code below expects a
# "rank" entry whose keys are product ids.
data = popular_products().recommend(10).to_dict()
top_10_products_ids = list(data["rank"])

# NOTE(review): filtering by id does not order the products by rank;
# re-order by top_10_products_ids if the ranking order matters.
Product.objects.filter(id__in=top_10_products_ids)